Skip to content

Commit fe240a2

Browse files
Merge pull request #3609 from nexB/3596-do-not-crash-on-file-input
Validate CLI inputs and paths #3596
2 parents 0c9fd4b + 0d6caa8 commit fe240a2

26 files changed

+620
-277
lines changed

src/formattedcode/__init__.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,62 @@ def convert(self, value, param, ctx):
2525
if value in known_opts:
2626
self.fail(
2727
'Illegal file name conflicting with an option name: '
28-
f'{ os.fsdecode(value)}. '
28+
f'{os.fsdecode(value)!r}. '
2929
'Use the special "-" file name to print results on screen/stdout.',
3030
param,
3131
ctx,
3232
)
33+
34+
try:
35+
validate_output_file_path(location=value)
36+
except Exception as e:
37+
self.fail(str(e), param, ctx)
38+
3339
return click.File.convert(self, value, param, ctx)
40+
41+
42+
class InvalidScanCodeOutputFileError(Exception):
    """
    Raised when an output file path is not valid for writing scan results.
    """
44+
45+
46+
def validate_output_file_path(location):
    """
    Raise an InvalidScanCodeOutputFileError if the output file at ``location``
    is invalid. Return None otherwise.

    ``location`` is a path as a string, bytes or path-like object. The special
    "-" file name (used to print results on screen/stdout) is always valid.

    An output file is invalid if it is:
    - a directory
    - a FIFO, a socket, a block device or a character device
    - an existing path that is not a regular file or is not writable
    - a new path whose parent is not an existing, writable directory
    """
    from pathlib import Path

    # Decode once: ``Path()`` does not accept bytes, and the error messages
    # below all report the decoded path.
    location = os.fsdecode(location)

    if location == "-":
        return

    path = Path(location)

    if path.is_dir():
        raise InvalidScanCodeOutputFileError(
            f'output file is a directory, not a file: {location!r}',
        )

    if path.is_fifo() or path.is_socket() or path.is_block_device() or path.is_char_device():
        raise InvalidScanCodeOutputFileError(
            f'output file cannot be a special/char/device/fifo/pipe file: {location!r}',
        )

    # Only the writability checks below need this helper.
    from commoncode.filetype import is_writable

    if path.exists():
        if not path.is_file():
            raise InvalidScanCodeOutputFileError(
                f'output file exists and is not a file: {location!r}',
            )
        if not is_writable(location):
            raise InvalidScanCodeOutputFileError(
                f'output file exists and is not writable: {location!r}',
            )
        return

    # The output file does not exist yet: it will be created in its parent
    # directory, which must therefore exist and be writable.
    parent = path.parent
    if not parent.exists() or not parent.is_dir():
        raise InvalidScanCodeOutputFileError(
            f'output file parent is not a directory or does not exists: {location!r}',
        )

    if not is_writable(str(parent)):
        raise InvalidScanCodeOutputFileError(
            f'output file parent is not a writable directory: {location!r}',
        )

src/licensedcode/plugin_license_policy.py

Lines changed: 69 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,24 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import os
11+
import logging
12+
13+
from collections import defaultdict
1014
from os.path import exists
1115
from os.path import isdir
1216

1317
import attr
14-
import os
15-
import logging
18+
import click
1619
import saneyaml
1720

18-
from plugincode.post_scan import PostScanPlugin
19-
from plugincode.post_scan import post_scan_impl
2021
from commoncode.cliutils import PluggableCommandLineOption
2122
from commoncode.cliutils import POST_SCAN_GROUP
23+
from commoncode.filetype import is_file
24+
from commoncode.filetype import is_readable
2225
from licensedcode.detection import get_license_keys_from_detections
23-
26+
from plugincode.post_scan import PostScanPlugin
27+
from plugincode.post_scan import post_scan_impl
2428

2529
TRACE = os.environ.get('SCANCODE_DEBUG_LICENSE_POLICY', False)
2630

@@ -42,6 +46,21 @@ def logger_debug(*args):
4246
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
4347

4448

49+
def validate_policy_path(ctx, param, value):
    """
    Validate the ``value`` of the policy file path and return the license
    policy data loaded from this file.

    Return ``value`` unchanged when it is empty (e.g., the option was not
    provided). Raise a click.BadParameter if ``value`` is not a regular file
    or is not readable.
    """
    if not value:
        return value

    is_regular_file = is_file(location=value, follow_symlinks=True)
    if not is_regular_file:
        raise click.BadParameter(f"policy file is not a regular file: {value!r}")

    can_read = is_readable(location=value)
    if not can_read:
        raise click.BadParameter(f"policy file is not readable: {value!r}")

    return load_license_policy(value)
62+
63+
4564
@post_scan_impl
4665
class LicensePolicy(PostScanPlugin):
4766
"""
@@ -57,10 +76,12 @@ class LicensePolicy(PostScanPlugin):
5776
options = [
5877
PluggableCommandLineOption(('--license-policy',),
5978
multiple=False,
79+
callback=validate_policy_path,
6080
metavar='FILE',
6181
help='Load a License Policy file and apply it to the scan at the '
6282
'Resource level.',
63-
help_group=POST_SCAN_GROUP)
83+
help_group=POST_SCAN_GROUP,
84+
)
6485
]
6586

6687
def is_enabled(self, license_policy, **kwargs):
@@ -74,12 +95,19 @@ def process_codebase(self, codebase, license_policy, **kwargs):
7495
if not self.is_enabled(license_policy):
7596
return
7697

77-
if has_policy_duplicates(license_policy):
78-
codebase.errors.append('ERROR: License Policy file contains duplicate entries.\n')
98+
# license_policy has been validated through a callback and contains data
99+
# loaded from YAML
100+
policies = license_policy.get('license_policies', [])
101+
if not policies:
102+
codebase.errors.append(f'ERROR: License Policy file is empty')
79103
return
80104

81105
# get a list of unique license policies from the license_policy file
82-
policies = load_license_policy(license_policy).get('license_policies', [])
106+
dupes = get_duplicate_policies(policies)
107+
if dupes:
108+
dupes = '\n'.join(repr(d) for d in dupes.items())
109+
codebase.errors.append(f'ERROR: License Policy file contains duplicate entries:\n{dupes}')
110+
return
83111

84112
# apply policy to Resources if they contain an offending license
85113
for resource in codebase.walk(topdown=True):
@@ -106,37 +134,46 @@ def process_codebase(self, codebase, license_policy, **kwargs):
106134
codebase.save_resource(resource)
107135

108136

109-
def has_policy_duplicates(license_policy_location):
137+
def get_duplicate_policies(policies):
    """
    Return a mapping of {license key: list of policy mappings} for each
    license key that is used by more than one policy in the ``policies`` list
    of policy mappings.
    Return an empty mapping if there are no duplicates or no policies.
    """
    policies_by_license = defaultdict(list)
    # ``policies or []`` tolerates a None or empty ``policies`` argument
    for policy in policies or []:
        license_key = policy.get('license_key')
        policies_by_license[license_key].append(policy)

    # keep only the license keys that have more than one policy
    return {key: pols for key, pols in policies_by_license.items() if len(pols) > 1}
130150

131151

132152
def load_license_policy(license_policy_location):
    """
    Return a license policy mapping loaded from a license policy file at
    ``license_policy_location``.

    Return an empty mapping if ``license_policy_location`` is empty.
    Raise a click.BadParameter if the file does not exist, is a directory,
    cannot be read or parsed as YAML, is empty, or is not a mapping with a
    top-level 'license_policies' attribute.
    """
    if not license_policy_location:
        return {}

    if not exists(license_policy_location):
        raise click.BadParameter(f"policy file does not exists: {license_policy_location!r} ")

    if isdir(license_policy_location):
        raise click.BadParameter(f"policy file is a directory: {license_policy_location!r} ")

    # Only reading and parsing are guarded: the content checks below raise
    # their own click.BadParameter and must not be re-wrapped.
    try:
        with open(license_policy_location, 'r') as conf:
            conf_content = conf.read()
        policy = saneyaml.load(conf_content)
    except Exception as e:
        raise click.BadParameter(f"policy file is not a well formed or readable YAML file: {license_policy_location!r} {e!r}") from e

    if not policy:
        raise click.BadParameter(f"policy file is empty: {license_policy_location!r}")

    # The top-level YAML object must be a mapping: a plain string or a list
    # would otherwise pass a bare ``in`` test and be returned as a policy.
    if not isinstance(policy, dict) or "license_policies" not in policy:
        raise click.BadParameter(f"policy file is missing a 'license_policies' attribute: {license_policy_location!r} ")

    return policy
179+

src/scancode/cli.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# Import early because of the side effects
1414
import scancode_config
1515

16+
import json
1617
import logging
1718
import os
1819
import platform
@@ -42,6 +43,9 @@ class WindowsError(Exception):
4243
from commoncode.cliutils import path_progress_message
4344
from commoncode.cliutils import progressmanager
4445
from commoncode.cliutils import PluggableCommandLineOption
46+
from commoncode.filetype import is_dir
47+
from commoncode.filetype import is_file
48+
from commoncode.filetype import is_readable
4549
from commoncode.fileutils import as_posixpath
4650
from commoncode.timeutils import time2tstamp
4751
from commoncode.resource import Codebase
@@ -68,7 +72,6 @@ class WindowsError(Exception):
6872
from scancode.interrupt import interruptible
6973
from scancode.pool import ScanCodeTimeoutError
7074

71-
7275
# Tracing flags
7376
TRACE = False
7477
TRACE_DEEP = False
@@ -173,6 +176,32 @@ def validate_depth(ctx, param, value):
173176
return value
174177

175178

179+
def validate_input_path(ctx, param, value):
    """
    Validate a ``value`` list of inputs path strings and return ``value``.

    Raise a click.BadParameter if an input is neither a regular file nor a
    directory, or is not readable. When the "from_json" parameter is set,
    each input must also be a file with a .json extension whose content
    starts with a "{".
    """
    # ctx.params is keyed by parameter name, not by the command line flag.
    # NOTE(review): click populates ctx.params as it processes parameters, so
    # this may not be set yet if the input argument is processed first --
    # confirm the processing order.
    from_json = ctx.params.get("from_json", False)

    for inp in value:
        input_is_file = is_file(location=inp, follow_symlinks=True)
        if not (input_is_file or is_dir(location=inp, follow_symlinks=True)):
            raise click.BadParameter(f"input: {inp!r} is not a regular file or a directory")

        if not is_readable(location=inp):
            raise click.BadParameter(f"input: {inp!r} is not readable")

        if not from_json:
            continue

        # extra JSON validation
        if not input_is_file:
            raise click.BadParameter(f"JSON input: {inp!r} is not a file")

        if not inp.lower().endswith(".json"):
            raise click.BadParameter(f"JSON input: {inp!r} is not a JSON file with a .json extension")

        # Cheap sanity check on the first characters only: full JSON parsing
        # is not done here.
        try:
            with open(inp, encoding="utf-8") as js:
                start = js.read(100).strip()
        except (OSError, UnicodeDecodeError) as e:
            raise click.BadParameter(f"JSON input: {inp!r} is not a well formed JSON file") from e

        if not start.startswith("{"):
            raise click.BadParameter(f"JSON input: {inp!r} is not a well formed JSON file")

    return value
203+
204+
176205
@click.command(name='scancode',
177206
epilog=epilog_text,
178207
cls=ScancodeCommand,
@@ -182,6 +211,7 @@ def validate_depth(ctx, param, value):
182211

183212
@click.argument('input',
184213
metavar='<OUTPUT FORMAT OPTION(s)> <input>...', nargs=-1,
214+
callback=validate_input_path,
185215
type=click.Path(exists=True, readable=True, path_type=str))
186216

187217
@click.option('--strip-root',
@@ -850,9 +880,12 @@ def echo_func(*_args, **_kwargs):
850880
max_in_memory=max_in_memory,
851881
max_depth=max_depth,
852882
)
853-
except:
854-
msg = 'ERROR: failed to collect codebase at: %(input)r' % locals()
855-
raise ScancodeError(msg + '\n' + traceback.format_exc())
883+
except Exception as e:
884+
if from_json and isinstance(e, (json.decoder.JSONDecodeError, UnicodeDecodeError)):
885+
raise click.BadParameter(f"Input JSON scan file(s) is not valid JSON: {input!r} : {e!r}")
886+
else:
887+
msg = f'Failed to process codebase at: {input!r}'
888+
raise ScancodeError(msg + '\n' + traceback.format_exc())
856889

857890
# update headers
858891
cle = codebase.get_or_create_current_header()

0 commit comments

Comments
 (0)