Skip to content

Commit 5f28d5c

Browse files
Update referenced license detection from multiple files
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 49da477 commit 5f28d5c

File tree

6 files changed

+119
-1150
lines changed

6 files changed

+119
-1150
lines changed

src/licensedcode/detection.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,11 +1377,15 @@ def has_references_to_local_files(license_matches):
13771377

13781378
def use_referenced_license_expression(referenced_license_expression, license_detection, licensing=Licensing()):
13791379
"""
1380-
Return True if the `license_detection` LicenseDetection object should
1381-
include the referenced LicenseMatch objects (the `referenced_license_expression`
1382-
LicenseExpression string is the combined License Expression for these matches)
1383-
that it references, otherwise if return False if the LicenseDetection object
1384-
should remain intact.
1380+
Return True if the ``license_detection`` LicenseDetection should include
1381+
the matches represented by the ``referenced_license_expression`` string.
1382+
Return False otherwise.
1383+
1384+
Used when we have a ``license_detection`` with a match to a license rule like
1385+
"See license in COPYING" and where the ``referenced_license_expression`` is the
1386+
expression found in the "COPYING" file, which is the combined expression from
1387+
all license detections found in "COPYING" (or multiple referenced files).
1388+
13851389
Reference: https://github.com/nexB/scancode-toolkit/issues/3547
13861390
"""
13871391
#TODO: Also determine if referenced matches could be added but
@@ -1416,6 +1420,10 @@ def use_referenced_license_expression(referenced_license_expression, license_det
14161420
if same_license_keys and not same_expression:
14171421
return False
14181422

1423+
# when there are many license keys in an expression, and there are no
1424+
# unknown or other cases, we cannot safely conclude that we should
1425+
# follow the license in the referenced filenames. This is likely
1426+
# a case where we have larger notices and several combined expressions.
14191427
if len(referenced_license_keys) > 5:
14201428
return False
14211429

src/licensedcode/plugin_license.py

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def logger_debug(*args):
4545

4646
logger = logging.getLogger(__name__)
4747

48-
if TRACE:
48+
if TRACE or TRACE_REFERENCE:
4949
import sys
5050
logging.basicConfig(stream=sys.stdout)
5151
logger.setLevel(logging.DEBUG)
@@ -217,6 +217,8 @@ def process_codebase(self, codebase, license_text=False, license_diagnostics=Fal
217217
f'before: {license_expressions_before}\n'
218218
f'after : {license_expressions_after}'
219219
)
220+
221+
#raise Exception()
220222

221223
license_detections = collect_license_detections(
222224
codebase=codebase,
@@ -273,8 +275,6 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
273275
license_detection_mapping=license_detection_mapping,
274276
file_path=resource.path,
275277
)
276-
detection_modified = False
277-
detections_added = []
278278
license_match_mappings = license_detection_mapping["matches"]
279279
referenced_filenames = get_referenced_filenames(license_detection.matches)
280280

@@ -285,6 +285,7 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
285285
)
286286
continue
287287

288+
referenced_detections = []
288289
for referenced_filename in referenced_filenames:
289290
referenced_resource = find_referenced_resource(
290291
referenced_filename=referenced_filename,
@@ -293,46 +294,47 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
293294
)
294295

295296
if referenced_resource and referenced_resource.license_detections:
296-
referenced_license_expression = combine_expressions(
297-
expressions=[
298-
detection["license_expression"]
299-
for detection in referenced_resource.license_detections
300-
],
297+
referenced_detections.extend(
298+
referenced_resource.license_detections
301299
)
302-
if not use_referenced_license_expression(
303-
referenced_license_expression=referenced_license_expression,
304-
license_detection=license_detection,
305-
):
306-
if TRACE_REFERENCE:
307-
logger_debug(
308-
f'use_referenced_license_expression: False for '
309-
f'resource: {referenced_resource.path} and '
310-
f'license_expression: {referenced_license_expression}',
311-
)
312-
continue
313-
314-
if TRACE_REFERENCE:
315-
logger_debug(
316-
f'use_referenced_license_expression: True for '
317-
f'resource: {referenced_resource.path} and '
318-
f'license_expression: {referenced_license_expression}',
300+
301+
for detection in referenced_resource.license_detections:
302+
populate_matches_with_path(
303+
matches=detection["matches"],
304+
path=referenced_resource.path
319305
)
320306

321-
modified = True
322-
detection_modified = True
323-
detections_added.extend(referenced_resource.license_detections)
324-
matches_to_extend = get_matches_from_detection_mappings(
325-
license_detections=referenced_resource.license_detections
326-
)
327-
populate_matches_with_path(
328-
matches=matches_to_extend,
329-
path=referenced_resource.path
307+
referenced_license_expression = combine_expressions(
308+
expressions=[
309+
detection["license_expression"]
310+
for detection in referenced_detections
311+
],
312+
)
313+
if not use_referenced_license_expression(
314+
referenced_license_expression=referenced_license_expression,
315+
license_detection=license_detection,
316+
):
317+
if TRACE_REFERENCE:
318+
logger_debug(
319+
f'use_referenced_license_expression: False for '
320+
f'resource: {referenced_resource.path} and '
321+
f'license_expression: {referenced_license_expression}',
330322
)
331-
license_match_mappings.extend(matches_to_extend)
332-
333-
if not detection_modified:
334323
continue
335324

325+
if TRACE_REFERENCE:
326+
logger_debug(
327+
f'use_referenced_license_expression: True for '
328+
f'resource: {referenced_resource.path} and '
329+
f'license_expression: {referenced_license_expression}',
330+
)
331+
332+
modified = True
333+
matches_to_extend = get_matches_from_detection_mappings(
334+
license_detections=referenced_detections
335+
)
336+
license_match_mappings.extend(matches_to_extend)
337+
336338
detection_log, license_expression = get_detected_license_expression(
337339
license_match_mappings=license_match_mappings,
338340
analysis=DetectionCategory.UNKNOWN_FILE_REFERENCE_LOCAL.value,
@@ -348,7 +350,7 @@ def add_referenced_filenames_license_matches_for_detections(resource, codebase):
348350
license_detection_mapping["detection_log"] = detection_log
349351
license_detection_mapping["identifier"] = get_new_identifier_from_detections(
350352
initial_detection=license_detection_mapping,
351-
detections_added=detections_added,
353+
detections_added=referenced_detections,
352354
license_expression=license_expression,
353355
)
354356

src/packagedcode/licensing.py

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ def add_referenced_license_matches_for_package(resource, codebase):
9595
)
9696

9797
detections_added = []
98-
detection_modified = False
9998
license_match_mappings = license_detection_mapping["matches"]
10099
referenced_filenames = get_referenced_filenames(license_detection_object.matches)
101100
if not referenced_filenames:
102101
continue
103102

103+
referenced_detections = []
104104
for referenced_filename in referenced_filenames:
105105
referenced_resource = find_referenced_resource(
106106
referenced_filename=referenced_filename,
@@ -109,35 +109,37 @@ def add_referenced_license_matches_for_package(resource, codebase):
109109
)
110110

111111
if referenced_resource and referenced_resource.license_detections:
112-
referenced_license_expression = combine_expressions(
113-
expressions=[
114-
detection["license_expression"]
115-
for detection in referenced_resource.license_detections
116-
],
112+
referenced_detections.extend(
113+
referenced_resource.license_detections
117114
)
118-
if not use_referenced_license_expression(
119-
referenced_license_expression=referenced_license_expression,
120-
license_detection=license_detection_object,
121-
):
122-
continue
123115

124-
modified = True
125-
detection_modified = True
126-
detections_added.extend(referenced_resource.license_detections)
127-
matches_to_extend = get_matches_from_detection_mappings(
128-
license_detections=referenced_resource.license_detections
129-
)
130116
# For LicenseMatches with different resources as origin, add the
131117
# resource path to these matches as origin info
132-
populate_matches_with_path(
133-
matches=matches_to_extend,
134-
path=referenced_resource.path
135-
)
136-
license_match_mappings.extend(matches_to_extend)
118+
for detection in referenced_resource.license_detections:
119+
populate_matches_with_path(
120+
matches=detection["matches"],
121+
path=referenced_resource.path
122+
)
137123

138-
if not detection_modified:
124+
referenced_license_expression = combine_expressions(
125+
expressions=[
126+
detection["license_expression"]
127+
for detection in referenced_detections
128+
],
129+
)
130+
if not use_referenced_license_expression(
131+
referenced_license_expression=referenced_license_expression,
132+
license_detection=license_detection_object,
133+
):
139134
continue
140135

136+
modified = True
137+
detections_added.extend(referenced_resource.license_detections)
138+
matches_to_extend = get_matches_from_detection_mappings(
139+
license_detections=referenced_resource.license_detections,
140+
)
141+
license_match_mappings.extend(matches_to_extend)
142+
141143
detection_log, license_expression = get_detected_license_expression(
142144
license_match_mappings=license_match_mappings,
143145
analysis=DetectionCategory.PACKAGE_UNKNOWN_FILE_REFERENCE_LOCAL.value,

tests/licensedcode/data/plugin_license/ignored_reference/or_and_problem.expected.json

Lines changed: 41 additions & 109 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)