Skip to content

Commit c631e73

Browse files
Fix SPDX rule identifier mismatch bug
Fixes the SPDX rule identifier mismatch bug by removing the extra space introduced during SPDX license declaration detection and text cleaning. Also enforces Python-safe names for identifiers. Reference: #3634 Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 2ddb31c commit c631e73

File tree

101 files changed

+3499
-3094
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+3499
-3094
lines changed

src/licensedcode/licenses_reference.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ def process_codebase(self, codebase, **kwargs):
6969
Collect the ``license_references`` and ``rule_references``
7070
list of data mappings and add to the ``codebase``.
7171
"""
72-
include_files = 'license' in kwargs
73-
include_packages = 'package' in kwargs
72+
include_files = hasattr(codebase.attributes, 'license_detections')
73+
include_packages = hasattr(codebase.attributes, 'packages')
7474

7575
license_references, rule_references = collect_license_and_rule_references(
7676
codebase=codebase,
@@ -86,17 +86,24 @@ def collect_license_and_rule_references(codebase, include_packages=True, include
8686
Return a two-tuple of (``license_references``, ``license_rule_references``)
8787
sorted lists of unique mappings collected from a ``codebase``.
8888
"""
89+
if TRACE:
90+
logger_debug(f'include_packages: {include_packages}, include_files: {include_files}')
8991

9092
license_keys = set()
9193
rules_by_identifier = {}
9294

9395
if include_packages:
9496
pks, prules = collect_references_from_packages(codebase)
95-
license_keys.update(pks)
97+
if TRACE:
98+
logger_debug(f'collect_references_from_packages: license keys: {pks}')
99+
logger_debug(f'collect_references_from_packages: rules by id: {prules}')
96100
rules_by_identifier.update(prules)
97101

98102
if include_files:
99103
pks, prules = collect_references_from_files(codebase)
104+
if TRACE:
105+
logger_debug(f'collect_references_from_files: license keys: {pks}')
106+
logger_debug(f'collect_references_from_files: rules by id: {prules}')
100107
license_keys.update(pks)
101108
rules_by_identifier.update(prules)
102109

@@ -140,10 +147,6 @@ def collect_references_from_packages(codebase):
140147
if expression:
141148
license_keys.update(licensing.license_keys(expression))
142149

143-
detections = getattr(resource, 'license_detections', []) or []
144-
rules_by_id = build_rules_from_detection_data(detections)
145-
rules_by_identifier.update(rules_by_id)
146-
147150
for rule in rules_by_identifier.values():
148151
# TODO: consider using the expresion object directly instead
149152
expo = rule.license_expression

src/licensedcode/models.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from commoncode.fileutils import file_base_name
3131
from commoncode.fileutils import file_name
3232
from commoncode.fileutils import resource_iter
33+
from commoncode.text import python_safe_name
3334
from licensedcode import MIN_MATCH_HIGH_LENGTH
3435
from licensedcode import MIN_MATCH_LENGTH
3536
from licensedcode import SMALL_RULE
@@ -2125,6 +2126,10 @@ def from_file(cls, rule_file, is_builtin=True):
21252126
rule.load_data(rule_file=rule_file)
21262127
return rule
21272128

2129+
@property
2130+
def pysafe_expression(self):
2131+
return python_safe_name(self.license_expression)
2132+
21282133
def load_data(self, rule_file):
21292134
"""
21302135
Load data from ``rule_file`` which has both the text and the data (as YAML forntmatter).
@@ -2581,7 +2586,7 @@ class SpdxRule(SynthethicRule):
25812586
"""
25822587

25832588
def __attrs_post_init__(self, *args, **kwargs):
2584-
self.identifier = f'spdx-license-identifier-{self.license_expression}-{self._unique_id}'
2589+
self.identifier = f'spdx-license-identifier-{self.pysafe_expression}-{self._unique_id}'
25852590
self.setup()
25862591

25872592
if not self.license_expression:
@@ -2635,7 +2640,7 @@ class UnDetectedRule(SynthethicRule):
26352640
"""
26362641

26372642
def __attrs_post_init__(self, *args, **kwargs):
2638-
self.identifier = f'package-manifest-{self.license_expression}-{self._unique_id}'
2643+
self.identifier = f'package-manifest-{self.pysafe_expression}-{self._unique_id}'
26392644
expression = self.licensing.parse(self.license_expression)
26402645
self.license_expression = expression.render()
26412646
self.license_expression_object = expression

src/licensedcode/query.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ def tokens_by_line(
507507
if spdx_start_offset is not None:
508508
# keep the line, start/end known pos for SPDX matching
509509
spdx_prefix, spdx_expression = split_spdx_lid(line)
510-
spdx_text = ' '.join([spdx_prefix or '', spdx_expression])
510+
spdx_text = ''.join([spdx_prefix or '', spdx_expression])
511511
spdx_start_known_pos = line_first_known_pos + spdx_start_offset
512512

513513
if spdx_start_known_pos <= line_last_known_pos:

tests/licensedcode/data/licenses_reference_reporting/scan-matched-text-with-reference.expected.json

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,12 @@
5555
"matched_length": 5,
5656
"match_coverage": 100.0,
5757
"rule_relevance": 100,
58-
"rule_identifier": "spdx-license-identifier-artistic-2.0 OR mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
58+
"rule_identifier": "spdx-license-identifier-artistic_2_0_or_mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
5959
"rule_url": null,
6060
"matched_text": "Artistic-2.0 OR MIT"
6161
}
6262
],
63-
"identifier": "artistic_2_0_or_mit-529b866b-c702-0328-8f33-363ba46b3370"
63+
"identifier": "artistic_2_0_or_mit-0549e3ec-193d-d46c-a851-893a86e6b231"
6464
}
6565
],
6666
"other_license_expression": null,
@@ -86,7 +86,7 @@
8686
"dependencies": [],
8787
"license_detections": [
8888
{
89-
"identifier": "apache_2_0_and__mit_or_bsd_simplified-a6ac74a7-7a5d-f78e-e6da-54ac6d836a93",
89+
"identifier": "apache_2_0_and__mit_or_bsd_simplified-8f8d79c6-d33c-addb-ff36-9f46bc8eb8a2",
9090
"license_expression": "apache-2.0 AND (mit OR bsd-simplified)",
9191
"license_expression_spdx": "Apache-2.0 AND (MIT OR BSD-2-Clause)",
9292
"detection_count": 1,
@@ -119,7 +119,7 @@
119119
"matched_length": 8,
120120
"match_coverage": 100.0,
121121
"rule_relevance": 100,
122-
"rule_identifier": "spdx-license-identifier-mit OR bsd-simplified-521d0523ce32cc52dd709e9fc653552931947808",
122+
"rule_identifier": "spdx-license-identifier-mit_or_bsd_simplified-521d0523ce32cc52dd709e9fc653552931947808",
123123
"rule_url": null,
124124
"matched_text": "SPDX-License-Identifier: MIT or BSD-2-Clause",
125125
"matched_text_diagnostics": "SPDX-License-Identifier: MIT or BSD-2-Clause"
@@ -152,7 +152,7 @@
152152
]
153153
},
154154
{
155-
"identifier": "artistic_2_0_or_mit-529b866b-c702-0328-8f33-363ba46b3370",
155+
"identifier": "artistic_2_0_or_mit-0549e3ec-193d-d46c-a851-893a86e6b231",
156156
"license_expression": "artistic-2.0 OR mit",
157157
"license_expression_spdx": "Artistic-2.0 OR MIT",
158158
"detection_count": 1,
@@ -169,7 +169,7 @@
169169
"matched_length": 5,
170170
"match_coverage": 100.0,
171171
"rule_relevance": 100,
172-
"rule_identifier": "spdx-license-identifier-artistic-2.0 OR mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
172+
"rule_identifier": "spdx-license-identifier-artistic_2_0_or_mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
173173
"rule_url": null,
174174
"matched_text": "Artistic-2.0 OR MIT"
175175
}
@@ -417,7 +417,7 @@
417417
},
418418
{
419419
"license_expression": "artistic-2.0 OR mit",
420-
"identifier": "spdx-license-identifier-artistic-2.0 OR mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
420+
"identifier": "spdx-license-identifier-artistic_2_0_or_mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
421421
"language": "en",
422422
"rule_url": null,
423423
"is_license_text": false,
@@ -444,7 +444,7 @@
444444
},
445445
{
446446
"license_expression": "mit OR bsd-simplified",
447-
"identifier": "spdx-license-identifier-mit OR bsd-simplified-521d0523ce32cc52dd709e9fc653552931947808",
447+
"identifier": "spdx-license-identifier-mit_or_bsd_simplified-521d0523ce32cc52dd709e9fc653552931947808",
448448
"language": "en",
449449
"rule_url": null,
450450
"is_license_text": false,
@@ -524,14 +524,14 @@
524524
"matched_length": 8,
525525
"match_coverage": 100.0,
526526
"rule_relevance": 100,
527-
"rule_identifier": "spdx-license-identifier-mit OR bsd-simplified-f59ff8931aa67ccbfb194b8c7db7d4e5eafb709c",
527+
"rule_identifier": "spdx-license-identifier-mit_or_bsd_simplified-521d0523ce32cc52dd709e9fc653552931947808",
528528
"rule_url": null,
529529
"matched_text": "SPDX-License-Identifier: MIT or BSD-2-Clause",
530530
"matched_text_diagnostics": "SPDX-License-Identifier: MIT or BSD-2-Clause"
531531
}
532532
],
533533
"detection_log": [],
534-
"identifier": "apache_2_0_and__mit_or_bsd_simplified-a6ac74a7-7a5d-f78e-e6da-54ac6d836a93"
534+
"identifier": "apache_2_0_and__mit_or_bsd_simplified-8f8d79c6-d33c-addb-ff36-9f46bc8eb8a2"
535535
}
536536
],
537537
"license_clues": [],
@@ -597,12 +597,12 @@
597597
"matched_length": 5,
598598
"match_coverage": 100.0,
599599
"rule_relevance": 100,
600-
"rule_identifier": "spdx-license-identifier-artistic-2.0 OR mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
600+
"rule_identifier": "spdx-license-identifier-artistic_2_0_or_mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
601601
"rule_url": null,
602602
"matched_text": "Artistic-2.0 OR MIT"
603603
}
604604
],
605-
"identifier": "artistic_2_0_or_mit-529b866b-c702-0328-8f33-363ba46b3370"
605+
"identifier": "artistic_2_0_or_mit-0549e3ec-193d-d46c-a851-893a86e6b231"
606606
}
607607
],
608608
"other_license_expression": null,

tests/licensedcode/data/licenses_reference_reporting/scan-with-reference.expected.json

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,12 @@
5555
"matched_length": 5,
5656
"match_coverage": 100.0,
5757
"rule_relevance": 100,
58-
"rule_identifier": "spdx-license-identifier-artistic-2.0 OR mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
58+
"rule_identifier": "spdx-license-identifier-artistic_2_0_or_mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
5959
"rule_url": null,
6060
"matched_text": "Artistic-2.0 OR MIT"
6161
}
6262
],
63-
"identifier": "artistic_2_0_or_mit-529b866b-c702-0328-8f33-363ba46b3370"
63+
"identifier": "artistic_2_0_or_mit-0549e3ec-193d-d46c-a851-893a86e6b231"
6464
}
6565
],
6666
"other_license_expression": null,
@@ -86,7 +86,7 @@
8686
"dependencies": [],
8787
"license_detections": [
8888
{
89-
"identifier": "apache_2_0_and__mit_or_bsd_simplified-a6ac74a7-7a5d-f78e-e6da-54ac6d836a93",
89+
"identifier": "apache_2_0_and__mit_or_bsd_simplified-8f8d79c6-d33c-addb-ff36-9f46bc8eb8a2",
9090
"license_expression": "apache-2.0 AND (mit OR bsd-simplified)",
9191
"license_expression_spdx": "Apache-2.0 AND (MIT OR BSD-2-Clause)",
9292
"detection_count": 1,
@@ -116,7 +116,7 @@
116116
"matched_length": 8,
117117
"match_coverage": 100.0,
118118
"rule_relevance": 100,
119-
"rule_identifier": "spdx-license-identifier-mit OR bsd-simplified-c8fa3f8aa8e5819b052e913ac9dec497534a442b",
119+
"rule_identifier": "spdx-license-identifier-mit_or_bsd_simplified-c8fa3f8aa8e5819b052e913ac9dec497534a442b",
120120
"rule_url": null
121121
}
122122
]
@@ -144,7 +144,7 @@
144144
]
145145
},
146146
{
147-
"identifier": "artistic_2_0_or_mit-529b866b-c702-0328-8f33-363ba46b3370",
147+
"identifier": "artistic_2_0_or_mit-0549e3ec-193d-d46c-a851-893a86e6b231",
148148
"license_expression": "artistic-2.0 OR mit",
149149
"license_expression_spdx": "Artistic-2.0 OR MIT",
150150
"detection_count": 1,
@@ -160,7 +160,7 @@
160160
"matched_length": 5,
161161
"match_coverage": 100.0,
162162
"rule_relevance": 100,
163-
"rule_identifier": "spdx-license-identifier-artistic-2.0 OR mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
163+
"rule_identifier": "spdx-license-identifier-artistic_2_0_or_mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
164164
"rule_url": null
165165
}
166166
]
@@ -407,7 +407,7 @@
407407
},
408408
{
409409
"license_expression": "artistic-2.0 OR mit",
410-
"identifier": "spdx-license-identifier-artistic-2.0 OR mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
410+
"identifier": "spdx-license-identifier-artistic_2_0_or_mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
411411
"language": "en",
412412
"rule_url": null,
413413
"is_license_text": false,
@@ -434,7 +434,7 @@
434434
},
435435
{
436436
"license_expression": "mit OR bsd-simplified",
437-
"identifier": "spdx-license-identifier-mit OR bsd-simplified-c8fa3f8aa8e5819b052e913ac9dec497534a442b",
437+
"identifier": "spdx-license-identifier-mit_or_bsd_simplified-c8fa3f8aa8e5819b052e913ac9dec497534a442b",
438438
"language": "en",
439439
"rule_url": null,
440440
"is_license_text": false,
@@ -512,11 +512,11 @@
512512
"matched_length": 8,
513513
"match_coverage": 100.0,
514514
"rule_relevance": 100,
515-
"rule_identifier": "spdx-license-identifier-mit OR bsd-simplified-f59ff8931aa67ccbfb194b8c7db7d4e5eafb709c",
515+
"rule_identifier": "spdx-license-identifier-mit_or_bsd_simplified-521d0523ce32cc52dd709e9fc653552931947808",
516516
"rule_url": null
517517
}
518518
],
519-
"identifier": "apache_2_0_and__mit_or_bsd_simplified-a6ac74a7-7a5d-f78e-e6da-54ac6d836a93"
519+
"identifier": "apache_2_0_and__mit_or_bsd_simplified-8f8d79c6-d33c-addb-ff36-9f46bc8eb8a2"
520520
}
521521
],
522522
"license_clues": [],
@@ -582,12 +582,12 @@
582582
"matched_length": 5,
583583
"match_coverage": 100.0,
584584
"rule_relevance": 100,
585-
"rule_identifier": "spdx-license-identifier-artistic-2.0 OR mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
585+
"rule_identifier": "spdx-license-identifier-artistic_2_0_or_mit-512604ee8f4c8e5ccdd4631c2b447196299cd404",
586586
"rule_url": null,
587587
"matched_text": "Artistic-2.0 OR MIT"
588588
}
589589
],
590-
"identifier": "artistic_2_0_or_mit-529b866b-c702-0328-8f33-363ba46b3370"
590+
"identifier": "artistic_2_0_or_mit-0549e3ec-193d-d46c-a851-893a86e6b231"
591591
}
592592
],
593593
"other_license_expression": null,

tests/licensedcode/data/match_spdx/scan-expected.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"license_detections": [
33
{
4-
"identifier": "mit-86af397f-7fb9-6751-2e09-217685ce5b2a",
4+
"identifier": "mit-59928f02-ade3-817b-5db4-dfe1c6738ef4",
55
"license_expression": "mit",
66
"license_expression_spdx": "MIT",
77
"detection_count": 1,
@@ -48,14 +48,14 @@
4848
"matched_length": 5,
4949
"match_coverage": 100.0,
5050
"rule_relevance": 100,
51-
"rule_identifier": "spdx-license-identifier-mit-6fcd9fa0c61347e06feb569ce3335c1b374640a1",
51+
"rule_identifier": "spdx-license-identifier-mit-38fe0b852f1f8545c4a1b7ac0e456b182dbeb3ab",
5252
"rule_url": null,
5353
"matched_text": "<a href=\"https://licenses.nuget.org/MIT\">MIT</a> </div>",
5454
"matched_text_diagnostics": "licenses.nuget.org/MIT\">MIT</"
5555
}
5656
],
5757
"detection_log": [],
58-
"identifier": "mit-86af397f-7fb9-6751-2e09-217685ce5b2a"
58+
"identifier": "mit-59928f02-ade3-817b-5db4-dfe1c6738ef4"
5959
}
6060
],
6161
"license_clues": [],

0 commit comments

Comments
 (0)