Skip to content

Commit 2ddb31c

Browse files
Add new attribute matched_text_diagnostics
This commit adds a new attribute for license text diagnostics, which is added when the CLI option `--license-text-diagnostics` is used, in contrast to the earlier behaviour, where the diagnostics matched text overwrote the text in `matched_text`. It also makes sure that top-level license/package summarizations have matched text and diagnostics correctly, only when the respective CLI options are used. Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent d96e69e commit 2ddb31c

File tree

107 files changed

+11529
-10343
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+11529
-10343
lines changed

src/licensedcode/detection.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,12 @@ class LicenseMatchFromResult(LicenseMatch):
606606
help='Text which was matched')
607607
)
608608

609+
matched_text_diagnostics = attr.ib(
610+
default=None,
611+
metadata=dict(
612+
help='Text which was matched, with extra diagnostics information.')
613+
)
614+
609615
def score(self):
610616
return self.match_score
611617

@@ -631,6 +637,7 @@ def from_dict(cls, license_match_mapping):
631637
"""
632638
rule = Rule.from_match_data(license_match_mapping)
633639
matched_text = license_match_mapping.get("matched_text") or None
640+
matched_text_diagnostics = license_match_mapping.get("matched_text_diagnostics") or None
634641

635642
return cls(
636643
from_file=license_match_mapping["from_file"],
@@ -641,6 +648,7 @@ def from_dict(cls, license_match_mapping):
641648
match_coverage=license_match_mapping["match_coverage"],
642649
matcher=license_match_mapping["matcher"],
643650
text=matched_text,
651+
matched_text_diagnostics=matched_text_diagnostics,
644652
rule=rule,
645653
qspan=None,
646654
ispan=None,
@@ -664,10 +672,6 @@ def to_dict(
664672
"""
665673
Return a "result" scan data built from a LicenseMatch object.
666674
"""
667-
matched_text = None
668-
if include_text:
669-
matched_text = self.matched_text
670-
671675
result = {}
672676

673677
result['license_expression'] = self.rule.license_expression
@@ -689,8 +693,10 @@ def to_dict(
689693
if rule_details:
690694
result["rule_notes"] = self.rule.notes
691695
result["referenced_filenames"] = self.rule.referenced_filenames
692-
if include_text:
693-
result['matched_text'] = matched_text
696+
if include_text and self.matched_text:
697+
result['matched_text'] = self.matched_text
698+
if license_text_diagnostics and self.matched_text_diagnostics:
699+
result['matched_text_diagnostics'] = self.matched_text_diagnostics
694700
if rule_details:
695701
result["rule_text"] = self.rule.text
696702

@@ -929,7 +935,11 @@ def get_unique_detections(cls, license_detections):
929935

930936
return unique_license_detections
931937

932-
def to_dict(self, license_diagnostics):
938+
def to_dict(self,
939+
include_text=False,
940+
license_text_diagnostics=False,
941+
license_diagnostics=False,
942+
):
933943

934944
def dict_fields(attr, value):
935945

@@ -946,7 +956,10 @@ def dict_fields(attr, value):
946956

947957
detection_mapping = attr.asdict(self, filter=dict_fields)
948958
detection_mapping["sample_matches"] = [
949-
match.to_dict(include_text=True)
959+
match.to_dict(
960+
include_text=include_text,
961+
license_text_diagnostics=license_text_diagnostics,
962+
)
950963
for match in self.matches
951964
]
952965
return detection_mapping

src/licensedcode/match.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,7 @@ def to_dict(
773773
spdx_license_url=SPDX_LICENSE_URL,
774774
include_text=False,
775775
license_text_diagnostics=False,
776-
whole_lines=True,
776+
whole_lines=False,
777777
file_path=None,
778778
):
779779
"""
@@ -785,11 +785,11 @@ def to_dict(
785785
if include_text:
786786
if license_text_diagnostics:
787787
matched_text_diagnostics = self.matched_text(whole_lines=False, highlight=True)
788+
789+
if whole_lines:
790+
matched_text = self.matched_text(whole_lines=True, highlight=False)
788791
else:
789-
if whole_lines:
790-
matched_text = self.matched_text(whole_lines=True, highlight=False)
791-
else:
792-
matched_text = self.matched_text(whole_lines=False, highlight=False)
792+
matched_text = self.matched_text(whole_lines=False, highlight=False)
793793

794794
result = {}
795795

@@ -808,8 +808,8 @@ def to_dict(
808808

809809
if include_text:
810810
result['matched_text'] = matched_text
811-
if license_text_diagnostics:
812-
result['matched_text_diagnostics'] = matched_text_diagnostics
811+
if license_text_diagnostics:
812+
result['matched_text_diagnostics'] = matched_text_diagnostics
813813
return result
814814

815815
def get_highlighted_text(self, trace=TRACE_HIGHLIGHTED_TEXT):

src/licensedcode/plugin_license.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def get_scanner(
170170
unknown_licenses=unknown_licenses,
171171
)
172172

173-
def process_codebase(self, codebase, license_diagnostics, **kwargs):
173+
def process_codebase(self, codebase, license_text=False, license_diagnostics=False, license_text_diagnostics=False, **kwargs):
174174
"""
175175
Post-process ``codebase`` to follow referenced filenames to license
176176
matches in other files.
@@ -231,7 +231,11 @@ def process_codebase(self, codebase, license_diagnostics, **kwargs):
231231
)
232232

233233
unsorted_license_detections = [
234-
unique_detection.to_dict(license_diagnostics=license_diagnostics)
234+
unique_detection.to_dict(
235+
include_text=license_text,
236+
license_diagnostics=license_diagnostics,
237+
license_text_diagnostics=license_text_diagnostics,
238+
)
235239
for unique_detection in unique_license_detections
236240
]
237241
codebase.attributes.license_detections.extend(

src/packagedcode/plugin_package.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def get_scanner(self, package=True, system_package=False, **kwargs):
185185
system=system_package,
186186
)
187187

188-
def process_codebase(self, codebase, strip_root=False, **kwargs):
188+
def process_codebase(self, codebase, strip_root=False, license_text=False, license_diagnostics=False, license_text_diagnostics=False, **kwargs):
189189
"""
190190
Populate the ``codebase`` top level ``packages`` and ``dependencies``
191191
with package and dependency instances, assembling parsed package data

src/summarycode/todo.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,22 @@ def process_codebase(self, codebase, **kwargs):
100100
if hasattr(codebase.root, 'license_detections'):
101101
has_licenses = True
102102

103+
license_diagnostics = kwargs.get("license_diagnostics")
104+
license_text = kwargs.get("license_text")
105+
license_text_diagnostics = kwargs.get("license_text_diagnostics")
106+
if not license_diagnostics or not license_text or not license_text_diagnostics:
107+
usage_suggestion_message = (
108+
"The --review option, whe paired with --license option should be used with the folowing "
109+
"additional CLI options for maximum benifit: [`--license-text`, `--license-text-diagnostics`,"
110+
"--license-diagnostics`] as these show additional diagnostic information to help review the issues."
111+
)
112+
warnings.simplefilter('always', ToDoPluginUsageWarning)
113+
warnings.warn(
114+
usage_suggestion_message,
115+
ToDoPluginUsageWarning,
116+
stacklevel=2,
117+
)
118+
103119
if not has_packages and not has_licenses:
104120
usage_suggestion_message = (
105121
"The --review option should be used with atleast one of the license [`--license`], "
@@ -323,11 +339,13 @@ def dict_fields(attr, value):
323339
matches_with_details = []
324340
for license_match in detection_mapping["detection"]["matches"]:
325341
license_match_obj = LicenseMatchFromResult.from_dict(license_match)
326-
matches_with_details.append(license_match_obj.to_dict(
327-
include_text=True,
328-
license_text_diagnostics=True,
329-
rule_details=True,
330-
))
342+
matches_with_details.append(
343+
license_match_obj.to_dict(
344+
include_text=True,
345+
license_text_diagnostics=True,
346+
rule_details=True,
347+
)
348+
)
331349
detection_mapping["detection"]["matches"] = matches_with_details
332350

333351
return detection_mapping

tests/cluecode/data/plugin_filter_clues/filtered-expected.json

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,18 @@
77
"detection_count": 1,
88
"sample_matches": [
99
{
10-
"score": 96.07,
10+
"license_expression": "apache-1.1",
11+
"license_expression_spdx": "Apache-1.1",
1112
"from_file": "LICENSE",
1213
"start_line": 7,
1314
"end_line": 70,
15+
"matcher": "3-seq",
16+
"score": 96.07,
1417
"matched_length": 367,
1518
"match_coverage": 100.0,
16-
"matcher": "3-seq",
17-
"license_expression": "apache-1.1",
18-
"license_expression_spdx": "Apache-1.1",
19-
"rule_identifier": "apache-1.1_63.RULE",
2019
"rule_relevance": 100,
21-
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-1.1_63.RULE",
22-
"matched_text": null
20+
"rule_identifier": "apache-1.1_63.RULE",
21+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-1.1_63.RULE"
2322
}
2423
]
2524
}
@@ -52,17 +51,17 @@
5251
"license_expression_spdx": "Apache-1.1",
5352
"matches": [
5453
{
55-
"score": 96.07,
54+
"license_expression": "apache-1.1",
55+
"spdx_license_expression": "Apache-1.1",
56+
"from_file": "LICENSE",
5657
"start_line": 7,
5758
"end_line": 70,
58-
"from_file": "LICENSE",
59+
"matcher": "3-seq",
60+
"score": 96.07,
5961
"matched_length": 367,
6062
"match_coverage": 100.0,
61-
"matcher": "3-seq",
62-
"license_expression": "apache-1.1",
63-
"spdx_license_expression": "Apache-1.1",
64-
"rule_identifier": "apache-1.1_63.RULE",
6563
"rule_relevance": 100,
64+
"rule_identifier": "apache-1.1_63.RULE",
6665
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-1.1_63.RULE"
6766
}
6867
],

tests/cluecode/data/plugin_filter_clues/filtered-expected2.json

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,18 @@
77
"detection_count": 1,
88
"sample_matches": [
99
{
10-
"score": 100.0,
10+
"license_expression": "pygres-2.2",
11+
"license_expression_spdx": "LicenseRef-scancode-pygres-2.2",
1112
"from_file": "LICENSE2",
1213
"start_line": 7,
1314
"end_line": 22,
15+
"matcher": "2-aho",
16+
"score": 100.0,
1417
"matched_length": 145,
1518
"match_coverage": 100.0,
16-
"matcher": "2-aho",
17-
"license_expression": "pygres-2.2",
18-
"license_expression_spdx": "LicenseRef-scancode-pygres-2.2",
19-
"rule_identifier": "pygres-2.2_2.RULE",
2019
"rule_relevance": 100,
21-
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pygres-2.2_2.RULE",
22-
"matched_text": null
20+
"rule_identifier": "pygres-2.2_2.RULE",
21+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pygres-2.2_2.RULE"
2322
}
2423
]
2524
}
@@ -52,17 +51,17 @@
5251
"license_expression_spdx": "LicenseRef-scancode-pygres-2.2",
5352
"matches": [
5453
{
55-
"score": 100.0,
54+
"license_expression": "pygres-2.2",
55+
"spdx_license_expression": "LicenseRef-scancode-pygres-2.2",
56+
"from_file": "LICENSE2",
5657
"start_line": 7,
5758
"end_line": 22,
58-
"from_file": "LICENSE2",
59+
"matcher": "2-aho",
60+
"score": 100.0,
5961
"matched_length": 145,
6062
"match_coverage": 100.0,
61-
"matcher": "2-aho",
62-
"license_expression": "pygres-2.2",
63-
"spdx_license_expression": "LicenseRef-scancode-pygres-2.2",
64-
"rule_identifier": "pygres-2.2_2.RULE",
6563
"rule_relevance": 100,
64+
"rule_identifier": "pygres-2.2_2.RULE",
6665
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pygres-2.2_2.RULE"
6766
}
6867
],

tests/cluecode/data/plugin_filter_clues/filtered-expected3.json

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,18 @@
77
"detection_count": 1,
88
"sample_matches": [
99
{
10-
"score": 100.0,
10+
"license_expression": "pcre",
11+
"license_expression_spdx": "LicenseRef-scancode-pcre",
1112
"from_file": "LICENSE3",
1213
"start_line": 1,
1314
"end_line": 47,
15+
"matcher": "1-hash",
16+
"score": 100.0,
1417
"matched_length": 303,
1518
"match_coverage": 100.0,
16-
"matcher": "1-hash",
17-
"license_expression": "pcre",
18-
"license_expression_spdx": "LicenseRef-scancode-pcre",
19-
"rule_identifier": "pcre.LICENSE",
2019
"rule_relevance": 100,
21-
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE",
22-
"matched_text": null
20+
"rule_identifier": "pcre.LICENSE",
21+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE"
2322
}
2423
]
2524
}
@@ -52,17 +51,17 @@
5251
"license_expression_spdx": "LicenseRef-scancode-pcre",
5352
"matches": [
5453
{
55-
"score": 100.0,
54+
"license_expression": "pcre",
55+
"spdx_license_expression": "LicenseRef-scancode-pcre",
56+
"from_file": "LICENSE3",
5657
"start_line": 1,
5758
"end_line": 47,
58-
"from_file": "LICENSE3",
59+
"matcher": "1-hash",
60+
"score": 100.0,
5961
"matched_length": 303,
6062
"match_coverage": 100.0,
61-
"matcher": "1-hash",
62-
"license_expression": "pcre",
63-
"spdx_license_expression": "LicenseRef-scancode-pcre",
64-
"rule_identifier": "pcre.LICENSE",
6563
"rule_relevance": 100,
64+
"rule_identifier": "pcre.LICENSE",
6665
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE"
6766
}
6867
],

0 commit comments

Comments
 (0)