Skip to content

Commit 3e272a8

Browse files
committed
Use common function to collect values #2861
* Fix logic in check_
Signed-off-by: Jono Yang <[email protected]>
1 parent c118f91 commit 3e272a8

File tree

1 file changed

+73
-102
lines changed

1 file changed

+73
-102
lines changed

src/summarycode/score2.py

Lines changed: 73 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -83,40 +83,42 @@ def compute_license_score(codebase):
8383
"""
8484

8585
scoring_elements = ScoringElements()
86-
declared_licenses = get_declared_license_info_from_top_level_key_files(codebase)
87-
declared_license_expressions = get_declared_license_expressions_from_top_level_key_files(codebase)
86+
declared_licenses = get_field_values_from_codebase_resources(codebase, 'licenses', key_files_only=True)
87+
declared_license_expressions = get_field_values_from_codebase_resources(codebase, 'license_expressions', key_files_only=True)
8888
declared_license_categories = get_license_categories(declared_licenses)
89-
copyrights = get_copyrights_from_key_files(codebase)
90-
other_licenses = get_other_licenses(codebase)
89+
copyrights = get_field_values_from_codebase_resources(codebase, 'copyrights', key_files_only=True)
90+
other_licenses = get_field_values_from_codebase_resources(codebase, 'licenses', key_files_only=False)
9191

9292
scoring_elements.declared_license = bool(declared_licenses)
93-
if declared_licenses:
93+
if scoring_elements.declared_license:
9494
scoring_elements.score += 40
9595

96-
precise_license_detection = check_declared_licenses(declared_licenses)
97-
scoring_elements.precise_license_detection = precise_license_detection
98-
if precise_license_detection:
96+
scoring_elements.precise_license_detection = check_declared_licenses(declared_licenses)
97+
if scoring_elements.precise_license_detection:
9998
scoring_elements.score += 40
10099

101-
has_license_text = check_for_license_texts(declared_licenses)
102-
scoring_elements.has_license_text = has_license_text
103-
if has_license_text:
100+
scoring_elements.has_license_text = check_for_license_texts(declared_licenses)
101+
if scoring_elements.has_license_text:
104102
scoring_elements.score += 10
105103

106104
scoring_elements.declared_copyrights = bool(copyrights)
107-
if copyrights:
105+
if scoring_elements.declared_copyrights:
108106
scoring_elements.score += 10
109107

110108
is_permissively_licensed = check_declared_license_categories(declared_license_categories)
111109
if is_permissively_licensed:
112-
contains_conflicting_license = check_for_conflicting_licenses(other_licenses)
113-
scoring_elements.conflicting_license_categories = contains_conflicting_license
114-
if contains_conflicting_license and scoring_elements.score > 0:
110+
scoring_elements.conflicting_license_categories = check_for_conflicting_licenses(other_licenses)
111+
if (
112+
scoring_elements.conflicting_license_categories
113+
and scoring_elements.score > 0
114+
):
115115
scoring_elements.score -= 20
116116

117-
ambigous_compound_licensing = check_ambiguous_license_expression(declared_license_expressions)
118-
scoring_elements.ambigous_compound_licensing = ambigous_compound_licensing
119-
if ambigous_compound_licensing and scoring_elements.score > 0:
117+
scoring_elements.ambigous_compound_licensing = check_for_license_ambiguity(declared_license_expressions)
118+
if (
119+
scoring_elements.ambigous_compound_licensing
120+
and scoring_elements.score > 0
121+
):
120122
scoring_elements.score -= 10
121123

122124
return scoring_elements.to_dict()
@@ -203,84 +205,31 @@ def is_good_license(detected_license):
203205
return False
204206

205207

206-
def get_declared_license_info_from_top_level_key_files(codebase):
207-
"""
208-
Return a list of "declared" license keys from the expressions as detected in
209-
key files from top-level directories.
210-
211-
A project has specific key file(s) at the top level of its code hierarchy
212-
such as LICENSE, NOTICE or similar (and/or a package manifest) containing
213-
structured license information such as an SPDX license expression or SPDX
214-
license identifier: when such a file contains "clearly defined" declared
215-
license information, we return this.
208+
def get_field_values_from_codebase_resources(codebase, field_name, key_files_only=False):
216209
"""
217-
declared = []
218-
for resource in codebase.walk(topdown=True):
219-
if not (resource.is_dir and resource.is_top_level):
220-
continue
221-
for child in resource.walk(codebase):
222-
if not child.is_key_file:
223-
continue
224-
for detected_license in getattr(child, 'licenses', []) or []:
225-
declared.append(detected_license)
226-
return declared
210+
Return a list of values from the `field_name` field of the Resources from
211+
`codebase`.
227212
213+
If `key_files_only` is True, then we only return the field values from
214+
Resources classified as key files.
228215
229-
def get_declared_license_expressions_from_top_level_key_files(codebase):
230-
"""
231-
Return a list of "declared" license expressions as detected in key files
232-
from top-level directories.
233-
234-
A project has specific key file(s) at the top level of its code hierarchy
235-
such as LICENSE, NOTICE or similar (and/or a package manifest) containing
236-
structured license information such as an SPDX license expression or SPDX
237-
license identifier: when such a file contains "clearly defined" declared
238-
license information, we return this.
216+
If `key_files_only` is False, then we return the field values from Resources
217+
that are not classified as key files.
239218
"""
240-
declared = []
219+
values = []
241220
for resource in codebase.walk(topdown=True):
242221
if not (resource.is_dir and resource.is_top_level):
243222
continue
244223
for child in resource.walk(codebase):
245-
if not child.is_key_file:
246-
continue
247-
for detected_license_expression in getattr(child, 'license_expressions', []) or []:
248-
declared.append(detected_license_expression)
249-
return declared
250-
251-
252-
def get_other_licenses(codebase):
253-
"""
254-
Return a list of detected licenses from non-key files under a top-level directory
255-
"""
256-
other_licenses = []
257-
for resource in codebase.walk(topdown=True):
258-
if not (resource.is_dir and resource.is_top_level):
259-
continue
260-
for child in resource.walk(codebase):
261-
if child.is_key_file:
262-
continue
263-
for detected_license in getattr(child, 'licenses', []) or []:
264-
other_licenses.append(detected_license)
265-
return other_licenses
266-
267-
268-
def get_copyrights_from_key_files(codebase):
269-
"""
270-
Return a list of copyright statements from key files from a top-level directory
271-
"""
272-
copyright_statements = []
273-
for resource in codebase.walk(topdown=True):
274-
if not (resource.is_dir and resource.is_top_level):
275-
continue
276-
for child in resource.walk(codebase):
277-
if not child.is_key_file:
278-
continue
279-
for detected_copyright in getattr(child, 'copyrights', []) or []:
280-
copyright_statement = detected_copyright.get('copyright')
281-
if copyright_statement:
282-
copyright_statements.append(copyright_statement)
283-
return copyright_statements
224+
if key_files_only:
225+
if not child.is_key_file:
226+
continue
227+
else:
228+
if child.is_key_file:
229+
continue
230+
for detected_license in getattr(child, field_name, []) or []:
231+
values.append(detected_license)
232+
return values
284233

285234

286235
def get_license_categories(license_infos):
@@ -362,7 +311,7 @@ def check_for_conflicting_licenses(other_licenses):
362311
return False
363312

364313

365-
def group_license_expressions(declared_license_expressions):
314+
def group_license_expressions(unique_license_expressions):
366315
"""
367316
Return a tuple that contains two lists of license expressions.
368317
@@ -372,18 +321,17 @@ def group_license_expressions(declared_license_expressions):
372321
The second list in the tuple contains unique license
373322
expressions without "AND", "OR", or "WITH".
374323
"""
375-
unique_declared_license_expressions = set(declared_license_expressions)
376324
joined_expressions = []
377325
single_expressions = []
378-
for declared_license_expression in unique_declared_license_expressions:
326+
for license_expression in unique_license_expressions:
379327
if (
380-
'AND' in declared_license_expression
381-
or 'OR' in declared_license_expression
382-
or 'WITH' in declared_license_expression
328+
'AND' in license_expression
329+
or 'OR' in license_expression
330+
or 'WITH' in license_expression
383331
):
384-
joined_expressions.append(declared_license_expression)
332+
joined_expressions.append(license_expression)
385333
else:
386-
single_expressions.append(declared_license_expression)
334+
single_expressions.append(license_expression)
387335

388336
licensing = Licensing()
389337
unique_joined_expressions = []
@@ -405,13 +353,33 @@ def group_license_expressions(declared_license_expressions):
405353
return unique_joined_expressions, single_expressions
406354

407355

408-
def check_ambiguous_license_expression(declared_license_expressions):
409-
# Get lists of unique license expressions
356+
def check_for_license_ambiguity(declared_license_expressions):
357+
"""
358+
License ambiguity is the situation where there is a license declaration that makes
359+
it difficult to construct a reliable license expression, such as in the case
360+
of multiple licenses where the conjunctive versus disjunctive relationship
361+
is not well defined.
362+
363+
We determine that a list of `declared_license_expressions` has license ambiguity if
364+
we cannot resolve the `declared_license_expressions` into one expression.
365+
"""
366+
unique_declared_license_expressions = set(declared_license_expressions)
367+
# If we only have a single unique license expression, then we do not have
368+
# any ambiguity about the licensing
369+
if len(unique_declared_license_expressions) == 1:
370+
return False
371+
410372
unique_joined_expressions, single_expressions = group_license_expressions(
411-
declared_license_expressions
373+
unique_declared_license_expressions
412374
)
413-
if not unique_joined_expressions and not single_expressions:
414-
return True
375+
376+
if not unique_joined_expressions:
377+
# If we do not have any joined expressions, but multiple single
378+
# expressions remaining, then we have license ambiguity
379+
if len(single_expressions) > 1:
380+
return True
381+
else:
382+
return False
415383

416384
# Group single expressions to joined expressions to see if single
417385
# expressions are accounted for in a joined expression
@@ -421,14 +389,17 @@ def check_ambiguous_license_expression(declared_license_expressions):
421389
in unique_joined_expressions
422390
}
423391
not_in_joined_expressions = []
424-
# check to see if the single expression is in the joined expression
392+
# Check to see if the single expression is in the joined expression
425393
for joined_expression in unique_joined_expressions:
426394
for expression in single_expressions:
427395
if expression not in joined_expression:
428396
not_in_joined_expressions.append(expression)
429397
else:
430398
single_expressions_by_joined_expressions[joined_expression].append(expression)
431399

400+
# If we have a single joined license expression and no license expressions
401+
# that have not been associated with a joined license expression, then we do
402+
# not have any ambiguity about the license
432403
if len(single_expressions_by_joined_expressions) == 1 and not not_in_joined_expressions:
433404
return False
434405
else:

0 commit comments

Comments
 (0)