@@ -83,40 +83,42 @@ def compute_license_score(codebase):
83
83
"""
84
84
85
85
scoring_elements = ScoringElements ()
86
- declared_licenses = get_declared_license_info_from_top_level_key_files (codebase )
87
- declared_license_expressions = get_declared_license_expressions_from_top_level_key_files (codebase )
86
+ declared_licenses = get_field_values_from_codebase_resources (codebase , 'licenses' , key_files_only = True )
87
+ declared_license_expressions = get_field_values_from_codebase_resources (codebase , 'license_expressions' , key_files_only = True )
88
88
declared_license_categories = get_license_categories (declared_licenses )
89
- copyrights = get_copyrights_from_key_files (codebase )
90
- other_licenses = get_other_licenses (codebase )
89
+ copyrights = get_field_values_from_codebase_resources (codebase , 'copyrights' , key_files_only = True )
90
+ other_licenses = get_field_values_from_codebase_resources (codebase , 'licenses' , key_files_only = False )
91
91
92
92
scoring_elements .declared_license = bool (declared_licenses )
93
- if declared_licenses :
93
+ if scoring_elements . declared_license :
94
94
scoring_elements .score += 40
95
95
96
- precise_license_detection = check_declared_licenses (declared_licenses )
97
- scoring_elements .precise_license_detection = precise_license_detection
98
- if precise_license_detection :
96
+ scoring_elements .precise_license_detection = check_declared_licenses (declared_licenses )
97
+ if scoring_elements .precise_license_detection :
99
98
scoring_elements .score += 40
100
99
101
- has_license_text = check_for_license_texts (declared_licenses )
102
- scoring_elements .has_license_text = has_license_text
103
- if has_license_text :
100
+ scoring_elements .has_license_text = check_for_license_texts (declared_licenses )
101
+ if scoring_elements .has_license_text :
104
102
scoring_elements .score += 10
105
103
106
104
scoring_elements .declared_copyrights = bool (copyrights )
107
- if copyrights :
105
+ if scoring_elements . declared_copyrights :
108
106
scoring_elements .score += 10
109
107
110
108
is_permissively_licensed = check_declared_license_categories (declared_license_categories )
111
109
if is_permissively_licensed :
112
- contains_conflicting_license = check_for_conflicting_licenses (other_licenses )
113
- scoring_elements .conflicting_license_categories = contains_conflicting_license
114
- if contains_conflicting_license and scoring_elements .score > 0 :
110
+ scoring_elements .conflicting_license_categories = check_for_conflicting_licenses (other_licenses )
111
+ if (
112
+ scoring_elements .conflicting_license_categories
113
+ and scoring_elements .score > 0
114
+ ):
115
115
scoring_elements .score -= 20
116
116
117
- ambigous_compound_licensing = check_ambiguous_license_expression (declared_license_expressions )
118
- scoring_elements .ambigous_compound_licensing = ambigous_compound_licensing
119
- if ambigous_compound_licensing and scoring_elements .score > 0 :
117
+ scoring_elements .ambigous_compound_licensing = check_for_license_ambiguity (declared_license_expressions )
118
+ if (
119
+ scoring_elements .ambigous_compound_licensing
120
+ and scoring_elements .score > 0
121
+ ):
120
122
scoring_elements .score -= 10
121
123
122
124
return scoring_elements .to_dict ()
@@ -203,84 +205,31 @@ def is_good_license(detected_license):
203
205
return False
204
206
205
207
206
- def get_declared_license_info_from_top_level_key_files (codebase ):
207
- """
208
- Return a list of "declared" license keys from the expressions as detected in
209
- key files from top-level directories.
210
-
211
- A project has specific key file(s) at the top level of its code hierarchy
212
- such as LICENSE, NOTICE or similar (and/or a package manifest) containing
213
- structured license information such as an SPDX license expression or SPDX
214
- license identifier: when such a file contains "clearly defined" declared
215
- license information, we return this.
208
def get_field_values_from_codebase_resources(codebase, field_name, key_files_only=False):
    """
    Return a list of values from the `field_name` field of the Resources of
    `codebase` that are reached by walking its top-level directories.

    Only Resources yielded by walking a Resource that is both a directory and
    flagged as top-level are considered; anything else in `codebase` is
    ignored.

    If `key_files_only` is True, then we only return the field values from
    Resources classified as key files.

    If `key_files_only` is False, then we only return the field values from
    Resources that are not classified as key files.

    A Resource that does not have a `field_name` attribute, or whose value for
    it is None or empty, contributes nothing to the returned list.
    """
    values = []
    for resource in codebase.walk(topdown=True):
        if not (resource.is_dir and resource.is_top_level):
            continue
        for child in resource.walk(codebase):
            # Keep key files when `key_files_only` is set and non-key files
            # otherwise: skip whenever the two flags disagree.
            if bool(child.is_key_file) != bool(key_files_only):
                continue
            # The field may be missing or None on a Resource: treat both as
            # an empty list of values.
            values.extend(getattr(child, field_name, []) or [])
    return values
284
233
285
234
286
235
def get_license_categories (license_infos ):
@@ -362,7 +311,7 @@ def check_for_conflicting_licenses(other_licenses):
362
311
return False
363
312
364
313
365
- def group_license_expressions (declared_license_expressions ):
314
+ def group_license_expressions (unique_license_expressions ):
366
315
"""
367
316
Return a tuple that contains two list of license expressions.
368
317
@@ -372,18 +321,17 @@ def group_license_expressions(declared_license_expressions):
372
321
The second list in the tuple contains unique license
373
322
expressions without "AND", "OR", or "WITH".
374
323
"""
375
- unique_declared_license_expressions = set (declared_license_expressions )
376
324
joined_expressions = []
377
325
single_expressions = []
378
- for declared_license_expression in unique_declared_license_expressions :
326
+ for license_expression in unique_license_expressions :
379
327
if (
380
- 'AND' in declared_license_expression
381
- or 'OR' in declared_license_expression
382
- or 'WITH' in declared_license_expression
328
+ 'AND' in license_expression
329
+ or 'OR' in license_expression
330
+ or 'WITH' in license_expression
383
331
):
384
- joined_expressions .append (declared_license_expression )
332
+ joined_expressions .append (license_expression )
385
333
else :
386
- single_expressions .append (declared_license_expression )
334
+ single_expressions .append (license_expression )
387
335
388
336
licensing = Licensing ()
389
337
unique_joined_expressions = []
@@ -405,13 +353,33 @@ def group_license_expressions(declared_license_expressions):
405
353
return unique_joined_expressions , single_expressions
406
354
407
355
408
- def check_ambiguous_license_expression (declared_license_expressions ):
409
- # Get lists of unique license expressions
356
+ def check_for_license_ambiguity (declared_license_expressions ):
357
+ """
358
+ License ambiguity is the situation where there is a license declaration that makes
359
+ it difficult to construct a reliable license expression, such as in the case
360
+ of multiple licenses where the conjunctive versus disjunctive relationship
361
+ is not well defined.
362
+
363
+ We determine if a list of `declared_license_expressions` has license ambiguity if
364
+ we cannot resolve the `declared_license_expressions` into one expression.
365
+ """
366
+ unique_declared_license_expressions = set (declared_license_expressions )
367
+ # If we only have a single unique license expression, then we do not have
368
+ # any ambiguity about the licensing
369
+ if len (unique_declared_license_expressions ) == 1 :
370
+ return False
371
+
410
372
unique_joined_expressions , single_expressions = group_license_expressions (
411
- declared_license_expressions
373
+ unique_declared_license_expressions
412
374
)
413
- if not unique_joined_expressions and not single_expressions :
414
- return True
375
+
376
+ if not unique_joined_expressions :
377
+ # If we do not have any joined expressions, but multiple single
378
+ # expressions remaining, then we have license ambiguity
379
+ if len (single_expressions ) > 1 :
380
+ return True
381
+ else :
382
+ return False
415
383
416
384
# Group single expressions to joined expressions to see if single
417
385
# expressions are accounted for in a joined expression
@@ -421,14 +389,17 @@ def check_ambiguous_license_expression(declared_license_expressions):
421
389
in unique_joined_expressions
422
390
}
423
391
not_in_joined_expressions = []
424
- # check to see if the single expression is in the joined expression
392
+ # Check to see if the single expression is in the joined expression
425
393
for joined_expression in unique_joined_expressions :
426
394
for expression in single_expressions :
427
395
if expression not in joined_expression :
428
396
not_in_joined_expressions .append (expression )
429
397
else :
430
398
single_expressions_by_joined_expressions [joined_expression ].append (expression )
431
399
400
+ # If we have a single joined license expression and no license expressions
401
+ # that have not been associated with a joined license expression, then we do
402
+ # not have any ambiguity about the license
432
403
if len (single_expressions_by_joined_expressions ) == 1 and not not_in_joined_expressions :
433
404
return False
434
405
else :
0 commit comments