Skip to content

Commit 6e8d2f9

Browse files
upgrade spdx-tools to v0.8
As the library has been refactored, this requires some code adaptation of the current output_spdx.py and regeneration of the test files.
Signed-off-by: Armin Tänzer <[email protected]>
1 parent 5dfcfcb commit 6e8d2f9

21 files changed

+948
-1082
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ v32.1.0 (next, roadmap)
4141
of these in other summary plugins.
4242
See https://github.com/nexB/scancode-toolkit/issues/1745
4343

44+
- Upgraded spdx-tools dependency to v0.8.
45+
See https://github.com/nexB/scancode-toolkit/issues/3455
46+
4447

4548
v32.0.6 - 2023-07-13
4649
------------------------

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ requests==2.28.1
6464
saneyaml==0.6.0
6565
six==1.16.0
6666
soupsieve==2.3.2.post1
67-
spdx-tools==0.7.0rc0
67+
spdx-tools==0.8.1
6868
text-unidecode==1.3
6969
toml==0.10.2
7070
typecode==30.0.1

setup-mini.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ install_requires =
105105
pymaven_patch >= 0.2.8
106106
requests >= 2.7.0
107107
saneyaml >= 0.6.0
108-
spdx_tools == 0.7.0rc0
108+
spdx_tools == 0.8.1
109109
text_unidecode >= 1.0
110110
toml >= 0.10.0
111111
urlpy

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ install_requires =
105105
pymaven_patch >= 0.2.8
106106
requests >= 2.7.0
107107
saneyaml >= 0.6.0
108-
spdx_tools == 0.7.0rc0
108+
spdx_tools == 0.8.1
109109
text_unidecode >= 1.0
110110
toml >= 0.10.0
111111
urlpy

src/formattedcode/output_spdx.py

Lines changed: 91 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,33 @@
99
import os
1010
import sys
1111
import uuid
12+
from datetime import datetime
1213
from io import BytesIO
1314
from io import StringIO
1415

15-
from spdx.checksum import Checksum
16-
from spdx.checksum import ChecksumAlgorithm
17-
from spdx.creationinfo import Tool
18-
from spdx.document import ExtractedLicense
19-
from spdx.document import Document
20-
from spdx.license import License
21-
from spdx.file import File
22-
from spdx.package import Package
23-
from spdx.relationship import Relationship
24-
from spdx.utils import calc_verif_code
25-
from spdx.utils import NoAssert
26-
from spdx.utils import SPDXNone
27-
from spdx.version import Version
2816

2917
from license_expression import Licensing
3018
from commoncode.cliutils import OUTPUT_GROUP
3119
from commoncode.cliutils import PluggableCommandLineOption
3220
from commoncode.fileutils import file_name
3321
from commoncode.fileutils import parent_directory
3422
from commoncode.text import python_safe_name
23+
from spdx_tools.spdx.model import SpdxNoAssertion
24+
from spdx_tools.spdx.model import Version
25+
from spdx_tools.spdx.model import CreationInfo
26+
from spdx_tools.spdx.model import Actor
27+
from spdx_tools.spdx.model import ActorType
28+
from spdx_tools.spdx.model import Document
29+
from spdx_tools.spdx.model import Package
30+
from spdx_tools.spdx.model import File
31+
from spdx_tools.spdx.model import Checksum
32+
from spdx_tools.spdx.model import ChecksumAlgorithm
33+
from spdx_tools.spdx.model import ExtractedLicensingInfo
34+
from spdx_tools.spdx.model import SpdxNone
35+
from spdx_tools.spdx.model import Relationship
36+
from spdx_tools.spdx.model import RelationshipType
37+
from spdx_tools.spdx.spdx_element_utils import calculate_package_verification_code
38+
3539
from formattedcode import FileOptionType
3640
from licensedcode.detection import get_matches_from_detection_mappings
3741
from plugincode.output import output_impl
@@ -63,50 +67,6 @@ def logger_debug(*args):
6367
Output plugins to write scan results in SPDX format.
6468
"""
6569

66-
_spdx_list_is_patched = False
67-
68-
69-
def _patch_license_list():
70-
"""
71-
Patch the SPDX Python library license list to match the list of ScanCode
72-
known SPDX licenses.
73-
"""
74-
global _spdx_list_is_patched
75-
if not _spdx_list_is_patched:
76-
from spdx.config import LICENSE_MAP
77-
from licensedcode.models import load_licenses
78-
licenses = load_licenses(with_deprecated=True)
79-
spdx_licenses = get_licenses_by_spdx_key(licenses.values())
80-
LICENSE_MAP.update(spdx_licenses)
81-
_spdx_list_is_patched = True
82-
83-
84-
def get_licenses_by_spdx_key(licenses):
85-
"""
86-
Return a mapping of {spdx_key: license object} given a ``license`` sequence
87-
of License objects.
88-
"""
89-
spdx_licenses = {}
90-
for lic in licenses:
91-
if not (lic.spdx_license_key or lic.other_spdx_license_keys):
92-
continue
93-
94-
if lic.spdx_license_key:
95-
name = lic.name
96-
slk = lic.spdx_license_key
97-
spdx_licenses[slk] = name
98-
spdx_licenses[name] = slk
99-
100-
for other_spdx in lic.other_spdx_license_keys:
101-
if not (other_spdx and other_spdx.strip()):
102-
continue
103-
slk = other_spdx
104-
spdx_licenses[slk] = name
105-
spdx_licenses[name] = slk
106-
107-
return spdx_licenses
108-
109-
11070
@output_impl
11171
class SpdxTvOutput(OutputPlugin):
11272

@@ -220,7 +180,7 @@ def write_spdx(
220180
tool_version,
221181
notice,
222182
package_name='',
223-
download_location=NoAssert(),
183+
download_location=SpdxNoAssertion(),
224184
as_tagvalue=True,
225185
spdx_version = (2, 2),
226186
with_notice_text=False,
@@ -240,37 +200,43 @@ def write_spdx(
240200
licensing = Licensing()
241201

242202
as_rdf = not as_tagvalue
243-
_patch_license_list()
244203

245204
ns_prefix = '_'.join(package_name.lower().split())
246205
comment = notice + f'\nSPDX License List: {scancode_config.spdx_license_list_version}'
247206

248207
version_major, version_minor = scancode_config.spdx_license_list_version.split(".")
249208
spdx_license_list_version = Version(major=version_major, minor=version_minor)
250209

251-
doc = Document(
252-
version=Version(*spdx_version),
253-
data_license=License.from_identifier('CC0-1.0'),
254-
comment=notice,
255-
namespace=f'http://spdx.org/spdxdocs/{ns_prefix}-{uuid.uuid4()}',
256-
license_list_version=scancode_config.spdx_license_list_version,
257-
name='SPDX Document created by ScanCode Toolkit'
210+
tool_name = tool_name or 'ScanCode'
211+
creator = Actor(ActorType.TOOL, f'{tool_name} {tool_version}')
212+
213+
creation_info = CreationInfo(
214+
spdx_id="SPDXRef-DOCUMENT",
215+
spdx_version=f"SPDX-{spdx_version[0]}.{spdx_version[1]}",
216+
data_license='CC0-1.0',
217+
document_comment=comment,
218+
document_namespace=f'http://spdx.org/spdxdocs/{ns_prefix}-{uuid.uuid4()}',
219+
license_list_version=spdx_license_list_version,
220+
name='SPDX Document created by ScanCode Toolkit',
221+
creators=[creator],
222+
created=datetime.now(),
258223
)
259224

260-
tool_name = tool_name or 'ScanCode'
261-
doc.creation_info.add_creator(Tool(f'{tool_name} {tool_version}'))
262-
doc.creation_info.set_created_now()
263-
doc.creation_info.license_list_version = spdx_license_list_version
264225

265226
package_id = '001'
266-
package = doc.package = Package(
227+
package = Package(
267228
name=package_name,
268229
download_location=download_location,
269230
spdx_id=f'SPDXRef-{package_id}',
270231
)
271232

233+
doc = Document(
234+
creation_info=creation_info,
235+
packages=[package],
236+
)
237+
272238
# Use a set of unique copyrights for the package.
273-
package.cr_text = set()
239+
package_copyright_texts = set()
274240

275241
all_files_have_no_license = True
276242
all_files_have_no_copyright = True
@@ -285,13 +251,18 @@ def write_spdx(
285251
# Set a relative file name as that is what we want in
286252
# SPDX output (with explicit leading './').
287253
name = './' + file_data.get('path')
288-
file_entry = File(
289-
spdx_id=f'SPDXRef-{sid}',
290-
name=name)
254+
291255
if file_data.get('file_type') == 'empty':
292-
file_entry.set_checksum(Checksum(ChecksumAlgorithm.SHA1, "da39a3ee5e6b4b0d3255bfef95601890afd80709"))
256+
checksum = Checksum(ChecksumAlgorithm.SHA1, "da39a3ee5e6b4b0d3255bfef95601890afd80709")
293257
else:
294-
file_entry.set_checksum(Checksum(ChecksumAlgorithm.SHA1, file_data.get('sha1') or ''))
258+
# FIXME: this sets the checksum of a file to the empty string hash if unknown; tracked in https://github.com/nexB/scancode-toolkit/issues/3453
259+
checksum = Checksum(ChecksumAlgorithm.SHA1, file_data.get('sha1') or 'da39a3ee5e6b4b0d3255bfef95601890afd80709')
260+
261+
file_entry = File(
262+
spdx_id=f'SPDXRef-{sid}',
263+
name=name,
264+
checksums=[checksum]
265+
)
295266

296267
file_license_detections = file_data.get('license_detections')
297268
license_matches = get_matches_from_detection_mappings(file_license_detections)
@@ -312,63 +283,63 @@ def write_spdx(
312283
spdx_id = f'LicenseRef-scancode-{license_key}'
313284
is_license_ref = spdx_id.lower().startswith('licenseref-')
314285

315-
if not is_license_ref:
316-
spdx_license = License.from_identifier(spdx_id)
317-
else:
318-
spdx_license = ExtractedLicense(spdx_id)
319-
spdx_license.name = file_license.short_name
286+
spdx_license = licensing.parse(spdx_id)
287+
288+
if is_license_ref:
289+
text = match.get('matched_text')
320290
# FIXME: replace this with the licensedb URL
321291
comment = (
322292
f'See details at https://github.com/nexB/scancode-toolkit'
323-
f'/blob/develop/src/licensedcode/data/licenses/{license_key}.yml\n'
293+
f'/blob/develop/src/licensedcode/data/licenses/{license_key}.LICENSE\n'
324294
)
325-
spdx_license.comment = comment
326-
text = match.get('matched_text')
327-
# always set some text, even if we did not extract the
328-
# matched text
329-
if not text:
330-
text = comment
331-
spdx_license.text = text
332-
doc.add_extr_lic(spdx_license)
295+
extracted_license = ExtractedLicensingInfo(
296+
license_id=spdx_id,
297+
# always set some text, even if we did not extract the
298+
# matched text
299+
extracted_text=text if text else comment,
300+
license_name=file_license.short_name,
301+
comment=comment,
302+
)
303+
doc.extracted_licensing_info.append(extracted_license)
333304

334305
# Add licenses in the order they appear in the file. Maintaining
335306
# the order might be useful for provenance purposes.
336-
file_entry.add_lics(spdx_license)
337-
package.add_lics_from_file(spdx_license)
307+
file_entry.license_info_in_file.append(spdx_license)
308+
package.license_info_from_files.append(spdx_license)
338309

339310
elif license_matches is None:
340311
all_files_have_no_license = False
341-
file_entry.add_lics(NoAssert())
312+
file_entry.license_info_in_file.append(SpdxNoAssertion())
342313

343314
else:
344-
file_entry.add_lics(SPDXNone())
315+
file_entry.license_info_in_file.append(SpdxNone())
345316

346-
file_entry.conc_lics = NoAssert()
317+
file_entry.license_concluded = SpdxNoAssertion()
347318

348319
file_copyrights = file_data.get('copyrights')
349320
if file_copyrights:
350321
all_files_have_no_copyright = False
351-
file_entry.copyright = []
322+
copyrights = []
352323
for file_copyright in file_copyrights:
353-
file_entry.copyright.append(file_copyright.get('copyright'))
324+
copyrights.append(file_copyright.get('copyright'))
354325

355-
package.cr_text.update(file_entry.copyright)
326+
package_copyright_texts.update(copyrights)
356327

357328
# Create a text of copyright statements in the order they appear in
358329
# the file. Maintaining the order might be useful for provenance
359330
# purposes.
360-
file_entry.copyright = '\n'.join(file_entry.copyright) + '\n'
331+
file_entry.copyright_text = '\n'.join(copyrights) + '\n'
361332

362333
elif file_copyrights is None:
363334
all_files_have_no_copyright = False
364-
file_entry.copyright = NoAssert()
335+
file_entry.copyright_text = SpdxNoAssertion()
365336

366337
else:
367-
file_entry.copyright = SPDXNone()
338+
file_entry.copyright_text = SpdxNone()
368339

369-
doc.add_file(file_entry)
370-
relationship = Relationship(f'{package.spdx_id} CONTAINS {file_entry.spdx_id}')
371-
doc.add_relationship(relationship)
340+
doc.files.append(file_entry)
341+
relationship = Relationship(package.spdx_id, RelationshipType.CONTAINS, file_entry.spdx_id)
342+
doc.relationships.append(relationship)
372343

373344
if not doc.files:
374345
if as_tagvalue:
@@ -379,33 +350,29 @@ def write_spdx(
379350
output_file.write(msg)
380351

381352
# Remove duplicate licenses from the list for the package.
382-
unique_licenses = {l.identifier: l for l in package.licenses_from_files}
383-
unique_licenses = list(unique_licenses.values())
384-
if not len(package.licenses_from_files):
353+
package.license_info_from_files = list(set(package.license_info_from_files))
354+
if not package.license_info_from_files:
385355
if all_files_have_no_license:
386-
package.licenses_from_files = [SPDXNone()]
356+
package.license_info_from_files = [SpdxNone()]
387357
else:
388-
package.licenses_from_files = [NoAssert()]
358+
package.license_info_from_files = [SpdxNoAssertion()]
389359
else:
390360
# List license identifiers alphabetically for the package.
391-
package.licenses_from_files = sorted(
392-
unique_licenses,
393-
key=lambda x: x.identifier,
394-
)
361+
package.license_info_from_files = sorted(package.license_info_from_files)
395362

396-
if len(package.cr_text) == 0:
363+
if not package_copyright_texts:
397364
if all_files_have_no_copyright:
398-
package.cr_text = SPDXNone()
365+
package.copyright_text = SpdxNone()
399366
else:
400-
package.cr_text = NoAssert()
367+
package.copyright_text = SpdxNoAssertion()
401368
else:
402369
# Create a text of alphabetically sorted copyright
403370
# statements for the package.
404-
package.cr_text = '\n'.join(sorted(package.cr_text)) + '\n'
371+
package.copyright_text = '\n'.join(sorted(package_copyright_texts)) + '\n'
405372

406-
package.verif_code = calc_verif_code(doc.files)
407-
package.license_declared = NoAssert()
408-
package.conc_lics = NoAssert()
373+
package.verification_code = calculate_package_verification_code(doc.files)
374+
package.license_declared = SpdxNoAssertion()
375+
package.license_concluded = SpdxNoAssertion()
409376

410377
# The spdx-tools write_document returns either:
411378
# - unicode for tag values
@@ -416,19 +383,15 @@ def write_spdx(
416383
# in the other case we deal with text all the way.
417384

418385
if doc.files:
419-
420-
if as_tagvalue:
421-
from spdx.writers.tagvalue import write_document # NOQA
422-
elif as_rdf:
423-
from spdx.writers.rdf import write_document # NOQA
424-
425386
if as_tagvalue:
387+
from spdx_tools.spdx.writer.tagvalue.tagvalue_writer import write_document_to_stream # NOQA
426388
spdx_output = StringIO()
427389
elif as_rdf:
390+
from spdx_tools.spdx.writer.rdf.rdf_writer import write_document_to_stream # NOQA
428391
# rdf is utf-encoded bytes
429392
spdx_output = BytesIO()
430393

431-
write_document(doc, spdx_output, validate=False)
394+
write_document_to_stream(doc, spdx_output, validate=False)
432395
result = spdx_output.getvalue()
433396

434397
if as_rdf:

0 commit comments

Comments
 (0)