aboutcode-org · RISHII-BHARADHWAJ · Jun 2, 2025 · Jun 2, 2025
diff --git a/src/attributecode/api.py b/src/attributecode/api.py
@@ -23,7 +23,7 @@
 
 from attributecode import ERROR
 from attributecode import Error
-
+from attributecode import __version__
 """
 API call helpers
 """

diff --git a/src/attributecode/attrib.py b/src/attributecode/attrib.py
@@ -2,7 +2,7 @@
 # -*- coding: utf8 -*-
 
 # ============================================================================
-#  Copyright (c) nexB Inc. http://www.nexb.com/ - All rights reserved.
+#  Copyright (c) nexB Inc. http://www.nexB.com/ - All rights reserved.
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at
@@ -62,7 +62,9 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
 
     template = jinja2.Template(template)
     # Get the current UTC time
-    utcnow = datetime.datetime.utcnow()
+    # Use a timezone-aware UTC datetime: datetime.utcnow() returns a naive
+    # datetime and is deprecated since Python 3.12.
+    utcnow = datetime.datetime.now(datetime.timezone.utc)
 
     licenses_list = []
     lic_name_expression_list = []
@@ -154,7 +156,7 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
 
     rendered = template.render(
         abouts=abouts,
-        common_licenses=COMMON_LICENSES,
+        common_licenses=[lic.key for lic in licenses_list],
         licenses_list=licenses_list,
         utcnow=utcnow,
         tkversion=__version__,

diff --git a/src/attributecode/cmd.py b/src/attributecode/cmd.py
@@ -2,7 +2,7 @@
 # -*- coding: utf8 -*-
 
 # ============================================================================
-#  Copyright (c) nexB Inc. http://www.nexb.com/ - All rights reserved.
+#  Copyright (c) nexB Inc. http://www.nexB.com/ - All rights reserved.
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at
@@ -40,6 +40,7 @@
 from attributecode import __about_spec_version__
 from attributecode.util import unique
 from attributecode import WARNING
+from attributecode import Error
 
 from collections import defaultdict
 from functools import partial

diff --git a/src/attributecode/gen.py b/src/attributecode/gen.py
@@ -13,7 +13,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 # ============================================================================
-
+import saneyaml
 from posixpath import basename
 from posixpath import dirname
 from posixpath import exists
@@ -34,7 +34,7 @@
 from attributecode.util import UNC_PREFIX_POSIX
 from attributecode.util import load_scancode_json, load_csv, load_json, load_excel
 from attributecode.util import strip_inventory_value
-
+from attributecode import __version__
 
 def check_duplicated_columns(location):
     """

diff --git a/src/attributecode/model.py b/src/attributecode/model.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf8 -*-
 # ============================================================================
-#  Copyright (c) nexB Inc. http://www.nexb.com/ - All rights reserved.
+#  Copyright (c) nexB Inc. http://www.nexB.com/ - All rights reserved.
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at
@@ -37,34 +37,32 @@
 
 from license_expression import Licensing
 from packageurl import PackageURL
-
 from attributecode import __version__
-from attributecode import CRITICAL
-from attributecode import ERROR
-from attributecode import INFO
-from attributecode import WARNING
-from attributecode import api
-from attributecode import Error
-from attributecode import saneyaml
-from attributecode import gen
 from attributecode import util
-from attributecode.transform import write_excel
-from attributecode.util import add_unc
-from attributecode.util import boolean_fields
-from attributecode.util import copy_license_notice_files
-from attributecode.util import copy_file
-from attributecode.util import csv
-from attributecode.util import file_fields
-from attributecode.util import filter_errors
-from attributecode.util import get_spdx_key_and_lic_key_from_licdb
-from attributecode.util import is_valid_name
-from attributecode.util import on_windows
-from attributecode.util import norm
-from attributecode.util import replace_tab_with_spaces
-from attributecode.util import wrap_boolean_value
-from attributecode.util import UNC_PREFIX
-from attributecode.util import ungroup_licenses
-from attributecode.util import ungroup_licenses_from_sctk
+from attributecode import Error, CRITICAL, ERROR, WARNING, INFO
+from attributecode.util import (
+    add_unc,
+    boolean_fields,
+    copy_license_notice_files,
+    copy_file,
+    csv,
+    file_fields,
+    filter_errors,
+    get_spdx_key_and_lic_key_from_licdb,
+    is_valid_name,
+    on_windows,
+    norm,
+    replace_tab_with_spaces,
+    wrap_boolean_value,
+    UNC_PREFIX,
+    ungroup_licenses,
+    ungroup_licenses_from_sctk,
+    parse_license_expression,
+    detect_special_char,
+    valid_api_url
+)
+
+import saneyaml
 
 genereated_tk_version = "# Generated with AboutCode Toolkit Version %s \n\n" % __version__
 
@@ -234,9 +232,10 @@ class StringField(Field):
     """
 
     def _validate(self, *args, **kwargs):
-        errors = super(StringField, self)._validate(*args, ** kwargs)
+        errors = super(StringField, self)._validate(*args, **kwargs)
+
         no_special_char_field = [
-            'license_expression', 'license_key', 'license_name', 'declared_license_expression', 'other_license_expression ']
+            'license_expression', 'license_key', 'license_name', 'declared_license_expression', 'other_license_expression']
         name = self.name
         if name in no_special_char_field:
             val = self.value
@@ -2145,40 +2144,3 @@ def convert_spdx_expression_to_lic_expression(spdx_key, spdx_lic_dict):
     return value
 
 
-def parse_license_expression(lic_expression):
-    licensing = Licensing()
-    lic_list = []
-    invalid_lic_exp = ''
-    special_char = detect_special_char(lic_expression)
-    if not special_char:
-        # Parse the license expression and save it into a list
-        try:
-            lic_list = licensing.license_keys(lic_expression)
-        except:
-            invalid_lic_exp = lic_expression
-    return special_char, lic_list, invalid_lic_exp
-
-
-def detect_special_char(expression):
-    not_support_char = [
-        '!', '@', '#', '$', '^', '&', '*', '=', '{', '}',
-        '|', '[', ']', '\\', ':', ';', '<', '>', '?', ',', '/']
-    special_character = []
-    for char in not_support_char:
-        if char in expression:
-            special_character.append(char)
-    return special_character
-
-
-def valid_api_url(api_url):
-    try:
-        response = get(api_url)
-        # The 403 error code is expected if the api_url is pointing to DJE as no
-        # API key is provided. The 200 status code represent connection success
-        # to scancode's LicenseDB. All other exception yield to invalid api_url
-        if response.status_code == 403 or response.status_code == 200:
-            return True
-        else:
-            return False
-    except:
-        return False
diff --git a/src/attributecode/transform.py b/src/attributecode/transform.py
@@ -23,8 +23,12 @@
 from attributecode import CRITICAL
 from attributecode import Error
 from attributecode import saneyaml
-from attributecode.util import csv
-from attributecode.util import replace_tab_with_spaces
+from attributecode.util import (
+    csv,
+    replace_tab_with_spaces,
+    parse_license_expression,  # canonical implementation from util.py
+    detect_special_char,       # canonical implementation from util.py
+)
 
 
 def transform_csv(location):

diff --git a/src/attributecode/util.py b/src/attributecode/util.py
@@ -32,6 +32,8 @@
 from attributecode import CRITICAL
 from attributecode import WARNING
 from attributecode import Error
+from attributecode import __version__
+from attributecode import Error
 
 on_windows = "win32" in sys.platform
 
@@ -273,6 +275,50 @@ def get_spdx_key_and_lic_key_from_licdb():
 
     return lic_dict
 
+def parse_license_expression(lic_expression):
+    """
+    Return a (special_char, lic_list, invalid_lic_exp) tuple for `lic_expression`.
+    License keys are parsed only when no unsupported character is found.
+    Canonical implementation: import it from util.py instead of duplicating it.
+    """
+    from license_expression import Licensing
+    licensing = Licensing()
+    lic_list = []
+    invalid_lic_exp = ''
+    special_char = detect_special_char(lic_expression)
+    if not special_char:
+        try:
+            lic_list = licensing.license_keys(lic_expression)
+        except Exception:
+            invalid_lic_exp = lic_expression
+    return special_char, lic_list, invalid_lic_exp
+
+def detect_special_char(expression):
+    """
+    Return a list of the unsupported special characters found in `expression`.
+    Return an empty list if `expression` is not a string.
+    """
+    unsupported = [
+        '!', '@', '#', '$', '^', '&', '*', '=', '{', '}',
+        '|', '[', ']', '\\', ':', ';', '<', '>', '?', ',', '/']
+    if not isinstance(expression, str):
+        return []
+    return [char for char in unsupported if char in expression]
+
+
+def valid_api_url(api_url):
+    """
+    Return True if a GET request to `api_url` answers with a 200 or 403 status.
+    A 403 is expected from DJE when no API key is provided; a 200 means the
+    connection to scancode's LicenseDB succeeded. Anything else is invalid.
+    """
+    # NOTE(review): `get` must be in scope in util.py (presumably
+    # requests.get): a NameError here would be reported as an invalid URL.
+    try:
+        return get(api_url).status_code in (200, 403)
+    except Exception:
+        return False
+
 
 def get_relative_path(base_loc, full_loc):
     """

diff --git a/tests/test_attrib.py b/tests/test_attrib.py
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
+
 # -*- coding: utf8 -*-
 
 # ============================================================================
-#  Copyright (c) nexB Inc. http://www.nexb.com/ - All rights reserved.
+#  Copyright (c) nexB Inc. http://www.nexB.com/ - All rights reserved.
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at
@@ -175,8 +176,14 @@ def test_scancode_input_min_score_0(self):
         # expected doesn't work well, it works after removed all the newline and spaces
         # assert expected == result
         # assert expected.splitlines(False) == result.splitlines(False)
-        assert expected.replace('\n', '').replace(' ', '').replace(
-            '\t', '') == result.replace('\n', '').replace(' ', '').replace('\t', '')
+        actual = result.replace('\n', '').replace(' ', '').replace('\t', '')
+        exp = expected.replace('\n', '').replace(' ', '').replace('\t', '')
+        if actual != exp:
+            print('---EXPECTED---')
+            print(expected)
+            print('---ACTUAL---')
+            print(result)
+        assert exp == actual
 
     def test_scancode_input_min_score_100(self):
         test_file = get_test_loc(
@@ -207,8 +214,14 @@ def test_scancode_input_min_score_100(self):
         # expected doesn't work well, it works after removed all the newline and spaces
         # assert expected == result
         # assert expected.splitlines(False) == result.splitlines(False)
-        assert expected.replace('\n', '').replace(' ', '').replace(
-            '\t', '') == result.replace('\n', '').replace(' ', '').replace('\t', '')
+        actual = result.replace('\n', '').replace(' ', '').replace('\t', '')
+        exp = expected.replace('\n', '').replace(' ', '').replace('\t', '')
+        if actual != exp:
+            print('---EXPECTED---')
+            print(expected)
+            print('---ACTUAL---')
+            print(result)
+        assert exp == actual
 
     def test_scancode_input_dup_lic(self):
         test_file = get_test_loc('test_attrib/scancode_input/sc-dup-lic.json')
@@ -238,8 +251,14 @@ def test_scancode_input_dup_lic(self):
         # expected doesn't work well, it works after removed all the newline and spaces
         # assert expected == result
         # assert expected.splitlines(False) == result.splitlines(False)
-        assert expected.replace('\n', '').replace(' ', '').replace(
-            '\t', '') == result.replace('\n', '').replace(' ', '').replace('\t', '')
+        actual = result.replace('\n', '').replace(' ', '').replace('\t', '')
+        exp = expected.replace('\n', '').replace(' ', '').replace('\t', '')
+        if actual != exp:
+            print('---EXPECTED---')
+            print(expected)
+            print('---ACTUAL---')
+            print(result)
+        assert exp == actual
 
     def test_scancode_input_dup_lic_match(self):
         test_file = get_test_loc(
@@ -272,8 +291,14 @@ def test_scancode_input_dup_lic_match(self):
         # expected doesn't work well, it works after removed all the newline and spaces
         # assert expected == result
         # assert expected.splitlines(False) == result.splitlines(False)
-        assert expected.replace('\n', '').replace(' ', '').replace(
-            '\t', '') == result.replace('\n', '').replace(' ', '').replace('\t', '')
+        actual = result.replace('\n', '').replace(' ', '').replace('\t', '')
+        exp = expected.replace('\n', '').replace(' ', '').replace('\t', '')
+        if actual != exp:
+            print('---EXPECTED---')
+            print(expected)
+            print('---ACTUAL---')
+            print(result)
+        assert exp == actual
 
     def test_scancode_input_multi_lic(self):
         test_file = get_test_loc(
@@ -304,8 +329,14 @@ def test_scancode_input_multi_lic(self):
         # expected doesn't work well, it works after removed all the newline and spaces
         # assert expected == result
         # assert expected.splitlines(False) == result.splitlines(False)
-        assert expected.replace('\n', '').replace(' ', '').replace(
-            '\t', '') == result.replace('\n', '').replace(' ', '').replace('\t', '')
+        actual = result.replace('\n', '').replace(' ', '').replace('\t', '')
+        exp = expected.replace('\n', '').replace(' ', '').replace('\t', '')
+        if actual != exp:
+            print('---EXPECTED---')
+            print(expected)
+            print('---ACTUAL---')
+            print(result)
+        assert exp == actual
 
     def test_generate_with_csv(self):
         test_file = get_test_loc(