Skip to content

Fix/issue765 SPDX is_compound_expression does not strictly check for compound expression #773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CONTRIBUTING.md
Copy link
Member

@jkowalleck jkowalleck Feb 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Superseded by #783

Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ But please read the
[CycloneDX contributing guidelines](https://github.com/CycloneDX/.github/blob/master/CONTRIBUTING.md)
first.

Before you start coding, please also read
[how to fork a repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo)
and [how create a pull request from a fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork)
to learn how to incorporate your work into the project.

Do not create a local branch from the main repository, as a push to GitHub is not granted this way.
## Setup

This project uses [poetry]. Have it installed and setup first.
Expand Down
19 changes: 9 additions & 10 deletions cyclonedx/factory/license.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from ..exception.factory import InvalidLicenseExpressionException, InvalidSpdxLicenseException
from ..model.license import DisjunctiveLicense, LicenseExpression
from ..spdx import fixup_id as spdx_fixup, is_compound_expression as is_spdx_compound_expression
from ..spdx import fixup_id as spdx_fixup, is_compound_expression as is_spdx_compound_expression, is_spdx_license_id

if TYPE_CHECKING: # pragma: no cover
from ..model import AttachedText, XsUri
Expand All @@ -35,24 +35,21 @@ def make_from_string(self, value: str, *,
license_acknowledgement: Optional['LicenseAcknowledgement'] = None
) -> 'License':
"""Make a :class:`cyclonedx.model.license.License` from a string."""
try:
if is_spdx_license_id(value):
return self.make_with_id(value,
text=license_text,
url=license_url,
acknowledgement=license_acknowledgement)
except InvalidSpdxLicenseException:
pass
try:
elif is_spdx_compound_expression(value):
return self.make_with_expression(value,
acknowledgement=license_acknowledgement)
except InvalidLicenseExpressionException:
pass
return self.make_with_name(value,
text=license_text,
url=license_url,
acknowledgement=license_acknowledgement)

def make_with_expression(self, expression: str, *,
@staticmethod
def make_with_expression(expression: str, *,
acknowledgement: Optional['LicenseAcknowledgement'] = None
) -> LicenseExpression:
"""Make a :class:`cyclonedx.model.license.LicenseExpression` with a compound expression.
Expand All @@ -65,7 +62,8 @@ def make_with_expression(self, expression: str, *,
return LicenseExpression(expression, acknowledgement=acknowledgement)
raise InvalidLicenseExpressionException(expression)

def make_with_id(self, spdx_id: str, *,
@staticmethod
def make_with_id(spdx_id: str, *,
text: Optional['AttachedText'] = None,
url: Optional['XsUri'] = None,
acknowledgement: Optional['LicenseAcknowledgement'] = None
Expand All @@ -79,7 +77,8 @@ def make_with_id(self, spdx_id: str, *,
raise InvalidSpdxLicenseException(spdx_id)
return DisjunctiveLicense(id=spdx_license_id, text=text, url=url, acknowledgement=acknowledgement)

def make_with_name(self, name: str, *,
@staticmethod
def make_with_name(name: str, *,
text: Optional['AttachedText'] = None,
url: Optional['XsUri'] = None,
acknowledgement: Optional['LicenseAcknowledgement'] = None
Expand Down
127 changes: 108 additions & 19 deletions cyclonedx/spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,60 +18,149 @@

__all__ = [
'is_supported_id', 'fixup_id',
'is_compound_expression'
'is_simple_expression', 'is_compound_expression', 'is_spdx_license_id', 'is_spdx_expression'
]

from json import load as json_load
from typing import TYPE_CHECKING, Dict, Optional, Set

from license_expression import get_spdx_licensing # type:ignore[import-untyped]

from .schema._res import SPDX_JSON as __SPDX_JSON_SCHEMA
from boolean.boolean import Expression # type:ignore[import-untyped]
from license_expression import ( # type:ignore[import-untyped]
AND,
OR,
ExpressionError,
LicenseSymbol,
LicenseWithExceptionSymbol,
get_license_index,
get_spdx_licensing,
)

if TYPE_CHECKING: # pragma: no cover
from license_expression import Licensing

# region init
# python's internal module loader will assure that this init-part runs only once.

# !!! this requires to ship the actual schema data with the package.
with open(__SPDX_JSON_SCHEMA) as schema:
__IDS: Set[str] = set(json_load(schema).get('enum', []))
assert len(__IDS) > 0, 'known SPDX-IDs should be non-empty set'

__IDS_LOWER_MAP: Dict[str, str] = dict((id_.lower(), id_) for id_ in __IDS)

__SPDX_EXPRESSION_LICENSING: 'Licensing' = get_spdx_licensing()
__KNOWN_IDS = ([entry['spdx_license_key'] for entry in get_license_index()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do i see this right? Instead of using CycloneDX own well-known list of allowed values for a SPDX license ID,
the new code uses an arbitrary list maintained by an unauthoritative 3rd party?
If this is what this is about, then a clear "no". the CycloneDX explicitly states a defined list of well-known values here - which is maintained in https://github.com/CycloneDX/specification/blob/master/schema/spdx.{xsd,schema.json)}

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are using the license-expression module for license validation in the is_compound _expression. And this module internally takes already the data I use for the SPDX IDs to do this verification for official and specified LicenseRef-scancode-*. So I did not introduce something new.

To use different data from different sources is inconsistent for me an leds to problems if one of the modules, cyclonedx-python-lib and license-expression are not most up to date and correct at the same time. Depending on usage on a single or compound the use of same licenses may result in different IDs. This is what we currently already have.

It could be fine to have a text (for the target application) to check license-expressions SPDX license list IDs data.
During my development (having the dedicated SPDX list as reference) a test was failing when my implementation used license-expression data. Errors may happen, but the latest license-expression maintainers fixed the issue already and converted a formerly existing LicenseRef-scancode- ID into the now official one.

Furthermore, the BSI explicitly recommends the usage of the AboutCode/ScanCode database in their SBOM TR (for the LicenseRef-scancode-* custom IDs that also is incorporated in the license-expression module. And the module also receives updates for the SPDX license list.

If your point to use both at the same time, this is your decision as a core maintainer.

But for me I say that I do not want to rely on two different lists burned manually into each module version.

Copy link
Member

@jkowalleck jkowalleck Feb 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are using the license-expression module for license validation in the is_compound _expression. And this module internally takes already the data I use for the SPDX IDs to do this verification for official and specified LicenseRef-scancode-*. So I did not introduce something new.

This is a complete behavioral change.
The own shipped list of well-known SPDX ID's is a different one than the one any 3rd party library uses.

To use different data from different sources is inconsistent for me an leds to problems [...]

there were no different data. there was just one data: the official list of well-known SPDX license IDs from the official CycloneDX spec. please see below.

If your point to use both at the same time, this is your decision as a core maintainer.

it is not my decision. it is adherence to the spec.
this is the official spec:

Furthermore, the BSI explicitly recommends the usage of the AboutCode/ScanCode database in their SBOM TR (for the LicenseRef-scancode-* custom IDs that also is incorporated in the license-expression module. And the module also receives updates for the SPDX license list.

This is the BSI's very own narrow profile/view/flavor of SBOM. and it is a recommendation that nobody is forced to follow.
This library is about the international standard CycloneDX - which is much more than SBOM - and much more than a recommendation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me say just two things.
Even if BSI's document is just a recommendation (what I have explicitly mentioned): It should work.
The 9.0.0-dev is_expression is not validating SPDX expressions in general, because verification is done against the scancode license DB that you refused to take as reference.

If you would extend the tests as I did you would see that it is not working as documented in the SPDX spec.
For example, custom LicenceRef-* license references are not considered as correct except those that are listed in Scancode's DB. License item validation happens only against the internal ScanCode DB.

Please look into the function description of Licensing.parse and the code of license-expression in general to see how it works.

Copy link
Member

@jkowalleck jkowalleck Feb 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you would extend the tests as I did you would see that it is not working as documented in the SPDX spec.
For example, custom LicenceRef-* license references are not considered as correct except those that are listed in Scancode's DB. License item validation happens only against the internal ScanCode DB.

thank you for pointing this out. 👍
will add the tests and see how they may be passed. (see #781)

Copy link
Member

@jkowalleck jkowalleck Feb 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if entry['spdx_license_key'] and not entry['is_exception']]
+ [item for license_entry in get_license_index()
for item in license_entry['other_spdx_license_keys'] if not license_entry['is_exception']])
__IDS: Set[str] = set(__KNOWN_IDS)
__IDS_LOWER_MAP: Dict[str, str] = {**{entry['spdx_license_key'].lower(): entry['spdx_license_key']
for entry in get_license_index()
if entry['spdx_license_key'] and not entry['is_exception']},
**{item.lower(): item for license_entry in get_license_index()
for item in license_entry['other_spdx_license_keys']
if not license_entry['is_exception']}}

# endregion


def is_supported_id(value: str) -> bool:
"""Validate a SPDX-ID according to current spec."""
"""Validate an SPDX-ID according to current spec."""
return value in __IDS


def fixup_id(value: str) -> Optional[str]:
"""Fixup a SPDX-ID.
"""Fixup an SPDX-ID.

:returns: repaired value string, or `None` if fixup was unable to help.
"""
return __IDS_LOWER_MAP.get(value.lower())


def is_simple_expression(value: str, validate: bool = False) -> bool:
"""Indicates an SPDX simple expression (SPDX license identifier or license ref).

.. note::
Utilizes `license-expression library`_ to
validate SPDX simple expression according to `SPDX license expression spec`_.
DocumentRef- references are not in scope for CycloneDX.

.. _SPDX license expression spec: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions/
.. _license-expression library: https://github.com/nexB/license-expression
"""
if not value:
return False
try:
expression = __SPDX_EXPRESSION_LICENSING.parse(value, strict=True, validate=validate)
except (NameError, ExpressionError):
return False
if type(expression) in [OR, AND]:
return False
if str(expression).startswith('LicenseRef-'):
# It is a custom license ref
return True
# It should be an official SPDX license identifier
result = __SPDX_EXPRESSION_LICENSING.validate(value, strict=True)
if result.errors:
# The value was not understood
return False
if result.original_expression == result.normalized_expression:
# The given value is identical to normalized, so it is a valid identifier
return True
if result.original_expression.upper() != result.normalized_expression.upper():
# It is not a capitalization issue, ID was normalized to another valid ID, so it is OK.
return True
return False


def is_compound_expression(value: str) -> bool:
"""Validate compound expression.
"""Indicates whether value is an SPDX compound expression.

.. note::
Utilizes `license-expression library`_ to
validate SPDX compound expression according to `SPDX license expression spec`_.
DocumentRef- references are not in scope for CycloneDX.

.. _SPDX license expression spec: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions/
.. _license-expression library: https://github.com/nexB/license-expression
"""
def is_valid_item(expression: Expression) -> bool:
if type(expression) in [OR, AND]:
for item in expression.args:
if not is_valid_item(item):
return False
return True
elif type(expression) in [LicenseSymbol, LicenseWithExceptionSymbol]:
return is_simple_expression(str(expression))
return False

if not value:
return False
try:
res = __SPDX_EXPRESSION_LICENSING.validate(value)
except Exception:
# the throw happens when internals crash due to unexpected input characters.
parsed_expression = __SPDX_EXPRESSION_LICENSING.parse(value)
if type(parsed_expression) in [OR, AND] or isinstance(parsed_expression, LicenseWithExceptionSymbol):
return is_valid_item(parsed_expression)
else:
return False
except (NameError, ExpressionError):
return False
return 0 == len(res.errors)


def is_spdx_license_id(value: str) -> bool:
"""Indicates whether value is an SPDX license identifier from official list.

.. note::
Utilizes `license-expression library`_ to
validate SPDX compound expression according to `SPDX license expression spec`_.
DocumentRef- references are not in scope for CycloneDX.

.. _SPDX license expression spec: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions/
.. _license-expression library: https://github.com/nexB/license-expression
"""
return is_simple_expression(value, validate=True) and not value.startswith('LicenseRef-')


def is_spdx_expression(value: str) -> bool:
"""Indicates whether value is an SPDX simple or compound expression.

.. note::
Utilizes `license-expression library`_ to
validate SPDX compound expression according to `SPDX license expression spec`_.
DocumentRef- references are not in scope for CycloneDX.

.. _SPDX license expression spec: https://spdx.github.io/spdx-spec/v2.3/SPDX-license-expressions/
.. _license-expression library: https://github.com/nexB/license-expression
"""
return is_simple_expression(value) or is_compound_expression(value)
15 changes: 6 additions & 9 deletions tests/test_factory_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def test_make_from_string_with_id(self) -> None:
acknowledgement = unittest.mock.NonCallableMock(spec=LicenseAcknowledgement)
expected = DisjunctiveLicense(id='bar', text=text, url=url, acknowledgement=acknowledgement)

with unittest.mock.patch('cyclonedx.factory.license.spdx_fixup', return_value='bar'), \
unittest.mock.patch('cyclonedx.factory.license.is_spdx_compound_expression', return_value=True):
with unittest.mock.patch('cyclonedx.factory.license.is_spdx_license_id', return_value=True), \
unittest.mock.patch('cyclonedx.factory.license.spdx_fixup', return_value='bar'):
actual = LicenseFactory().make_from_string('foo',
license_text=text,
license_url=url,
Expand All @@ -46,13 +46,10 @@ def test_make_from_string_with_name(self) -> None:
url = unittest.mock.NonCallableMock(spec=XsUri)
acknowledgement = unittest.mock.NonCallableMock(spec=LicenseAcknowledgement)
expected = DisjunctiveLicense(name='foo', text=text, url=url, acknowledgement=acknowledgement)

with unittest.mock.patch('cyclonedx.factory.license.spdx_fixup', return_value=None), \
unittest.mock.patch('cyclonedx.factory.license.is_spdx_compound_expression', return_value=False):
actual = LicenseFactory().make_from_string('foo',
license_text=text,
license_url=url,
license_acknowledgement=acknowledgement)
actual = LicenseFactory().make_from_string('foo',
license_text=text,
license_url=url,
license_acknowledgement=acknowledgement)

self.assertEqual(expected, actual)

Expand Down
Loading