diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py
index 9d7fd94ca..e66be4ac2 100644
--- a/src/macaron/build_spec_generator/build_spec_generator.py
+++ b/src/macaron/build_spec_generator/build_spec_generator.py
@@ -98,8 +98,8 @@ def gen_build_spec_for_purl(
         case BuildSpecFormat.DOCKERFILE:
             try:
                 build_spec_content = gen_dockerfile(build_spec)
-            except ValueError as error:
-                logger.error("Error while serializing the build spec: %s.", error)
+            except GenerateBuildSpecError as error:
+                logger.error("Error while generating the build spec: %s.", error)
                 return os.EX_DATAERR
 
             build_spec_file_path = os.path.join(build_spec_dir_path, "dockerfile.buildspec")
diff --git a/src/macaron/build_spec_generator/common_spec/base_spec.py b/src/macaron/build_spec_generator/common_spec/base_spec.py
index c567609f7..698a0b948 100644
--- a/src/macaron/build_spec_generator/common_spec/base_spec.py
+++ b/src/macaron/build_spec_generator/common_spec/base_spec.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
 """This module includes base build specification and helper classes."""
@@ -81,6 +81,9 @@ class BaseBuildSpecDict(TypedDict, total=False):
     #: be a list of these that were used in building the wheel alongside their version.
     build_backends: NotRequired[list[str]]
 
+    #: Flag to indicate if the artifact includes binaries.
+    has_binaries: NotRequired[bool]
+
 
 class BaseBuildSpec(ABC):
     """Abstract base class for build specification behavior and field resolution."""
diff --git a/src/macaron/build_spec_generator/common_spec/core.py b/src/macaron/build_spec_generator/common_spec/core.py
index 26b2f329f..4c2cf1ecd 100644
--- a/src/macaron/build_spec_generator/common_spec/core.py
+++ b/src/macaron/build_spec_generator/common_spec/core.py
@@ -378,7 +378,7 @@ def gen_generic_build_spec(
             "purl": str(purl),
             "language": target_language,
             "build_tools": build_tool_names,
-            "build_commands": [selected_build_command],
+            "build_commands": [selected_build_command] if selected_build_command else [],
         }
     )
     ECOSYSTEMS[purl.type.upper()].value(base_build_spec_dict).resolve_fields(purl)
diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py
index 999afbb19..7bdc023b6 100644
--- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py
+++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
 """This module includes build specification and helper classes for PyPI packages."""
@@ -17,7 +17,7 @@
 from macaron.build_spec_generator.build_command_patcher import CLI_COMMAND_PATCHES, patch_commands
 from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict
 from macaron.config.defaults import defaults
-from macaron.errors import GenerateBuildSpecError, SourceCodeError
+from macaron.errors import GenerateBuildSpecError, SourceCodeError, WheelTagError
 from macaron.json_tools import json_extract
 from macaron.slsa_analyzer.package_registry import pypi_registry
 from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
@@ -114,9 +114,9 @@ def resolve_fields(self, purl: PackageURL) -> None:
 
         pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info)
         patched_build_commands: list[list[str]] = []
-        build_requires_set: set[str] = set()
         build_backends_set: set[str] = set()
         parsed_build_requires: dict[str, str] = {}
+        sdist_build_requires: dict[str, str] = {}
         python_version_set: set[str] = set()
         wheel_name_python_version_list: list[str] = []
         wheel_name_platforms: set[str] = set()
@@ -134,8 +134,16 @@
             if py_version := json_extract(release, ["requires_python"], str):
                 python_version_set.add(py_version.replace(" ", ""))
 
+        self.data["has_binaries"] = not pypi_package_json.has_pure_wheel()
+
+        if self.data["has_binaries"]:
+            logger.debug("Cannot find a pure wheel matching this PURL")
+        else:
+            logger.debug("Found pure wheel matching this PURL")
+
         try:
-            with pypi_package_json.wheel():
+            # The wheel context manager rejects non-pure wheels when asked to download binaries.
+            with pypi_package_json.wheel(download_binaries=self.data["has_binaries"]):
                 logger.debug("Wheel at %s", pypi_package_json.wheel_path)
                 # Should only have .dist-info directory.
                 logger.debug("It has directories %s", ",".join(os.listdir(pypi_package_json.wheel_path)))
@@ -155,8 +163,20 @@
                 chronologically_likeliest_version = (
                     pypi_package_json.get_chronologically_suitable_setuptools_version()
                 )
+                try:
+                    # Get information from the wheel file name.
+                    logger.debug(pypi_package_json.wheel_filename)
+                    _, _, _, tags = parse_wheel_filename(pypi_package_json.wheel_filename)
+                    for tag in tags:
+                        wheel_name_python_version_list.append(tag.interpreter)
+                        wheel_name_platforms.add(tag.platform)
+                    logger.debug(python_version_set)
+                except InvalidWheelFilename:
+                    logger.debug("Could not parse wheel file name to extract version")
+        except WheelTagError:
+            logger.debug("Cannot analyze non-pure wheels")
         except SourceCodeError:
-            logger.debug("Could not find pure wheel matching this PURL")
+            logger.debug("Could not download wheel matching this PURL")
 
         logger.debug("From .dist_info:")
         logger.debug(parsed_build_requires)
@@ -169,27 +189,32 @@
                     content = tomli.loads(pyproject_content.decode("utf-8"))
                     requires = json_extract(content, ["build-system", "requires"], list)
                     if requires:
-                        build_requires_set.update(elem.replace(" ", "") for elem in requires)
+                        for requirement in requires:
+                            self.add_parsed_requirement(sdist_build_requires, requirement)
                     # If we cannot find `requires` in `[build-system]`, we lean on the fact that setuptools
                     # was the de-facto build tool, and infer a setuptools version to include.
                     else:
-                        build_requires_set.add(f"setuptools=={chronologically_likeliest_version}")
+                        self.add_parsed_requirement(
+                            sdist_build_requires, f"setuptools=={chronologically_likeliest_version}"
+                        )
                     backend = json_extract(content, ["build-system", "build-backend"], str)
                     if backend:
                         build_backends_set.add(backend.replace(" ", ""))
                     python_version_constraint = json_extract(content, ["project", "requires-python"], str)
                     if python_version_constraint:
                         python_version_set.add(python_version_constraint.replace(" ", ""))
-                    self.apply_tool_specific_inferences(build_requires_set, python_version_set, content)
+                    self.apply_tool_specific_inferences(sdist_build_requires, python_version_set, content)
                     logger.debug(
                         "After analyzing pyproject.toml from the sdist: build-requires: %s, build_backend: %s",
-                        build_requires_set,
+                        sdist_build_requires,
                         build_backends_set,
                     )
                     # Here we have successfully analyzed the pyproject.toml file. Now, if we have a setup.py/cfg,
                     # we also need to infer a setuptools version to infer.
                     if pypi_package_json.file_exists("setup.py") or pypi_package_json.file_exists("setup.cfg"):
-                        build_requires_set.add(f"setuptools=={chronologically_likeliest_version}")
+                        self.add_parsed_requirement(
+                            sdist_build_requires, f"setuptools=={chronologically_likeliest_version}"
+                        )
                 except TypeError as error:
                     logger.debug(
                         "Found a type error while reading the pyproject.toml file from the sdist: %s", error
@@ -200,30 +225,20 @@
                     logger.debug("No pyproject.toml found: %s", error)
                     # Here we do not have a pyproject.toml file. Instead, we lean on the fact that setuptools
                    # was the de-facto build tool, and infer a setuptools version to include.
-                    build_requires_set.add(f"setuptools=={chronologically_likeliest_version}")
+                    self.add_parsed_requirement(
+                        sdist_build_requires, f"setuptools=={chronologically_likeliest_version}"
+                    )
         except SourceCodeError as error:
             logger.debug("No source distribution found: %s", error)
 
+        logger.debug("After complete analysis of the sdist:")
+        logger.debug(sdist_build_requires)
+
         # Merge in pyproject.toml information only when the wheel dist_info does not contain the same.
         # Hatch is an interesting example of this merge being required.
-        for requirement in build_requires_set:
-            try:
-                parsed_requirement = Requirement(requirement)
-                if parsed_requirement.name not in parsed_build_requires:
-                    parsed_build_requires[parsed_requirement.name] = str(parsed_requirement.specifier)
-            except (InvalidRequirement, InvalidSpecifier) as error:
-                logger.debug("Malformed requirement encountered %s : %s", requirement, error)
-
-        try:
-            # Get information from the wheel file name.
-            logger.debug(pypi_package_json.wheel_filename)
-            _, _, _, tags = parse_wheel_filename(pypi_package_json.wheel_filename)
-            for tag in tags:
-                wheel_name_python_version_list.append(tag.interpreter)
-                wheel_name_platforms.add(tag.platform)
-            logger.debug(python_version_set)
-        except InvalidWheelFilename:
-            logger.debug("Could not parse wheel file name to extract version")
+        for requirement_name, specifier in sdist_build_requires.items():
+            if requirement_name not in parsed_build_requires:
+                parsed_build_requires[requirement_name] = specifier
 
         self.data["language_version"] = list(python_version_set) or wheel_name_python_version_list
 
@@ -243,9 +258,18 @@
 
         if not patched_build_commands:
             # Resolve and patch build commands.
-            selected_build_commands = self.data["build_commands"] or self.get_default_build_commands(
-                self.data["build_tools"]
-            )
+
+            # To ensure that selected_build_commands is never empty, seed build_commands with the fallback
+            # command `python -m build --wheel -n`.
+            if self.data["build_commands"]:
+                selected_build_commands = self.data["build_commands"]
+            else:
+                self.data["build_commands"] = ["python -m build --wheel -n".split()]
+                selected_build_commands = (
+                    self.get_default_build_commands(self.data["build_tools"]) or self.data["build_commands"]
+                )
+
+            logger.debug(selected_build_commands)
 
             patched_build_commands = (
                 patch_commands(
@@ -259,16 +283,34 @@
            )
 
         self.data["build_commands"] = patched_build_commands
 
+    def add_parsed_requirement(self, build_requirements: dict[str, str], requirement: str) -> None:
+        """
+        Parse a requirement string and add it to build_requirements, logging and skipping malformed entries.
+
+        Parameters
+        ----------
+        build_requirements: dict[str, str]
+            Dictionary of build requirements to populate.
+        requirement: str
+            Requirement string to parse.
+        """
+        try:
+            parsed_requirement = Requirement(requirement)
+            if parsed_requirement.name not in build_requirements:
+                build_requirements[parsed_requirement.name] = str(parsed_requirement.specifier)
+        except (InvalidRequirement, InvalidSpecifier) as error:
+            logger.debug("Malformed requirement encountered %s : %s", requirement, error)
+
     def apply_tool_specific_inferences(
-        self, build_requires_set: set[str], python_version_set: set[str], pyproject_contents: dict[str, Any]
+        self, build_requirements: dict[str, str], python_version_set: set[str], pyproject_contents: dict[str, Any]
     ) -> None:
         """
         Based on build tools inferred, look into the pyproject.toml for related additional dependencies.
 
         Parameters
         ----------
-        build_requires_set: set[str]
-            Set of build requirements to populate.
+        build_requirements: dict[str, str]
+            Dictionary of build requirements to populate.
         python_version_set: set[str]
             Set of compatible interpreter versions to populate.
         pyproject_contents: dict[str, Any]
@@ -283,7 +325,8 @@
             for _, section in hatch_build_hooks.items():
                 dependencies = section.get("dependencies")
                 if dependencies:
-                    build_requires_set.update(elem.replace(" ", "") for elem in dependencies)
+                    for requirement in dependencies:
+                        self.add_parsed_requirement(build_requirements, requirement)
         # If we have flit as a build_tool, we will check if the legacy header [tool.flit.metadata] exists,
         # and if so, check to see if we can use its "requires-python".
         if "flit" in self.data["build_tools"]:
diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py
index ef2360a5c..dd70da6f0 100644
--- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py
+++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py
@@ -1,9 +1,10 @@
-# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
 """This module implements the logic to generate a dockerfile from a Python buildspec."""
 
 import logging
+import re
 from textwrap import dedent
 
 from packaging.specifiers import InvalidSpecifier, SpecifierSet
@@ -33,10 +34,11 @@
     GenerateBuildSpecError
         Raised if dockerfile cannot be generated.
     """
+    if buildspec.get("has_binaries"):
+        raise GenerateBuildSpecError("We currently do not support generating a Dockerfile for non-pure Python packages")
     language_version: str | None = pick_specific_version(buildspec)
     if language_version is None:
-        logger.debug("Could not derive a specific interpreter version.")
-        raise GenerateBuildSpecError("Could not derive specific interpreter version.")
+        raise GenerateBuildSpecError("Could not derive specific interpreter version")
     backend_install_commands: str = " && ".join(build_backend_commands(buildspec))
     build_tool_install: str = ""
     if (
@@ -124,8 +126,18 @@
         try:
            version_set &= SpecifierSet(version)
        except InvalidSpecifier as error:
-            logger.debug("Malformed interpreter version encountered: %s (%s)", version, error)
-            return None
+            logger.debug("Non-standard interpreter version encountered: %s (%s)", version, error)
+            # Python tags encode the interpreter implementation as well as the
+            # version, and there is no standard way to separate the two, so we
+            # attempt to recover the version heuristically:
+            try_parse_version = infer_interpreter_version(version)
+            if try_parse_version:
+                try:
+                    version_set &= SpecifierSet(f">={try_parse_version}")
+                except InvalidSpecifier as error_for_retry:
+                    logger.debug("Could not parse interpreter version from: %s (%s)", version, error_for_retry)
+
+    logger.debug(version_set)
 
     # Now to get the latest acceptable one, we can step through all interpreter
     # versions. For the most accurate result, we can query python.org for a
@@ -141,6 +153,31 @@
     return None
 
 
+def infer_interpreter_version(tag: str) -> str | None:
+    """Infer the interpreter version from a Python tag.
+
+    Parameters
+    ----------
+    tag: str
+        The Python tag, typically taken from the wheel file name.
+
+    Returns
+    -------
+    str | None: The interpreter version, or None if the tag cannot be parsed.
+    """
+    # Parse the interpreter version from CPython ("cp") tags or
+    # generic Python ("py") tags.
+    pattern = re.compile(r"^(py|cp)(\d{1,3})$")
+    parsed_tag = pattern.match(tag)
+    if parsed_tag:
+        digits = parsed_tag.group(2)
+        # As the match succeeded, len(digits) is 1, 2, or 3.
+        if len(digits) == 1:
+            return parsed_tag.group(2)
+        return f"{digits[0]}.{digits[1:]}"
+    return None
+
+
 def build_backend_commands(buildspec: BaseBuildSpecDict) -> list[str]:
     """Generate the installation commands for each inferred build backend.
 
diff --git a/src/macaron/errors.py b/src/macaron/errors.py
index d088914de..569ec1817 100644
--- a/src/macaron/errors.py
+++ b/src/macaron/errors.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
 """This module contains error classes for Macaron."""
@@ -129,3 +129,7 @@ class QueryMacaronDatabaseError(Exception):
 
 class GenerateBuildSpecError(Exception):
     """Happens when there is an unexpected error while generating the build spec file."""
+
+
+class WheelTagError(MacaronError):
+    """Happens when a Python wheel with unsupported tags is requested for analysis."""
diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py
index ce8630d37..ce278b35c 100644
--- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py
+++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
 """The module provides abstractions for the pypi package registry."""
@@ -22,9 +22,10 @@
 
 import requests
 from bs4 import BeautifulSoup, Tag
+from packaging.utils import InvalidWheelFilename, parse_wheel_filename
 
 from macaron.config.defaults import defaults
-from macaron.errors import ConfigurationError, InvalidHTTPResponseError, SourceCodeError
+from macaron.errors import ConfigurationError, InvalidHTTPResponseError, SourceCodeError, WheelTagError
 from macaron.json_tools import json_extract
 from macaron.malware_analyzer.datetime_parser import parse_datetime
 from macaron.slsa_analyzer.package_registry.package_registry import PackageRegistry
@@ -863,8 +864,10 @@ def get_latest_release_upload_time(self) -> str | None:
         return None
 
     @contextmanager
-    def wheel(self) -> Generator[None]:
+    def wheel(self, download_binaries: bool) -> Generator[None]:
         """Download and cleanup wheel of the package with a context manager."""
+        if download_binaries:
+            raise WheelTagError("Macaron does not currently support analysis of non-pure Python wheels.")
         if not self.download_wheel():
             raise SourceCodeError("Unable to download requested wheel.")
         yield
@@ -889,6 +892,38 @@ def download_wheel(self) -> bool:
             logger.debug(error)
             return False
 
+    def has_pure_wheel(self) -> bool:
+        """Check whether the PURL has a pure wheel, based on its package JSON.
+
+        Returns
+        -------
+        bool
+            Whether the PURL has a pure wheel or not.
+        """
+        if self.component_version:
+            urls = json_extract(self.package_json, ["releases", self.component_version], list)
+        else:
+            # Get the latest version.
+            urls = json_extract(self.package_json, ["urls"], list)
+        if not urls:
+            return False
+        for distribution in urls:
+            file_name: str = distribution.get("filename") or ""
+            # Parse the tags out of each distribution's filename, catching
+            # filenames that are not valid wheel names.
+            try:
+                _, _, _, tags = parse_wheel_filename(file_name)
+                # Check if "none" and "any" are in the tags (i.e. the wheel is pure).
+                # Technically a wheel can have multiple tag sets. Our condition for
+                # a pure wheel is that it has only one tag set with abi "none" and
+                # platform "any".
+                if len(tags) == 1 and all(tag.abi == "none" and tag.platform == "any" for tag in tags):
+                    return True
+            except InvalidWheelFilename:
+                logger.debug("Could not parse wheel name.")
+                return False
+        return False
+
     @contextmanager
     def sourcecode(self) -> Generator[None]:
         """Download and cleanup source code of the package with a context manager."""
diff --git a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py
index b62ea049a..c8d4d8882 100644
--- a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py
+++ b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
 """
@@ -27,6 +27,7 @@ def fixture_base_build_spec() -> BaseBuildSpecDict:
         "ecosystem": "pypi",
         "purl": "pkg:pypi/cachetools@6.2.1",
         "language": "python",
+        "has_binaries": False,
         "build_tools": ["pip"],
         "build_commands": [["python", "-m", "build"]],
         "build_requires": {"setuptools": "==80.9.0", "wheel": ""},
diff --git a/tests/integration/cases/pypi_cachetools/expected_default.buildspec b/tests/integration/cases/pypi_cachetools/expected_default.buildspec
index 0b5d8acfa..2a05c0e95 100644
--- a/tests/integration/cases/pypi_cachetools/expected_default.buildspec
+++ b/tests/integration/cases/pypi_cachetools/expected_default.buildspec
@@ -1,5 +1,5 @@
 {
-    "macaron_version": "0.18.0",
+    "macaron_version": "0.20.0",
     "group_id": null,
     "artifact_id": "cachetools",
     "version": "6.2.1",
@@ -24,6 +24,7 @@
             "-n"
         ]
     ],
+    "has_binaries": false,
     "build_requires": {
         "setuptools": "==80.9.0",
         "wheel": ""
diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec
index e7842d046..e610ee866 100644
--- a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec
+++ b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec
@@ -1,5 +1,5 @@
 {
-    "macaron_version": "0.18.0",
+    "macaron_version": "0.20.0",
     "group_id": null,
     "artifact_id": "markdown-it-py",
     "version": "4.0.0",
@@ -21,6 +21,7 @@
             "build"
         ]
     ],
+    "has_binaries": false,
     "build_requires": {
         "flit": "==3.12.0",
         "flit_core": "<4,>=3.4"
diff --git a/tests/integration/cases/pypi_toga/expected_default.buildspec b/tests/integration/cases/pypi_toga/expected_default.buildspec
index 819113207..875523655 100644
--- a/tests/integration/cases/pypi_toga/expected_default.buildspec
+++ b/tests/integration/cases/pypi_toga/expected_default.buildspec
@@ -24,6 +24,7 @@
             "-n"
         ]
     ],
+    "has_binaries": false,
     "build_requires": {
         "setuptools": "==80.3.1",
         "setuptools_dynamic_dependencies": "==1.0.0",
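
The pure-wheel check in has_pure_wheel and the tag heuristic in infer_interpreter_version both hinge on the tags encoded in a wheel filename. The standalone sketch below is illustrative only and is not part of the patch: the helper names is_pure_wheel_filename and interpreter_version_from_tag, and the example filenames, are made up for demonstration. It mirrors the patch's assumption that a pure wheel carries a single tag set with ABI "none" and platform "any", and shows how packaging.utils.parse_wheel_filename exposes those tags and how py/cp tag digits map to an interpreter version.

# Illustrative sketch of the wheel-tag logic introduced by this patch (not Macaron code).
import re

from packaging.utils import InvalidWheelFilename, parse_wheel_filename


def is_pure_wheel_filename(filename: str) -> bool:
    """Mirror the per-file check: one tag set with abi 'none' and platform 'any'."""
    try:
        _, _, _, tags = parse_wheel_filename(filename)
    except InvalidWheelFilename:
        return False
    return len(tags) == 1 and all(tag.abi == "none" and tag.platform == "any" for tag in tags)


def interpreter_version_from_tag(tag: str) -> str | None:
    """Mirror the tag heuristic: map 'py3' -> '3' and 'cp310' -> '3.10'."""
    match = re.match(r"^(py|cp)(\d{1,3})$", tag)
    if not match:
        return None
    digits = match.group(2)
    return digits if len(digits) == 1 else f"{digits[0]}.{digits[1:]}"


if __name__ == "__main__":
    # A pure wheel: a single py3-none-any tag set, so has_binaries would be False.
    print(is_pure_wheel_filename("cachetools-6.2.1-py3-none-any.whl"))  # True
    # A platform-specific (binary) wheel: the abi/platform tags are not none/any.
    print(is_pure_wheel_filename("example-1.0.0-cp310-cp310-manylinux_2_28_x86_64.whl"))  # False
    # Interpreter versions recovered from Python tags found in wheel names.
    print(interpreter_version_from_tag("py3"))  # 3
    print(interpreter_version_from_tag("cp310"))  # 3.10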