From 7ac9e54b24e6abab3ea89df4617d974024407a59 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 24 Sep 2024 14:50:13 +1000 Subject: [PATCH] chore: refactor the local artifact finding logic and add pypi support --- src/macaron/artifact/local_artifact.py | 213 +++++++++++++++----- src/macaron/slsa_analyzer/analyzer.py | 36 +++- tests/artifact/test_local_artifact.py | 263 +++++++++++++++++++------ 3 files changed, 392 insertions(+), 120 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index 67b70c318..3d23a7e7b 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -3,70 +3,177 @@ """This module declares types and utilities for handling local artifacts.""" +import fnmatch +import glob import os from collections.abc import Mapping from packageurl import PackageURL from macaron.artifact.maven import construct_maven_repository_path -from macaron.config.global_config import global_config -def get_local_artifact_repo_mapper() -> Mapping[str, str]: - """Get A.""" - local_artifact_mapper: dict[str, str] = {} +def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) to be search in a maven layout local repo for artifact directories. - if global_config.local_maven_repo: - local_artifact_mapper["maven"] = global_config.local_maven_repo + Parameters + ---------- + maven_purl : PackageURL + A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0)`) - if global_config.python_venv_path: - local_artifact_mapper["pypi"] = global_config.python_venv_path + Returns + ------- + list[str] | None + A list of glob patterns or None if an error happened. + """ + if not maven_purl.type == "maven": + return None - return local_artifact_mapper + group = maven_purl.namespace + artifact = maven_purl.name + version = maven_purl.version + if group is None or version is None: + return None -def construct_local_artifact_paths_from_purl( - build_purl_type: str, - component_purl: PackageURL, - local_artifact_repo_mapper: Mapping[str, str], + return [construct_maven_repository_path(group, artifact, version)] + + +def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) to be search in a Python virtual environment for artifact directories. + + Parameters + ---------- + maven_purl : PackageURL + A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0)`) + + Returns + ------- + list[str] | None + A list of glob patterns or None if an error happened. + """ + if not pypi_purl.type == "pypi": + return None + + name = pypi_purl.name + version = pypi_purl.version + + if version is None: + return None + + # These patterns are from the content of a wheel file, which are extracted into the site-packages + # directory. References: + # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-contents + glob_patterns = [] + glob_patterns.append(name) + glob_patterns.append(f"{name}-{version}.dist-info") + glob_patterns.append(f"{name}-{version}.data") + + return glob_patterns + + +def find_artifact_paths_from_local_maven_repo( + local_maven_repo: str, + glob_patterns: list[str], +) -> list[str] | None: + """Return a list of existed directories within `local_maven_repo`. + + Each directory path has the form ``local_maven_repo``/. + + None means error. + """ + if not os.path.isdir(local_maven_repo): + return None + + artifact_paths = [] + for pattern in glob_patterns: + found_paths = glob.glob( + root_dir=local_maven_repo, + pathname=pattern, + ) + + for found_path in found_paths: + full_path = os.path.join(local_maven_repo, found_path) + if os.path.isdir(full_path): + artifact_paths.append(full_path) + + return artifact_paths + + +# Assume that local_python_venv exists. +# In here we need to do it case-insensitively +# We also assume that packages are just one level down from venv_path +# The return element are relative paths from venv. +def find_artifact_paths_from_python_venv( + venv_path: str, + glob_patterns: list[str], ) -> list[str] | None: - """Get B.""" - local_artifact_repo = local_artifact_repo_mapper.get(build_purl_type) - if local_artifact_repo is None: + """TBD.""" + if not os.path.isdir(venv_path): + return None + + artifact_paths = [] + + try: + venv_path_entries = os.listdir(venv_path) + except (NotADirectoryError, PermissionError, FileNotFoundError): return None - artifact_path = [] - match build_purl_type: - case "maven": - group = component_purl.namespace - artifact = component_purl.name - version = component_purl.version - - if group is None or version is None: - return None - - artifact_path.append( - os.path.join( - local_artifact_repo, - "repository", - construct_maven_repository_path(group, artifact, version), - ) - ) - case "pypi": - # TODO: implement this. - pass - case _: + all_package_dirs: list[str] = [] + for entry in venv_path_entries: + entry_path = os.path.join(venv_path, entry) + if os.path.isdir(entry_path): + all_package_dirs.append(entry) + + for package_dir in all_package_dirs: + for pattern in glob_patterns: + if fnmatch.fnmatch(package_dir.lower(), pattern.lower()): + full_path = os.path.join(venv_path, package_dir) + artifact_paths.append(full_path) + + return artifact_paths + + +def _get_local_artifact_path_for_build_tool_purl_type( + purl: PackageURL, + build_tool_purl_type: str, + local_artifact_repo: str, +) -> list[str] | None: + """TBD.""" + if build_tool_purl_type == "maven": + maven_artifact_patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl) + if not maven_artifact_patterns: return None - return artifact_path + artifact_paths = find_artifact_paths_from_local_maven_repo( + local_maven_repo=local_artifact_repo, + glob_patterns=maven_artifact_patterns, + ) + + if artifact_paths: + return artifact_paths + + if build_tool_purl_type == "pypi": + pypi_artifact_patterns = construct_local_artifact_paths_glob_pattern_pypi_purl(purl) + if not pypi_artifact_patterns: + return None + + artifact_paths = find_artifact_paths_from_python_venv( + venv_path=local_artifact_repo, + glob_patterns=pypi_artifact_patterns, + ) + + if artifact_paths: + return artifact_paths + + return None # key: purl type # value: list of paths # If a key doesn't exist -> cannot construct the artifact paths for that purl type -# (no local artifact repo found or not enough information from PURL string or simply -# the PURL string is not applicable for that purl type). -# If a value is an empty list -> Can construct the local artifact paths but no paths exist in the local artifact repository. +# (no local artifact repo found or not enough information from PURL type is not supported) OR no valid artifact paths found. +# We assume that the paths in local_artifact_repo_mapper all exists/ def get_local_artifact_paths( purl: PackageURL, build_tool_purl_types: list[str], @@ -75,22 +182,20 @@ def get_local_artifact_paths( """Get C.""" local_artifact_paths_purl_mapping = {} - for build_purl_type in build_tool_purl_types: - local_artfiact_paths = construct_local_artifact_paths_from_purl( - build_purl_type=build_purl_type, - component_purl=purl, - local_artifact_repo_mapper=local_artifact_repo_mapper, - ) - - if not local_artfiact_paths: + for build_tool_purl_type in build_tool_purl_types: + local_artifact_repo = local_artifact_repo_mapper.get(build_tool_purl_type) + if not local_artifact_repo: continue - resolved_local_artifact_paths = [] + artifact_paths = _get_local_artifact_path_for_build_tool_purl_type( + purl=purl, + build_tool_purl_type=build_tool_purl_type, + local_artifact_repo=local_artifact_repo, + ) - for local_artifact_path in local_artfiact_paths: - if os.path.isdir(local_artifact_path): - resolved_local_artifact_paths.append(local_artifact_path) + if not artifact_paths: + continue - local_artifact_paths_purl_mapping[build_purl_type] = resolved_local_artifact_paths + local_artifact_paths_purl_mapping[build_tool_purl_type] = artifact_paths return local_artifact_paths_purl_mapping diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index cdd27ffd3..c707daaea 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -2,10 +2,13 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module handles the cloning and analyzing a Git repo.""" + +import glob import logging import os import re import sys +from collections.abc import Mapping from datetime import datetime, timezone from pathlib import Path from typing import Any, NamedTuple @@ -17,7 +20,7 @@ from sqlalchemy.orm import Session from macaron import __version__ -from macaron.artifact.local_artifact import get_local_artifact_paths, get_local_artifact_repo_mapper +from macaron.artifact.local_artifact import get_local_artifact_paths from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.config.target_config import Configuration @@ -472,15 +475,14 @@ def run_single( analyze_ctx.dynamic_data["provenance_repo_url"] = provenance_repo_url analyze_ctx.dynamic_data["provenance_commit_digest"] = provenance_commit_digest - discovered_build_toosl = ( + discovered_build_tools = ( analyze_ctx.dynamic_data["build_spec"]["tools"] + analyze_ctx.dynamic_data["build_spec"]["purl_tools"] ) - build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_toosl] + build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_tools] analyze_ctx.dynamic_data["local_artifact_paths"] = get_local_artifact_paths( - # The PURL is definitely valid here. PackageURL.from_string(analyze_ctx.component.purl), build_tools_purl_types, - local_artifact_repo_mapper=get_local_artifact_repo_mapper(), + local_artifact_repo_mapper=self._get_local_artifact_repo_mapper(), ) analyze_ctx.check_results = registry.scan(analyze_ctx) @@ -1131,6 +1133,30 @@ def _determine_package_registries(self, analyze_ctx: AnalyzeContext) -> None: ) ) + @staticmethod + def _get_local_artifact_repo_mapper() -> Mapping[str, str]: + """Return the mapping between purl type and its local artifact repo path if that path exists.""" + local_artifact_mapper: dict[str, str] = {} + + if global_config.local_maven_repo: + m2_repository_dir = os.path.join(global_config.local_maven_repo, "repository") + if os.path.isdir(m2_repository_dir): + local_artifact_mapper["maven"] = m2_repository_dir + + if global_config.python_venv_path: + site_packages_dir_pattern = os.path.join( + global_config.python_venv_path, + "lib", + "python3.*", + "site-packages", + ) + site_packages_dirs = glob.glob(site_packages_dir_pattern) + + if len(site_packages_dirs) == 1: + local_artifact_mapper["pypi"] = site_packages_dirs.pop() + + return local_artifact_mapper + class DuplicateCmpError(DuplicateError): """This class is used for duplicated software component errors.""" diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index 50157ac5e..87e571f5a 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -3,74 +3,138 @@ """Test the local artifact utilities.""" -import tempfile -from collections.abc import Mapping +import os +from pathlib import Path import pytest from packageurl import PackageURL -from macaron.artifact.local_artifact import construct_local_artifact_paths_from_purl, get_local_artifact_paths +from macaron.artifact.local_artifact import ( + construct_local_artifact_paths_glob_pattern_maven_purl, + construct_local_artifact_paths_glob_pattern_pypi_purl, + find_artifact_paths_from_python_venv, + get_local_artifact_paths, +) @pytest.mark.parametrize( - ("build_purl_type", "purl_str", "local_artifact_repo_mapper", "expectation"), + ("purl_str", "expectation"), [ pytest.param( - "maven", "pkg:maven/com.google.guava/guava@33.2.1-jre", - {"maven": "/home/foo/.m2"}, - ["/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre"], - id="A maven type PURL with available local maven repo", + ["com/google/guava/guava/33.2.1-jre"], + id="A Maven PURL with group, artifact and version", ), pytest.param( - "maven", - "pkg:maven/com.google.guava/guava@33.2.1-jre", - {}, - None, - id="A maven type PURL without an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre?type=jar", + ["com/google/guava/guava/33.2.1-jre"], + id="A Maven PURL with group artifact, version and type qualifier", ), + ], +) +def test_construct_local_artifact_paths_glob_pattern_maven_purl( + purl_str: str, + expectation: list[str], +) -> None: + """Test constructing a local artifact patterns from a given maven purl.""" + maven_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl=maven_purl) + assert result is not None + assert sorted(result) == sorted(expectation) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param("pkg:pypi/django@5.0.6", id="The purl type is not supported."), + pytest.param("pkg:maven/guava@33.2.1-jre", id="Missing group id in the PURL"), + pytest.param("pkg:maven/guava", id="Missing version"), + ], +) +def test_construct_local_artifact_paths_glob_pattern_maven_purl_error(purl_str: str) -> None: + """Test constructing a local artifact patterns from a given maven purl with error.""" + maven_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl=maven_purl) + assert result is None + + +@pytest.mark.parametrize( + ("purl_str", "expectation"), + [ pytest.param( - "maven", - "pkg:maven/com.google.guava/guava@33.2.1-jre", - {"pypi": "/home/foo/.venv"}, - None, - id="A maven type PURL without an available local maven repo but there is a Python venv", + "pkg:pypi/django@5.0.6", + ["django", "django-5.0.6.dist-info", "django-5.0.6.data"], + id="A valid pypi PURL with version", + ) + ], +) +def test_construct_local_artifact_paths_glob_pattern_pypi_purl( + purl_str: str, + expectation: list[str], +) -> None: + """Test constructing a local artifact patterns from a given pypi purl.""" + pypi_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + assert result is not None + assert sorted(result) == sorted(expectation) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param( + "pkg:pypi/django", + id="A pypi PURL without version", ), pytest.param( - "maven", - "pkg:maven/com.google.guava/guava", - {"maven": "/home/foo/.m2"}, - None, - id="A maven type PURL with missing version and an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + id="The purl type is not supported.", ), + ], +) +def test_construct_local_artifact_paths_glob_pattern_pypi_purl_error(purl_str: str) -> None: + """Test constructing a local artifact patterns from a given pypi purl with error.""" + pypi_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + assert result is None + + +def test_find_artifact_paths_from_invalid_python_venv() -> None: + """Test find_artifact_paths_from_python_venv method with invalid venv path""" + assert not find_artifact_paths_from_python_venv("./does-not-exist", ["django", "django-5.0.6.dist-info"]) + + +@pytest.mark.parametrize( + ("purl_str", "build_tool_purl_types", "local_artifact_repo_mapper", "expectation"), + [ pytest.param( - "maven", - "pkg:maven/guava", - {"maven": "/home/foo/.m2"}, - None, - id="A maven type PURL with missing groupd Id and an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + ["maven", "pypi"], + {}, + {}, + id="A maven type PURL where multiple build tool types are discovered. But no local repository is available.", ), pytest.param( - "maven", - "pkg:github/oracle/macaron", - {"maven": "/home/foo/.m2"}, - None, - id="A git type PURL and an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + [], + {}, + {}, + id="A maven type PURL where no build tool types are discovered and no local repository is available.", ), ], ) -def test_construct_local_artifact_path_from_purl( - build_purl_type: str, +def test_get_local_artifact_paths_empty( purl_str: str, - local_artifact_repo_mapper: Mapping[str, str], - expectation: list[str], + build_tool_purl_types: list[str], + local_artifact_repo_mapper: dict[str, str], + expectation: dict[str, list[str]], ) -> None: - """Test constructing a local artifact path from a given purl.""" - component_purl = PackageURL.from_string(purl_str) + """Test getting local artifact paths where the result is empty.""" + purl = PackageURL.from_string(purl_str) assert ( - construct_local_artifact_paths_from_purl( - build_purl_type=build_purl_type, - component_purl=component_purl, + get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, local_artifact_repo_mapper=local_artifact_repo_mapper, ) == expectation @@ -83,31 +147,108 @@ def test_construct_local_artifact_path_from_purl( pytest.param( "pkg:maven/com.google.guava/guava@33.2.1-jre", ["maven", "pypi"], - {"maven": []}, - id="A maven type PURL where multiple build tool types are discovered. But no artifact path is available.", + {}, + id="A maven type PURL where multiple build tool types are discovered", + ), + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre", + [], + {}, + id="A maven type PURL where no build tool is discovered", + ), + pytest.param( + "pkg:pypi/django@5.0.3", + [], + {}, + id="A maven type PURL where no build tool is discovered", ), ], ) -def test_get_local_artifact_paths_non_existing( +def test_get_local_artifact_paths_not_available( purl_str: str, build_tool_purl_types: list[str], expectation: dict[str, list[str]], + tmp_path: Path, ) -> None: - """Test getting local artifact paths of non existing artifacts. - - The local artifact repos are available. - """ + """Test getting local artifact paths where the artifact paths are not available.""" purl = PackageURL.from_string(purl_str) - with tempfile.TemporaryDirectory() as temp_dir: - local_artifact_repo_mapper = { - "maven": temp_dir, - "pypi": temp_dir, - } - assert ( - get_local_artifact_paths( - purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, - ) - == expectation + local_artifact_repo_mapper = { + "maven": str(tmp_path), + "pypi": str(tmp_path), + } + + assert ( + get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, ) + == expectation + ) + + +def test_get_local_artifact_paths_succeeded_maven(tmp_path: Path) -> None: + """Test getting local artifact paths succeeded with maven purl.""" + purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0") + build_tool_purl_types = ["maven", "pypi"] + + tmp_path_str = str(tmp_path) + + local_artifact_repo_mapper = { + "maven": f"{tmp_path_str}/.m2/repository", + "pypi": f"{tmp_path_str}/.venv/lib/python3.11/site-packages", + } + maven_artifact_path = f"{local_artifact_repo_mapper['maven']}/com/oracle/macaron/macaron/0.13.0" + os.makedirs(local_artifact_repo_mapper["maven"]) + os.makedirs(local_artifact_repo_mapper["pypi"]) + os.makedirs(maven_artifact_path) + + expectation = { + "maven": [maven_artifact_path], + } + + result = get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + + assert result == expectation + + +def test_get_local_artifact_paths_succeeded_pypi(tmp_path: Path) -> None: + """Test getting local artifact paths succeeded with pypi purl.""" + purl = PackageURL.from_string("pkg:pypi/macaron@0.13.0") + build_tool_purl_types = ["maven", "pypi"] + + tmp_path_str = str(tmp_path) + + local_artifact_repo_mapper = { + "maven": f"{tmp_path_str}/.m2/repository", + "pypi": f"{tmp_path_str}/.venv/lib/python3.11/site-packages", + } + pypi_artifact_paths = [ + f"{local_artifact_repo_mapper['pypi']}/macaron", + f"{local_artifact_repo_mapper['pypi']}/macaron-0.13.0.dist-info", + f"{local_artifact_repo_mapper['pypi']}/Macaron-0.13.0.dist-info", + ] + + os.makedirs(local_artifact_repo_mapper["maven"]) + os.makedirs(local_artifact_repo_mapper["pypi"]) + + for artifact_path in pypi_artifact_paths: + os.makedirs(artifact_path) + + expectation = { + "pypi": sorted(pypi_artifact_paths), + } + + result = get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + for value in result.values(): + value.sort() + + assert result == expectation