diff --git a/scanpipe/pipelines/inspect_packages.py b/scanpipe/pipelines/inspect_packages.py index 28c19a060..98e67875d 100644 --- a/scanpipe/pipelines/inspect_packages.py +++ b/scanpipe/pipelines/inspect_packages.py @@ -20,14 +20,19 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. -from scanpipe.pipelines import Pipeline +from scanpipe.pipelines.scan_codebase import ScanCodebase from scanpipe.pipes import resolve from scanpipe.pipes import update_or_create_package -class InspectPackages(Pipeline): +class InspectPackages(ScanCodebase): """ - Inspect one or more manifest files and resolve their associated packages. + Inspect a codebase/package with one or more manifest files and + resolve their associated packages. + + Supports resolved packages for: + - Python: using nexB/python-inspector, supports requirements.txt and + setup.py manifests as input Supports: - BOM: SPDX document, CycloneDX BOM, AboutCode ABOUT file @@ -48,6 +53,10 @@ class InspectPackages(Pipeline): @classmethod def steps(cls): return ( + cls.copy_inputs_to_codebase_directory, + cls.extract_archives, + cls.collect_and_create_codebase_resources, + cls.flag_ignored_resources, cls.get_manifest_inputs, cls.get_packages_from_manifest, cls.create_resolved_packages, @@ -55,19 +64,28 @@ def steps(cls): def get_manifest_inputs(self): """Locate all the manifest files from the project's input/ directory.""" - self.input_locations = [ - str(input.absolute()) for input in self.project.inputs() - ] + self.manifest_resources = resolve.get_manifest_resources(self.project) def get_packages_from_manifest(self): """Get packages data from manifest files.""" self.resolved_packages = [] - for input_location in self.input_locations: - packages = resolve.resolve_packages(input_location) - if not packages: - raise Exception(f"No packages could be resolved for {input_location}") - self.resolved_packages.extend(packages) + if not self.manifest_resources.exists(): + self.project.add_warning( + description="No manifests found for resolving packages", + model="get_packages_from_manifest", + ) + return + + for resource in self.manifest_resources: + if packages := resolve.resolve_packages(resource.location): + self.resolved_packages.extend(packages) + else: + self.project.add_error( + description="No packages could be resolved for", + model="get_packages_from_manifest", + details={"path": resource.path}, + ) def create_resolved_packages(self): """Create the resolved packages and their dependencies in the database.""" diff --git a/scanpipe/pipes/resolve.py b/scanpipe/pipes/resolve.py index 25b343d9b..e430e504f 100644 --- a/scanpipe/pipes/resolve.py +++ b/scanpipe/pipes/resolve.py @@ -35,6 +35,7 @@ from scanpipe.models import DiscoveredPackage from scanpipe.pipes import cyclonedx +from scanpipe.pipes import flag from scanpipe.pipes import spdx """ @@ -45,8 +46,10 @@ def resolve_packages(input_location): """Resolve the packages from manifest file.""" default_package_type = get_default_package_type(input_location) + # we only try to resolve packages if file at input_location is + # a package manifest, and ignore for other files if not default_package_type: - raise Exception(f"No package type found for {input_location}") + return # The ScanCode.io resolvers take precedence over the ScanCode-toolkit ones. resolver = resolver_registry.get(default_package_type) @@ -59,6 +62,16 @@ def resolve_packages(input_location): return resolved_packages +def get_manifest_resources(project): + """Get all resources in the codebase which are package manifests.""" + for resource in project.codebaseresources.no_status(): + manifest_type = get_default_package_type(input_location=resource.location) + if manifest_type: + resource.update(status=flag.APPLICATION_PACKAGE) + + return project.codebaseresources.filter(status=flag.APPLICATION_PACKAGE) + + def resolve_pypi_packages(input_location): """Resolve the PyPI packages from the `input_location` requirements file.""" python_version = f"{sys.version_info.major}{sys.version_info.minor}" diff --git a/scanpipe/tests/data/manifests/python-inspector-0.10.0.zip b/scanpipe/tests/data/manifests/python-inspector-0.10.0.zip new file mode 100644 index 000000000..6ef18a529 Binary files /dev/null and b/scanpipe/tests/data/manifests/python-inspector-0.10.0.zip differ diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index b097d2b0c..bf2eec471 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -843,9 +843,43 @@ def test_scanpipe_inspect_manifest_pipeline_integration(self): pipeline = run.make_pipeline_instance() project1.move_input_from(tempfile.mkstemp()[1]) + pipeline.execute() + self.assertEqual(1, project1.projectmessages.count()) + message = project1.projectmessages.get() + self.assertEqual("get_packages_from_manifest", message.model) + expected = "No manifests found for resolving packages" + self.assertIn(expected, message.description) + + def test_scanpipe_inspect_manifest_pipeline_integration_empty_manifest(self): + pipeline_name = "inspect_packages" + project1 = Project.objects.create(name="Analysis") + + run = project1.add_pipeline(pipeline_name) + pipeline = run.make_pipeline_instance() + + project1.move_input_from(tempfile.mkstemp(suffix="requirements.txt")[1]) + pipeline.execute() + self.assertEqual(1, project1.projectmessages.count()) + message = project1.projectmessages.get() + self.assertEqual("get_packages_from_manifest", message.model) + expected = "No packages could be resolved for" + self.assertIn(expected, message.description) + + def test_scanpipe_inspect_manifest_pipeline_integration_misc(self): + pipeline_name = "inspect_packages" + project1 = Project.objects.create(name="Analysis") + + input_location = ( + self.data_location / "manifests" / "python-inspector-0.10.0.zip" + ) + project1.copy_input_from(input_location) + + run = project1.add_pipeline(pipeline_name) + pipeline = run.make_pipeline_instance() + exitcode, out = pipeline.execute() - self.assertEqual(1, exitcode, msg=out) - self.assertIn("No package type found for", out) + self.assertEqual(0, exitcode, msg=out) + self.assertEqual(26, project1.discoveredpackages.count()) @mock.patch("scanpipe.pipes.resolve.resolve_dependencies") def test_scanpipe_inspect_manifest_pipeline_pypi_integration( @@ -857,12 +891,7 @@ def test_scanpipe_inspect_manifest_pipeline_pypi_integration( run = project1.add_pipeline(pipeline_name) pipeline = run.make_pipeline_instance() - resolve_dependencies.return_value = mock.Mock(packages=[]) project1.move_input_from(tempfile.mkstemp(suffix="requirements.txt")[1]) - exitcode, out = pipeline.execute() - self.assertEqual(1, exitcode, msg=out) - self.assertIn("No packages could be resolved", out) - resolve_dependencies.return_value = mock.Mock(packages=[package_data1]) exitcode, out = pipeline.execute() self.assertEqual(0, exitcode, msg=out)