From ea9a1912d32ce95c7bee7db66d432842bacb2295 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Fri, 23 Feb 2024 18:59:53 -0800 Subject: [PATCH 1/6] Add and refactor metadata and urls code and tests #247 Reference: https://github.com/nexB/purldb/issues/247 Signed-off-by: John M. Horan --- .../src/purldb_toolkit/cli_test_utils.py | 438 ++++++++++++++++++ purldb-toolkit/src/purldb_toolkit/purlcli.py | 113 +++-- .../data/purlcli/expected_urls_output.json | 56 +-- .../purlcli/expected_urls_output_head.json | 80 +--- .../purlcli/expected_urls_output_unique.json | 249 ++++++++++ purldb-toolkit/tests/test_purlcli.py | 229 +++++++-- 6 files changed, 949 insertions(+), 216 deletions(-) create mode 100644 purldb-toolkit/src/purldb_toolkit/cli_test_utils.py create mode 100644 purldb-toolkit/tests/data/purlcli/expected_urls_output_unique.json diff --git a/purldb-toolkit/src/purldb_toolkit/cli_test_utils.py b/purldb-toolkit/src/purldb_toolkit/cli_test_utils.py new file mode 100644 index 00000000..6883b221 --- /dev/null +++ b/purldb-toolkit/src/purldb_toolkit/cli_test_utils.py @@ -0,0 +1,438 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import io +import json +import os +import time + +import saneyaml +from commoncode.system import on_windows +from packageurl import PackageURL +from scancode_config import REGEN_TEST_FIXTURES, scancode_root_dir + + +def run_scan_plain( + options, + cwd=None, + test_mode=True, + expected_rc=0, + env=None, + retry=True, +): + """ + Run a scan as a plain subprocess. Return rc, stdout, stderr. + """ + + from commoncode.command import execute + + options = add_windows_extra_timeout(options) + + if test_mode and "--test-mode" not in options: + options.append("--test-mode") + + if not env: + env = dict(os.environ) + + scmd = "scancode" + scan_cmd = os.path.join(scancode_root_dir, scmd) + rc, stdout, stderr = execute( + cmd_loc=scan_cmd, + args=options, + cwd=cwd, + env=env, + ) + + if retry and rc != expected_rc: + # wait and rerun in verbose mode to get more in the output + time.sleep(1) + if "--verbose" not in options: + options.append("--verbose") + result = rc, stdout, stderr = execute( + cmd_loc=scan_cmd, + args=options, + cwd=cwd, + env=env, + ) + + if rc != expected_rc: + opts = get_opts(options) + error = ( + f""" +Failure to run: +rc: {rc} +scancode {opts} +stdout: +{stdout} + +stderr: +{stderr} +""" + % locals() + ) + assert rc == expected_rc, error + + return rc, stdout, stderr + + +def run_scan_click( + options, + monkeypatch=None, + test_mode=True, + expected_rc=0, + env=None, + retry=True, +): + """ + Run a scan as a Click-controlled subprocess + If monkeypatch is provided, a tty with a size (80, 43) is mocked. + Return a click.testing.Result object. + If retry is True, wait 10 seconds after a failure and retry once + """ + import shutil + + import click + from click.testing import CliRunner + from scancode import cli + + options = add_windows_extra_timeout(options) + + if test_mode and "--test-mode" not in options: + options.append("--test-mode") + + if monkeypatch: + monkeypatch.setattr(click._termui_impl, "isatty", lambda _: True) + monkeypatch.setattr( + shutil, + "get_terminal_size", + lambda: ( + 80, + 43, + ), + ) + + if not env: + env = dict(os.environ) + + runner = CliRunner() + + result = runner.invoke(cli.scancode, options, catch_exceptions=False, env=env) + if retry and result.exit_code != expected_rc: + if on_windows: + # wait and rerun in verbose mode to get more in the output + time.sleep(1) + if "--verbose" not in options: + options.append("--verbose") + result = runner.invoke(cli.scancode, options, catch_exceptions=False, env=env) + + if result.exit_code != expected_rc: + output = result.output + opts = get_opts(options) + error = f""" +Failure to run: +rc: {result.exit_code} +scancode {opts} +output: +{output} +""" + assert result.exit_code == expected_rc, error + return result + + +def get_opts(options): + opts = [o if isinstance(o, str) else repr(o) for o in options] + return " ".join(opts) + + +WINDOWS_CI_TIMEOUT = "222.2" + + +def add_windows_extra_timeout(options, timeout=WINDOWS_CI_TIMEOUT): + """ + Add a timeout to an options list if on Windows. + """ + if on_windows and "--timeout" not in options: + # somehow the Appevyor windows CI is now much slower and timeouts at 120 secs + options += ["--timeout", timeout] + return options + + +def remove_windows_extra_timeout(scancode_options, timeout=WINDOWS_CI_TIMEOUT): + """ + Strip a test timeout from a pretty scancode_options mapping if on Windows. + """ + if on_windows: + if scancode_options and scancode_options.get("--timeout") == timeout: + del scancode_options["--timeout"] + + +def check_json_scan( + expected_file, + result_file, + regen=False, + remove_file_date=False, + check_headers=False, + remove_uuid=True, +): + """ + Check the scan `result_file` JSON results against the `expected_file` + expected JSON results. + + If `regen` is True the expected_file WILL BE overwritten with the new scan + results from `results_file`. This is convenient for updating tests + expectations. But use with caution. + + If `remove_file_date` is True, the file.date attribute is removed. + If `check_headers` is True, the scan headers attribute is not removed. + If `remove_uuid` is True, removes UUID from Package and Dependency. + and if also `regen` is True then regenerate expected file with old UUIDs present already. + """ + results = load_json_result(location=result_file, remove_file_date=remove_file_date) + if remove_uuid: + results = remove_uuid_from_scan(results) + + if not check_headers: + results.pop("headers", None) + + if regen: + with open(expected_file, "w") as reg: + json.dump(results, reg, indent=2, separators=(",", ": ")) + expected = results + else: + expected = load_json_result( + location=expected_file, remove_file_date=remove_file_date + ) + if remove_uuid: + expected = remove_uuid_from_scan(expected) + if not check_headers: + expected.pop("headers", None) + + # NOTE we redump the JSON as a YAML string for easier display of + # the failures comparison/diff + if results != expected: + expected = saneyaml.dump(expected) + results = saneyaml.dump(results) + assert results == expected + + +def remove_uuid_from_scan(results): + """ + Remove Package and Dependency UUIDs from a ``results` mapping of scan data . + UUID fields are generated uniquely and would cause test failures + when comparing results and expected. + """ + for package in results.get("packages") or []: + package_uid = package.get("package_uid") + if package_uid: + package["package_uid"] = purl_with_fake_uuid(package_uid) + + for dependency in results.get("dependencies") or []: + dependency_uid = dependency.get("dependency_uid") + if dependency_uid: + dependency["dependency_uid"] = purl_with_fake_uuid(dependency_uid) + + for_package_uid = dependency.get("for_package_uid") + if for_package_uid: + dependency["for_package_uid"] = purl_with_fake_uuid(for_package_uid) + + for resource in results.get("files") or []: + for_packages = [] + has_packages = False + for fpkg in resource.get("for_packages") or []: + has_packages = True + for_packages.append(purl_with_fake_uuid(fpkg)) + + if has_packages: + resource["for_packages"] = for_packages + + return results + + +def purl_with_fake_uuid(purl): + purl = PackageURL.from_string(purl) + purl.qualifiers["uuid"] = "fixed-uid-done-for-testing-5642512d1758" + return purl.to_string() + + +def load_json_result(location, remove_file_date=False): + """ + Load the JSON scan results file at `location` location as UTF-8 JSON. + + To help with test resilience against small changes some attributes are + removed or streamlined such as the "tool_version" and scan "errors". + + To optionally also remove date attributes from "files" and "headers" + entries, set the `remove_file_date` argument to True. + """ + with io.open(location, encoding="utf-8") as res: + scan_results = res.read() + return load_json_result_from_string(scan_results, remove_file_date) + + +def load_json_result_from_string(string, remove_file_date=False): + """ + Load the JSON scan results `string` as UTF-8 JSON. + """ + scan_results = json.loads(string) + # clean new headers attributes + streamline_headers(scan_results.get("headers", [])) + # clean file_level attributes + for scanned_file in scan_results["files"]: + streamline_scanned_file(scanned_file, remove_file_date) + + # TODO: remove sort, this should no longer be needed + scan_results["files"].sort(key=lambda x: x["path"]) + return scan_results + + +def cleanup_scan(scan_results, remove_file_date=False): + """ + Cleanup in place the ``scan_results`` mapping for dates, headers and + other variable data that break tests otherwise. + """ + # clean new headers attributes + streamline_headers(scan_results.get("headers", [])) + # clean file_level attributes + for scanned_file in scan_results["files"]: + streamline_scanned_file(scanned_file, remove_file_date) + + # TODO: remove sort, this should no longer be needed + scan_results["files"].sort(key=lambda x: x["path"]) + return scan_results + + +def streamline_errors(errors): + """ + Modify the `errors` list in place to make it easier to test + """ + for i, error in enumerate(errors[:]): + error_lines = error.splitlines(True) + if len(error_lines) <= 1: + continue + # keep only first and last line + cleaned_error = "".join([error_lines[0] + error_lines[-1]]) + errors[i] = cleaned_error + + +def streamline_headers(headers): + """ + Modify the `headers` list of mappings in place to make it easier to test. + """ + for hle in headers: + hle.pop("tool_version", None) + remove_windows_extra_timeout(hle.get("options", {})) + hle.pop("start_timestamp", None) + hle.pop("end_timestamp", None) + hle.pop("duration", None) + header = hle.get("options", {}) + header.pop("--verbose", None) + streamline_errors(hle["errors"]) + + +def streamline_scanned_file(scanned_file, remove_file_date=False): + """ + Modify the `scanned_file` mapping for a file in scan results in place to + make it easier to test. + """ + streamline_errors(scanned_file.get("scan_errors", [])) + if remove_file_date: + scanned_file.pop("date", None) + + +def check_jsonlines_scan( + expected_file, + result_file, + regen=False, + remove_file_date=False, + check_headers=False, + remove_uuid=True, +): + """ + Check the scan result_file JSON Lines results against the expected_file + expected JSON results, which is a list of mappings, one per line. If regen + is True the expected_file WILL BE overwritten with the results. This is + convenient for updating tests expectations. But use with caution. + + If `remove_file_date` is True, the file.date attribute is removed. + """ + with io.open(result_file, encoding="utf-8") as res: + results = [json.loads(line) for line in res] + + if remove_uuid: + for result in results: + result = remove_uuid_from_scan(result) + streamline_jsonlines_scan(results, remove_file_date) + + if regen: + with open(expected_file, "w") as reg: + json.dump(results, reg, indent=2, separators=(",", ": ")) + + with io.open(expected_file, encoding="utf-8") as res: + expected = json.load(res) + if remove_uuid: + for result in results: + result = remove_uuid_from_scan(result) + + streamline_jsonlines_scan(expected, remove_file_date) + + if not check_headers: + results[0].pop("headers", None) + expected[0].pop("headers", None) + + expected = json.dumps(expected, indent=2, separators=(",", ": ")) + results = json.dumps(results, indent=2, separators=(",", ": ")) + assert results == expected + + +def streamline_jsonlines_scan(scan_result, remove_file_date=False): + """ + Remove or update variable fields from `scan_result`such as version and + errors to ensure that the test data is stable. + + If `remove_file_date` is True, the file.date attribute is removed. + """ + for result_line in scan_result: + headers = result_line.get("headers", {}) + if headers: + streamline_headers(headers) + + for scanned_file in result_line.get("files", []): + streamline_scanned_file(scanned_file, remove_file_date) + + +def check_json(expected, results, regen=REGEN_TEST_FIXTURES): + """ + Assert if the results JSON file is the same as the expected JSON file. + """ + if regen: + with open(expected, "w") as ex: + json.dump(results, ex, indent=2, separators=(",", ": ")) + with open(expected) as ex: + expected = json.load(ex) + + if results != expected: + expected = saneyaml.dump(expected) + results = saneyaml.dump(results) + assert results == expected + + +def load_both_and_check_json(expected, results, regen=REGEN_TEST_FIXTURES): + """ + Assert if the results JSON file is the same as the expected JSON file. + """ + with open(results) as res: + results = json.load(res) + + if regen: + mode = "w" + with open(expected, mode) as ex: + json.dump(results, ex, indent=2, separators=(",", ": ")) + with open(expected) as ex: + expected = json.load(ex) + assert results == expected diff --git a/purldb-toolkit/src/purldb_toolkit/purlcli.py b/purldb-toolkit/src/purldb_toolkit/purlcli.py index 290f2dd0..f75167fc 100644 --- a/purldb-toolkit/src/purldb_toolkit/purlcli.py +++ b/purldb-toolkit/src/purldb_toolkit/purlcli.py @@ -94,6 +94,7 @@ def get_metadata_details(purls, output, file, unique, command_name): input_purls = purls for purl in input_purls: + purl = purl.strip() if not purl: continue @@ -185,7 +186,7 @@ def construct_headers( headers_content["options"] = options headers_content["purls"] = purls - if command_name == "metadata" and unique: + if (command_name in ["metadata", "urls"]) and unique: for purl in normalized_purls: if purl[0] != purl[1]: warnings.append(f"input PURL: '{purl[0]}' normalized to '{purl[1]}'") @@ -245,7 +246,7 @@ def check_metadata_purl(purl): `warnings` field of the `header` section of the JSON object returned by the `metadata` command. """ - results = check_existence(purl) + results = validate_purls([purl])[0] if results["valid"] == False: return "not_valid" @@ -301,7 +302,6 @@ def check_metadata_purl(purl): required=False, help="Validate each URL's existence with a head request.", ) -# We're passing `unique` but it's not yet fully implemented here or in the `urls` tests. def get_urls(purls, output, file, unique, head): """ Given one or more PURLs, for each PURL, return a list of all known URLs @@ -315,28 +315,32 @@ def get_urls(purls, output, file, unique, head): context = click.get_current_context() command_name = context.command.name - urls_info = get_urls_details(purls, output, file, head, command_name) + urls_info = get_urls_details(purls, output, file, unique, head, command_name) json.dump(urls_info, output, indent=4) -def get_urls_details(purls, output, file, head, command_name): +def get_urls_details(purls, output, file, unique, head, command_name): """ Return a dictionary containing URLs for each PURL in the `purls` input list. `check_urls_purl()` will print an error message to the console (also displayed in the JSON output) when necessary. """ urls_details = {} - urls_details["headers"] = construct_headers( - purls=purls, - output=output, - file=file, - head=head, - command_name=command_name, - ) - + urls_details["headers"] = [] urls_details["packages"] = [] - for purl in purls: + normalized_purls = [] + input_purls = [] + if unique: + for purl in purls: + purl, normalized_purl = normalize_purl(purl) + normalized_purls.append((purl, normalized_purl)) + if normalized_purl not in input_purls: + input_purls.append(normalized_purl) + else: + input_purls = purls + + for purl in input_purls: url_detail = {} url_detail["purl"] = purl @@ -411,6 +415,16 @@ def get_urls_details(purls, output, file, head, command_name): urls_details["packages"].append(url_detail) + urls_details["headers"] = construct_headers( + purls=purls, + output=output, + file=file, + head=head, + command_name=command_name, + normalized_purls=normalized_purls, + unique=unique, + ) + return urls_details @@ -447,7 +461,7 @@ def check_urls_purl(purl): or its type is not supported (or not fully supported) by `urls`, or it does not exist in the upstream repo. """ - results = check_existence(purl) + results = validate_purls([purl])[0] if results["valid"] == False: return "not_valid" @@ -541,6 +555,25 @@ def validate(purls, output, file): def validate_purls(purls): + """ + Return a JSON object containing data regarding the validity of the input PURL. + + Based on packagedb.package_managers VERSION_API_CLASSES_BY_PACKAGE_TYPE + and packagedb/api.py class PurlValidateViewSet(viewsets.ViewSet) + -- and supported by testing the command -- it appears that the `validate` + command `check_existence` parameter supports the following PURL types: + + "cargo", + "composer", + "deb", + "gem", + "golang", + "hex", + "maven", + "npm", + "nuget", + "pypi", + """ api_query = "https://public.purldb.io/api/validate/" validated_purls = [] for purl in purls: @@ -549,43 +582,16 @@ def validate_purls(purls): continue request_body = {"purl": purl, "check_existence": True} response = requests.get(api_query, params=request_body) - results = response.json() + # results = response.json() + try: + results = response.json() + except: + return validated_purls.append(results) return validated_purls -def check_existence(purl): - """ - Return a JSON object containing data regarding the validity of the input PURL. - """ - - # Based on packagedb.package_managers VERSION_API_CLASSES_BY_PACKAGE_TYPE - # -- and supported by testing the command -- it appears that the `validate` - # command `check_existence` check supports the following PURL types: - - # validate_supported_ecosystems = [ - # "cargo", - # "composer", - # "deb", - # "gem", - # "golang", - # "hex", - # "maven", - # "npm", - # "nuget", - # "pypi", - # ] - - api_query = "https://public.purldb.io/api/validate/" - purl = purl.strip() - request_body = {"purl": purl, "check_existence": True} - response = requests.get(api_query, params=request_body) - results = response.json() - - return results - - # Not yet converted to a SCTK-like data structure. @purlcli.command(name="versions") @click.option( @@ -611,9 +617,6 @@ def check_existence(purl): def get_versions(purls, output, file): """ Given one or more PURLs, return a list of all known versions for each PURL. - - Version information is not needed in submitted PURLs and if included will - be removed before processing. """ check_for_duplicate_input_sources(purls, file) @@ -659,6 +662,11 @@ def list_versions(purls, output, file, command_name): print(f"'{purl}' does not exist in the upstream repo") continue + # TODO: Add to warnings and test it as well. + if command_name == "versions" and versions_purl == "error_fetching_purl": + print(f"Error fetching '{purl}'") + continue + for package_version_object in list(versions(purl)): purl_version_data = {} purl_version = package_version_object.to_dict()["value"] @@ -699,7 +707,10 @@ def check_versions_purl(purl): "pypi", ] """ - results = check_existence(purl) + results = validate_purls([purl])[0] + + if results is None: + return "error_fetching_purl" if results["valid"] == False: return "not_valid" diff --git a/purldb-toolkit/tests/data/purlcli/expected_urls_output.json b/purldb-toolkit/tests/data/purlcli/expected_urls_output.json index 7089f5bb..6cf3da0f 100644 --- a/purldb-toolkit/tests/data/purlcli/expected_urls_output.json +++ b/purldb-toolkit/tests/data/purlcli/expected_urls_output.json @@ -23,14 +23,12 @@ "pkg:gem/bundler-sass", "pkg:rubygems/bundler-sass", "pkg:pypi/matchcode", - "pkg:rubygems/bundler-sass", "abcdefg", "pkg/abc", - "pkg:nuget/auth0-aspnet@1.1.0", - "pkg:cargo/socksprox" + "pkg:nuget/auth0-aspnet@1.1.0" ], "--file": null, - "--output": "/mnt/c/nexb/purldb-testing/2024-current-01-testing/json-output/2024-02-14-urls--output-01.json" + "--output": "" }, "purls": [ "pkg:pypi/fetchcode", @@ -50,11 +48,9 @@ "pkg:gem/bundler-sass", "pkg:rubygems/bundler-sass", "pkg:pypi/matchcode", - "pkg:rubygems/bundler-sass", "abcdefg", "pkg/abc", - "pkg:nuget/auth0-aspnet@1.1.0", - "pkg:cargo/socksprox" + "pkg:nuget/auth0-aspnet@1.1.0" ], "errors": [], "warnings": [ @@ -330,29 +326,6 @@ "url": "https://rubygems.org/gems/bundler-sass" } }, - { - "purl": "pkg:rubygems/bundler-sass", - "download_url": { - "url": null - }, - "inferred_urls": [ - { - "url": "https://rubygems.org/gems/bundler-sass" - } - ], - "repo_download_url": { - "url": null - }, - "repo_download_url_by_package_type": { - "url": null - }, - "repo_url": { - "url": "https://rubygems.org/gems/bundler-sass" - }, - "url": { - "url": "https://rubygems.org/gems/bundler-sass" - } - }, { "purl": "pkg:nuget/auth0-aspnet@1.1.0", "download_url": { @@ -378,29 +351,6 @@ "url": { "url": "https://www.nuget.org/packages/auth0-aspnet/1.1.0" } - }, - { - "purl": "pkg:cargo/socksprox", - "download_url": { - "url": null - }, - "inferred_urls": [ - { - "url": "https://crates.io/crates/socksprox" - } - ], - "repo_download_url": { - "url": null - }, - "repo_download_url_by_package_type": { - "url": null - }, - "repo_url": { - "url": "https://crates.io/crates/socksprox" - }, - "url": { - "url": "https://crates.io/crates/socksprox" - } } ] } diff --git a/purldb-toolkit/tests/data/purlcli/expected_urls_output_head.json b/purldb-toolkit/tests/data/purlcli/expected_urls_output_head.json index 37bab6c0..a8f2e3d8 100644 --- a/purldb-toolkit/tests/data/purlcli/expected_urls_output_head.json +++ b/purldb-toolkit/tests/data/purlcli/expected_urls_output_head.json @@ -23,15 +23,13 @@ "pkg:gem/bundler-sass", "pkg:rubygems/bundler-sass", "pkg:pypi/matchcode", - "pkg:rubygems/bundler-sass", "abcdefg", "pkg/abc", - "pkg:nuget/auth0-aspnet@1.1.0", - "pkg:cargo/socksprox" + "pkg:nuget/auth0-aspnet@1.1.0" ], "--file": null, "--head": true, - "--output": "/mnt/c/nexb/purldb-testing/2024-current-01-testing/json-output/2024-02-14-urls--head--output-01.json" + "--output": "" }, "purls": [ "pkg:pypi/fetchcode", @@ -51,11 +49,9 @@ "pkg:gem/bundler-sass", "pkg:rubygems/bundler-sass", "pkg:pypi/matchcode", - "pkg:rubygems/bundler-sass", "abcdefg", "pkg/abc", - "pkg:nuget/auth0-aspnet@1.1.0", - "pkg:cargo/socksprox" + "pkg:nuget/auth0-aspnet@1.1.0" ], "errors": [], "warnings": [ @@ -463,41 +459,6 @@ "head_request_status_code": 200 } }, - { - "purl": "pkg:rubygems/bundler-sass", - "download_url": { - "url": null, - "get_request_status_code": "N/A", - "head_request_status_code": "N/A" - }, - "inferred_urls": [ - { - "url": "https://rubygems.org/gems/bundler-sass", - "get_request_status_code": 200, - "head_request_status_code": 200 - } - ], - "repo_download_url": { - "url": null, - "get_request_status_code": "N/A", - "head_request_status_code": "N/A" - }, - "repo_download_url_by_package_type": { - "url": null, - "get_request_status_code": "N/A", - "head_request_status_code": "N/A" - }, - "repo_url": { - "url": "https://rubygems.org/gems/bundler-sass", - "get_request_status_code": 200, - "head_request_status_code": 200 - }, - "url": { - "url": "https://rubygems.org/gems/bundler-sass", - "get_request_status_code": 200, - "head_request_status_code": 200 - } - }, { "purl": "pkg:nuget/auth0-aspnet@1.1.0", "download_url": { @@ -537,41 +498,6 @@ "get_request_status_code": 200, "head_request_status_code": 404 } - }, - { - "purl": "pkg:cargo/socksprox", - "download_url": { - "url": null, - "get_request_status_code": "N/A", - "head_request_status_code": "N/A" - }, - "inferred_urls": [ - { - "url": "https://crates.io/crates/socksprox", - "get_request_status_code": 404, - "head_request_status_code": 404 - } - ], - "repo_download_url": { - "url": null, - "get_request_status_code": "N/A", - "head_request_status_code": "N/A" - }, - "repo_download_url_by_package_type": { - "url": null, - "get_request_status_code": "N/A", - "head_request_status_code": "N/A" - }, - "repo_url": { - "url": "https://crates.io/crates/socksprox", - "get_request_status_code": 404, - "head_request_status_code": 404 - }, - "url": { - "url": "https://crates.io/crates/socksprox", - "get_request_status_code": 404, - "head_request_status_code": 404 - } } ] } diff --git a/purldb-toolkit/tests/data/purlcli/expected_urls_output_unique.json b/purldb-toolkit/tests/data/purlcli/expected_urls_output_unique.json new file mode 100644 index 00000000..5eaaa483 --- /dev/null +++ b/purldb-toolkit/tests/data/purlcli/expected_urls_output_unique.json @@ -0,0 +1,249 @@ +{ + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.1.0", + "options": { + "command": "urls", + "--purl": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@5.0.0", + "pkg:pypi/dejacode", + "pkg:pypi/dejacode@5.0.0", + "pkg:pypi/dejacode@5.0.0?os=windows", + "pkg:pypi/dejacode@5.0.0os=windows", + "pkg:pypi/dejacode@5.0.0?how_is_the_weather=rainy", + "pkg:pypi/dejacode@5.0.0#how/are/you", + "pkg:pypi/dejacode@10.0.0", + "pkg:cargo/banquo", + "pkg:cargo/socksprox", + "pkg:nginx/nginx", + "pkg:nginx/nginx@0.8.9?os=windows", + "pkg:gem/bundler-sass", + "pkg:rubygems/bundler-sass", + "pkg:pypi/matchcode", + "abcdefg", + "pkg/abc", + "pkg:nuget/auth0-aspnet@1.1.0" + ], + "--file": null, + "--unique": true, + "--output": "" + }, + "purls": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@5.0.0", + "pkg:pypi/dejacode", + "pkg:pypi/dejacode@5.0.0", + "pkg:pypi/dejacode@5.0.0?os=windows", + "pkg:pypi/dejacode@5.0.0os=windows", + "pkg:pypi/dejacode@5.0.0?how_is_the_weather=rainy", + "pkg:pypi/dejacode@5.0.0#how/are/you", + "pkg:pypi/dejacode@10.0.0", + "pkg:cargo/banquo", + "pkg:cargo/socksprox", + "pkg:nginx/nginx", + "pkg:nginx/nginx@0.8.9?os=windows", + "pkg:gem/bundler-sass", + "pkg:rubygems/bundler-sass", + "pkg:pypi/matchcode", + "abcdefg", + "pkg/abc", + "pkg:nuget/auth0-aspnet@1.1.0" + ], + "errors": [], + "warnings": [ + "input PURL: 'pkg:pypi/fetchcode@0.3.0' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@5.0.0' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/dejacode@5.0.0' normalized to 'pkg:pypi/dejacode'", + "input PURL: 'pkg:pypi/dejacode@5.0.0?os=windows' normalized to 'pkg:pypi/dejacode'", + "input PURL: 'pkg:pypi/dejacode@5.0.0os=windows' normalized to 'pkg:pypi/dejacode'", + "input PURL: 'pkg:pypi/dejacode@5.0.0?how_is_the_weather=rainy' normalized to 'pkg:pypi/dejacode'", + "input PURL: 'pkg:pypi/dejacode@5.0.0#how/are/you' normalized to 'pkg:pypi/dejacode'", + "input PURL: 'pkg:pypi/dejacode@10.0.0' normalized to 'pkg:pypi/dejacode'", + "input PURL: 'pkg:nginx/nginx@0.8.9?os=windows' normalized to 'pkg:nginx/nginx'", + "input PURL: 'pkg:nuget/auth0-aspnet@1.1.0' normalized to 'pkg:nuget/auth0-aspnet'", + "'pkg:pypi/fetchcode' not fully supported with `urls` command", + "'pkg:pypi/fetchcode@0.3.0' not fully supported with `urls` command", + "'pkg:pypi/fetchcode@5.0.0' does not exist in the upstream repo", + "'pkg:pypi/dejacode' not fully supported with `urls` command", + "'pkg:pypi/dejacode@5.0.0' not fully supported with `urls` command", + "'pkg:pypi/dejacode@5.0.0?os=windows' not fully supported with `urls` command", + "'pkg:pypi/dejacode@5.0.0os=windows' does not exist in the upstream repo", + "'pkg:pypi/dejacode@5.0.0?how_is_the_weather=rainy' not fully supported with `urls` command", + "'pkg:pypi/dejacode@5.0.0#how/are/you' not fully supported with `urls` command", + "'pkg:pypi/dejacode@10.0.0' does not exist in the upstream repo", + "'pkg:nginx/nginx' not supported with `urls` command", + "'pkg:nginx/nginx@0.8.9?os=windows' not supported with `urls` command", + "'pkg:pypi/matchcode' does not exist in the upstream repo", + "'abcdefg' not valid", + "'pkg/abc' not valid" + ] + } + ], + "packages": [ + { + "purl": "pkg:pypi/fetchcode", + "download_url": { + "url": null + }, + "inferred_urls": [ + { + "url": "https://pypi.org/project/fetchcode/" + } + ], + "repo_download_url": { + "url": null + }, + "repo_download_url_by_package_type": { + "url": null + }, + "repo_url": { + "url": "https://pypi.org/project/fetchcode/" + }, + "url": { + "url": "https://pypi.org/project/fetchcode/" + } + }, + { + "purl": "pkg:pypi/dejacode", + "download_url": { + "url": null + }, + "inferred_urls": [ + { + "url": "https://pypi.org/project/dejacode/" + } + ], + "repo_download_url": { + "url": null + }, + "repo_download_url_by_package_type": { + "url": null + }, + "repo_url": { + "url": "https://pypi.org/project/dejacode/" + }, + "url": { + "url": "https://pypi.org/project/dejacode/" + } + }, + { + "purl": "pkg:cargo/banquo", + "download_url": { + "url": null + }, + "inferred_urls": [ + { + "url": "https://crates.io/crates/banquo" + } + ], + "repo_download_url": { + "url": null + }, + "repo_download_url_by_package_type": { + "url": null + }, + "repo_url": { + "url": "https://crates.io/crates/banquo" + }, + "url": { + "url": "https://crates.io/crates/banquo" + } + }, + { + "purl": "pkg:cargo/socksprox", + "download_url": { + "url": null + }, + "inferred_urls": [ + { + "url": "https://crates.io/crates/socksprox" + } + ], + "repo_download_url": { + "url": null + }, + "repo_download_url_by_package_type": { + "url": null + }, + "repo_url": { + "url": "https://crates.io/crates/socksprox" + }, + "url": { + "url": "https://crates.io/crates/socksprox" + } + }, + { + "purl": "pkg:gem/bundler-sass", + "download_url": { + "url": null + }, + "inferred_urls": [ + { + "url": "https://rubygems.org/gems/bundler-sass" + } + ], + "repo_download_url": { + "url": null + }, + "repo_download_url_by_package_type": { + "url": null + }, + "repo_url": { + "url": "https://rubygems.org/gems/bundler-sass" + }, + "url": { + "url": "https://rubygems.org/gems/bundler-sass" + } + }, + { + "purl": "pkg:rubygems/bundler-sass", + "download_url": { + "url": null + }, + "inferred_urls": [ + { + "url": "https://rubygems.org/gems/bundler-sass" + } + ], + "repo_download_url": { + "url": null + }, + "repo_download_url_by_package_type": { + "url": null + }, + "repo_url": { + "url": "https://rubygems.org/gems/bundler-sass" + }, + "url": { + "url": "https://rubygems.org/gems/bundler-sass" + } + }, + { + "purl": "pkg:nuget/auth0-aspnet", + "download_url": { + "url": null + }, + "inferred_urls": [ + { + "url": "https://www.nuget.org/packages/auth0-aspnet" + } + ], + "repo_download_url": { + "url": null + }, + "repo_download_url_by_package_type": { + "url": null + }, + "repo_url": { + "url": "https://www.nuget.org/packages/auth0-aspnet" + }, + "url": { + "url": "https://www.nuget.org/packages/auth0-aspnet" + } + } + ] +} diff --git a/purldb-toolkit/tests/test_purlcli.py b/purldb-toolkit/tests/test_purlcli.py index 257cced5..94d766a7 100644 --- a/purldb-toolkit/tests/test_purlcli.py +++ b/purldb-toolkit/tests/test_purlcli.py @@ -15,7 +15,7 @@ import pytest from click.testing import CliRunner from commoncode.testcase import FileDrivenTesting -from purldb_toolkit import purlcli +from purldb_toolkit import cli_test_utils, purlcli test_env = FileDrivenTesting() test_env.test_data_dir = os.path.join(os.path.dirname(__file__), "data") @@ -56,19 +56,19 @@ def test_metadata_cli(self): f_output = open(actual_result_file) output_data = json.load(f_output) + cli_test_utils.streamline_headers(output_data["headers"]) + streamline_metadata_packages(output_data["packages"]) f_expected = open(expected_result_file) expected_data = json.load(f_expected) + cli_test_utils.streamline_headers(expected_data["headers"]) + streamline_metadata_packages(expected_data["packages"]) result_objects = [ ( output_data["headers"][0]["tool_name"], expected_data["headers"][0]["tool_name"], ), - ( - output_data["headers"][0]["tool_version"], - expected_data["headers"][0]["tool_version"], - ), (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), ( output_data["headers"][0]["warnings"], @@ -96,6 +96,16 @@ def test_metadata_cli(self): for output, expected in result_objects: assert output == expected + """ + QUESTION: Is this a better way to test the contents of `packages`? + We already remove some dynamic fields like `download_url`, but + `metadata` also adds new versions as they appear. The below approach + avoids an error from a new version while checking whether the existing + expected versions still appear in the result data. + """ + for expected in expected_data["packages"]: + assert expected in output_data["packages"] + def test_metadata_cli_unique(self): """ Test the `metadata` command with actual and expected JSON output files @@ -128,19 +138,19 @@ def test_metadata_cli_unique(self): f_output = open(actual_result_file) output_data = json.load(f_output) + cli_test_utils.streamline_headers(output_data["headers"]) + streamline_metadata_packages(output_data["packages"]) f_expected = open(expected_result_file) expected_data = json.load(f_expected) + cli_test_utils.streamline_headers(expected_data["headers"]) + streamline_metadata_packages(expected_data["packages"]) result_objects = [ ( output_data["headers"][0]["tool_name"], expected_data["headers"][0]["tool_name"], ), - ( - output_data["headers"][0]["tool_version"], - expected_data["headers"][0]["tool_version"], - ), (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), ( output_data["headers"][0]["warnings"], @@ -162,10 +172,6 @@ def test_metadata_cli_unique(self): output_data["headers"][0]["options"]["--file"], expected_data["headers"][0]["options"]["--file"], ), - ( - output_data["headers"][0]["options"]["--file"], - expected_data["headers"][0]["options"]["--file"], - ), ( output_data["headers"][0]["options"]["--unique"], expected_data["headers"][0]["options"]["--unique"], @@ -176,6 +182,13 @@ def test_metadata_cli_unique(self): for output, expected in result_objects: assert output == expected + """ + QUESTION: Is this a better way to test the contents of `packages`? + See point under test_metadata_cli() re addition of new versions. + """ + for expected in expected_data["packages"]: + assert expected in output_data["packages"] + def test_metadata_cli_duplicate_input_sources(self): """ Test the `metadata` command with both `--purl` and `--file` inputs. @@ -574,8 +587,23 @@ def test_metadata_details(self, test_input, expected): command_name="metadata", unique=False, ) + cli_test_utils.streamline_headers(purl_metadata["headers"]) + streamline_metadata_packages(purl_metadata["packages"]) + + cli_test_utils.streamline_headers(expected["headers"]) + streamline_metadata_packages(expected["packages"]) + assert purl_metadata == expected + """ + QUESTION: Is this a better way to test the contents of `packages`? + See note under test_metadata_cli() re addition of new versions. + """ + assert purl_metadata["headers"] == expected["headers"] + + for expected in expected["packages"]: + assert expected in purl_metadata["packages"] + @pytest.mark.parametrize( "test_input,expected", [ @@ -700,6 +728,9 @@ def test_construct_headers(self, test_input, expected): normalized_purls=None, unique=None, ) + cli_test_utils.streamline_headers(expected) + cli_test_utils.streamline_headers(metadata_headers) + assert metadata_headers == expected @pytest.mark.parametrize( @@ -760,6 +791,9 @@ def test_construct_headers_unique(self, test_input, expected): ], unique=True, ) + cli_test_utils.streamline_headers(expected) + cli_test_utils.streamline_headers(metadata_headers) + assert metadata_headers == expected @@ -772,7 +806,6 @@ def test_urls_cli(self): because the `--output` values (paths) differ due to the use of temporary files, and therefore we test a list of relevant key-value pairs. """ - # NOTE: options do not yet include `unique`. expected_result_file = test_env.get_test_loc( "purlcli/expected_urls_output.json" ) @@ -813,15 +846,11 @@ def test_urls_cli(self): "--purl", "pkg:pypi/matchcode", "--purl", - "pkg:rubygems/bundler-sass", - "--purl", "abcdefg", "--purl", "pkg/abc", "--purl", "pkg:nuget/auth0-aspnet@1.1.0", - "--purl", - "pkg:cargo/socksprox", "--output", actual_result_file, ] @@ -831,18 +860,117 @@ def test_urls_cli(self): f_output = open(actual_result_file) output_data = json.load(f_output) + cli_test_utils.streamline_headers(output_data["headers"]) f_expected = open(expected_result_file) expected_data = json.load(f_expected) + cli_test_utils.streamline_headers(expected_data["headers"]) result_objects = [ ( output_data["headers"][0]["tool_name"], expected_data["headers"][0]["tool_name"], ), + (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), ( - output_data["headers"][0]["tool_version"], - expected_data["headers"][0]["tool_version"], + output_data["headers"][0]["warnings"], + expected_data["headers"][0]["warnings"], + ), + ( + output_data["headers"][0]["errors"], + expected_data["headers"][0]["errors"], + ), + ( + output_data["headers"][0]["options"]["command"], + expected_data["headers"][0]["options"]["command"], + ), + ( + output_data["headers"][0]["options"]["--purl"], + expected_data["headers"][0]["options"]["--purl"], + ), + ( + output_data["headers"][0]["options"]["--file"], + expected_data["headers"][0]["options"]["--file"], + ), + (output_data["packages"], expected_data["packages"]), + ] + + for output, expected in result_objects: + assert output == expected + + def test_urls_cli_unique(self): + """ + Test the `urls` command with actual and expected JSON output files. + + Note that we can't simply compare the actual and expected JSON files + because the `--output` values (paths) differ due to the use of + temporary files, and therefore we test a list of relevant key-value pairs. + """ + expected_result_file = test_env.get_test_loc( + "purlcli/expected_urls_output_unique.json" + ) + actual_result_file = test_env.get_temp_file("actual_urls_output_unique.json") + options = [ + "--purl", + "pkg:pypi/fetchcode", + "--purl", + "pkg:pypi/fetchcode@0.3.0", + "--purl", + "pkg:pypi/fetchcode@5.0.0", + "--purl", + "pkg:pypi/dejacode", + "--purl", + "pkg:pypi/dejacode@5.0.0", + "--purl", + "pkg:pypi/dejacode@5.0.0?os=windows", + "--purl", + "pkg:pypi/dejacode@5.0.0os=windows", + "--purl", + "pkg:pypi/dejacode@5.0.0?how_is_the_weather=rainy", + "--purl", + "pkg:pypi/dejacode@5.0.0#how/are/you", + "--purl", + "pkg:pypi/dejacode@10.0.0", + "--purl", + "pkg:cargo/banquo", + "--purl", + "pkg:cargo/socksprox", + "--purl", + "pkg:nginx/nginx", + "--purl", + "pkg:nginx/nginx@0.8.9?os=windows", + "--purl", + "pkg:gem/bundler-sass", + "--purl", + "pkg:rubygems/bundler-sass", + "--purl", + "pkg:pypi/matchcode", + "--purl", + "abcdefg", + "--purl", + "pkg/abc", + "--purl", + "pkg:nuget/auth0-aspnet@1.1.0", + "--output", + actual_result_file, + "--unique", + ] + runner = CliRunner() + result = runner.invoke(purlcli.get_urls, options, catch_exceptions=False) + assert result.exit_code == 0 + + f_output = open(actual_result_file) + output_data = json.load(f_output) + cli_test_utils.streamline_headers(output_data["headers"]) + + f_expected = open(expected_result_file) + expected_data = json.load(f_expected) + cli_test_utils.streamline_headers(expected_data["headers"]) + + result_objects = [ + ( + output_data["headers"][0]["tool_name"], + expected_data["headers"][0]["tool_name"], ), (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), ( @@ -879,7 +1007,6 @@ def test_urls_cli_head(self): because the `--output` values (paths) differ due to the use of temporary files, and therefore we test a list of relevant key-value pairs. """ - # NOTE: options do not yet include `unique`. expected_result_file = test_env.get_test_loc( "purlcli/expected_urls_output_head.json" ) @@ -920,15 +1047,11 @@ def test_urls_cli_head(self): "--purl", "pkg:pypi/matchcode", "--purl", - "pkg:rubygems/bundler-sass", - "--purl", "abcdefg", "--purl", "pkg/abc", "--purl", "pkg:nuget/auth0-aspnet@1.1.0", - "--purl", - "pkg:cargo/socksprox", "--head", "--output", actual_result_file, @@ -939,19 +1062,17 @@ def test_urls_cli_head(self): f_output = open(actual_result_file) output_data = json.load(f_output) + cli_test_utils.streamline_headers(output_data["headers"]) f_expected = open(expected_result_file) expected_data = json.load(f_expected) + cli_test_utils.streamline_headers(expected_data["headers"]) result_objects = [ ( output_data["headers"][0]["tool_name"], expected_data["headers"][0]["tool_name"], ), - ( - output_data["headers"][0]["tool_version"], - expected_data["headers"][0]["tool_version"], - ), (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), ( output_data["headers"][0]["warnings"], @@ -1276,7 +1397,11 @@ def test_urls_details(self, test_input, expected): file="", command_name="urls", head=False, + unique=False, ) + cli_test_utils.streamline_headers(expected["headers"]) + cli_test_utils.streamline_headers(purl_urls["headers"]) + assert purl_urls == expected @pytest.mark.parametrize( @@ -1316,7 +1441,35 @@ def test_check_urls_purl(self, test_input, expected): purl_urls = purlcli.check_urls_purl(test_input[0]) assert purl_urls == expected - # TODO: test_make_head_request() + @pytest.mark.parametrize( + "test_input,expected", + [ + ( + ["https://pypi.org/project/fetchcode/"], + {"get_request": 200, "head_request": 200}, + ), + ( + [None], + {"get_request": "N/A", "head_request": "N/A"}, + ), + ( + ["https://crates.io/crates/banquo"], + {"get_request": 404, "head_request": 404}, + ), + ( + ["https://crates.io/crates/socksprox"], + {"get_request": 404, "head_request": 404}, + ), + ( + ["https://www.nuget.org/api/v2/package/auth0-aspnet/1.1.0"], + {"get_request": 200, "head_request": 404}, + ), + ], + ) + def test_make_head_request(self, test_input, expected): + purl_status_code = purlcli.make_head_request(test_input[0]) + + assert purl_status_code == expected # TODO: not yet converted to a SCTK-like data structure. @@ -1572,6 +1725,7 @@ def test_versions(self, test_input, expected): command_name = "versions" purl_versions = purlcli.list_versions(test_input, output, file, command_name) + # TODO: consider `expected in purl_versions` instead of `purl_versions == expected` ==> handles dynamic data in the result better. assert purl_versions == expected @pytest.mark.parametrize( @@ -1606,15 +1760,20 @@ def test_versions(self, test_input, expected): "not_valid", ), ( - ["pkg:deb/debian/2ping"], + ["pkg:maven/axis/axis@1.0"], None, ), - ( - ["pkg:deb/2ping"], - "valid_but_not_supported", - ), ], ) def test_check_versions_purl(self, test_input, expected): purl_versions = purlcli.check_versions_purl(test_input[0]) assert purl_versions == expected + + +def streamline_metadata_packages(packages): + """ + Modify the `packages` list of `metadata` mappings in place to make it easier to test. + """ + for hle in packages: + hle.pop("code_view_url", None) + hle.pop("download_url", None) From e8c9abb39c220c0f0d13221646988993703634c3 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Tue, 27 Feb 2024 17:57:48 -0800 Subject: [PATCH 2/6] Refactor warnings, header construction, purl normalization and related code and tests #247 Reference: https://github.com/nexB/purldb/issues/247 Signed-off-by: John M. Horan --- purldb-toolkit/src/purldb_toolkit/purlcli.py | 243 ++++++++++-------- .../purlcli/expected_metadata_output.json | 154 +++++++++++ .../expected_metadata_output_unique.json | 9 + .../purlcli/expected_urls_output_unique.json | 9 - purldb-toolkit/tests/test_purlcli.py | 68 +++-- 5 files changed, 343 insertions(+), 140 deletions(-) diff --git a/purldb-toolkit/src/purldb_toolkit/purlcli.py b/purldb-toolkit/src/purldb_toolkit/purlcli.py index f75167fc..dfc185e2 100644 --- a/purldb-toolkit/src/purldb_toolkit/purlcli.py +++ b/purldb-toolkit/src/purldb_toolkit/purlcli.py @@ -82,14 +82,14 @@ def get_metadata_details(purls, output, file, unique, command_name): metadata_details["headers"] = [] metadata_details["packages"] = [] - normalized_purls = [] + metadata_warnings = {} + input_purls = [] + normalized_purls = [] if unique: - for purl in purls: - purl, normalized_purl = normalize_purl(purl) - normalized_purls.append((purl, normalized_purl)) - if normalized_purl not in input_purls: - input_purls.append(normalized_purl) + input_purls, normalized_purls = normalize_purls( + purls, input_purls, normalized_purls + ) else: input_purls = purls @@ -100,16 +100,8 @@ def get_metadata_details(purls, output, file, unique, command_name): metadata_purl = check_metadata_purl(purl) - if command_name == "metadata" and metadata_purl == "not_valid": - print(f"'{purl}' not valid") - continue - - if command_name == "metadata" and metadata_purl == "valid_but_not_supported": - print(f"'{purl}' not supported with `metadata` command") - continue - - if command_name == "metadata" and metadata_purl == "not_in_upstream_repo": - print(f"'{purl}' does not exist in the upstream repo") + if command_name == "metadata" and metadata_purl: + metadata_warnings[purl] = metadata_purl continue for release in list(info(purl)): @@ -124,21 +116,61 @@ def get_metadata_details(purls, output, file, unique, command_name): command_name=command_name, normalized_purls=normalized_purls, unique=unique, + purl_warnings=metadata_warnings, ) return metadata_details -def normalize_purl(purl): +def check_metadata_purl(purl): """ - Remove substrings that start with the '@', '?' or '#' separators. + Return a variable identifying the message for printing to the console by + get_metadata_details() if (1) the input PURL is invalid, (2) its type is not + supported by `metadata` or (3) its existence was not validated (e.g., + "does not exist in the upstream repo"). + + This message will also be reported by construct_headers() in the + `warnings` field of the `header` section of the JSON object returned by + the `metadata` command. """ - input_purl = purl - purl = purl.strip() - purl = re.split("[@,?,#,]+", purl)[0] - normalized_purl = purl + check_validation = validate_purls([purl]) + if check_validation is None: + return "validation_error" + results = check_validation[0] + + if results["valid"] == False: + return "not_valid" + + # This is manually constructed from a visual inspection of fetchcode/package.py. + metadata_supported_ecosystems = [ + "bitbucket", + "cargo", + "github", + "npm", + "pypi", + "rubygems", + ] + metadata_purl = PackageURL.from_string(purl) + + if metadata_purl.type not in metadata_supported_ecosystems: + return "valid_but_not_supported" + + if results["exists"] == False: + return "not_in_upstream_repo" + - return input_purl, normalized_purl +def normalize_purls(purls, input_purls, normalized_purls): + for purl in purls: + input_purl = purl + purl = purl.strip() + purl = re.split("[@,?,#,]+", purl)[0] + normalized_purl = purl + + normalized_purls.append((input_purl, normalized_purl)) + if normalized_purl not in input_purls: + input_purls.append(normalized_purl) + + return input_purls, normalized_purls def construct_headers( @@ -149,6 +181,7 @@ def construct_headers( head=None, normalized_purls=None, unique=None, + purl_warnings=None, ): """ Return a list comprising the `headers` content of the dictionary output. @@ -195,38 +228,32 @@ def construct_headers( if not purl: continue - # `metadata` warnings: - metadata_purl = check_metadata_purl(purl) - - if command_name == "metadata" and metadata_purl == "not_valid": - warnings.append(f"'{purl}' not valid") - continue - - if command_name == "metadata" and metadata_purl == "valid_but_not_supported": - warnings.append(f"'{purl}' not supported with `metadata` command") - continue + warning_text = { + "validation_error": f"'{purl}' encountered a validation error", + "not_valid": f"'{purl}' not valid", + "valid_but_not_supported": f"'{purl}' not supported with `{command_name}` command", + "valid_but_not_fully_supported": f"'{purl}' not fully supported with `urls` command", + "not_in_upstream_repo": f"'{purl}' does not exist in the upstream repo", + } - if command_name == "metadata" and metadata_purl == "not_in_upstream_repo": - warnings.append(f"'{purl}' does not exist in the upstream repo") - continue + # `metadata` warnings: + if command_name == "metadata": + purl_warning = purl_warnings.get(purl, None) + if purl_warning: + warnings.append(warning_text[purl_warning]) + print(warning_text[purl_warning]) + continue # `urls` warnings: - urls_purl = check_urls_purl(purl) - - if command_name == "urls" and urls_purl == "not_valid": - warnings.append(f"'{purl}' not valid") - continue + if command_name == "urls": + purl_warning = purl_warnings.get(purl, None) + if purl_warning: + warnings.append(warning_text[purl_warning]) + print(warning_text[purl_warning]) + continue - if command_name == "urls" and urls_purl == "valid_but_not_supported": - warnings.append(f"'{purl}' not supported with `urls` command") - continue - - if command_name == "urls" and urls_purl == "valid_but_not_fully_supported": - warnings.append(f"'{purl}' not fully supported with `urls` command") - - if command_name == "urls" and urls_purl == "not_in_upstream_repo": - warnings.append(f"'{purl}' does not exist in the upstream repo") - continue + # add `versions` warnings here + # it's not yet clear whether `validate` will have any similar warnings headers_content["errors"] = errors headers_content["warnings"] = warnings @@ -235,40 +262,6 @@ def construct_headers( return headers -def check_metadata_purl(purl): - """ - Return a variable identifying the message for printing to the console by - get_metadata_details() if (1) the input PURL is invalid, (2) its type is not - supported by `metadata` or (3) its existence was not validated (e.g., - "does not exist in the upstream repo"). - - This message will also be reported by construct_headers() in the - `warnings` field of the `header` section of the JSON object returned by - the `metadata` command. - """ - results = validate_purls([purl])[0] - - if results["valid"] == False: - return "not_valid" - - # This is manually constructed from a visual inspection of fetchcode/package.py. - metadata_supported_ecosystems = [ - "bitbucket", - "cargo", - "github", - "npm", - "pypi", - "rubygems", - ] - metadata_purl = PackageURL.from_string(purl) - - if metadata_purl.type not in metadata_supported_ecosystems: - return "valid_but_not_supported" - - if results["exists"] == False: - return "not_in_upstream_repo" - - @purlcli.command(name="urls") @click.option( "--purl", @@ -329,14 +322,14 @@ def get_urls_details(purls, output, file, unique, head, command_name): urls_details["headers"] = [] urls_details["packages"] = [] - normalized_purls = [] + urls_warnings = {} + input_purls = [] + normalized_purls = [] if unique: - for purl in purls: - purl, normalized_purl = normalize_purl(purl) - normalized_purls.append((purl, normalized_purl)) - if normalized_purl not in input_purls: - input_purls.append(normalized_purl) + input_purls, normalized_purls = normalize_purls( + purls, input_purls, normalized_purls + ) else: input_purls = purls @@ -350,21 +343,17 @@ def get_urls_details(purls, output, file, unique, head, command_name): urls_purl = check_urls_purl(purl) - # Print warnings to terminal. - if command_name == "urls" and urls_purl == "not_valid": - print(f"'{purl}' not valid") - continue - - if command_name == "urls" and urls_purl == "valid_but_not_supported": - print(f"'{purl}' not supported with `urls` command") + if command_name == "urls" and urls_purl in [ + "validation_error", + "not_valid", + "valid_but_not_supported", + "not_in_upstream_repo", + ]: + urls_warnings[purl] = urls_purl continue - if command_name == "urls" and urls_purl == "valid_but_not_fully_supported": - print(f"'{purl}' not fully supported with `urls` command") - - if command_name == "urls" and urls_purl == "not_in_upstream_repo": - print(f"'{purl}' does not exist in the upstream repo") - continue + if command_name == "urls" and urls_purl in ["valid_but_not_fully_supported"]: + urls_warnings[purl] = urls_purl # Add the URLs. url_purl = PackageURL.from_string(purl) @@ -423,6 +412,7 @@ def get_urls_details(purls, output, file, unique, head, command_name): command_name=command_name, normalized_purls=normalized_purls, unique=unique, + purl_warnings=urls_warnings, ) return urls_details @@ -461,7 +451,10 @@ def check_urls_purl(purl): or its type is not supported (or not fully supported) by `urls`, or it does not exist in the upstream repo. """ - results = validate_purls([purl])[0] + check_validation = validate_purls([purl]) + if check_validation is None: + return "validation_error" + results = check_validation[0] if results["valid"] == False: return "not_valid" @@ -582,10 +575,16 @@ def validate_purls(purls): continue request_body = {"purl": purl, "check_existence": True} response = requests.get(api_query, params=request_body) - # results = response.json() try: results = response.json() - except: + # print(f"response - {response}") + # print(f"response.text - {response.text}") + # print(f"response.json() - {response.json()}") + except Exception as e: + print(f"'validate' endpoint error for '{purl}': {e}") + # print(f"response - {response}") + # print(f"response.text - {response.text}") + # print(f"response.json() - {response.json()}") return validated_purls.append(results) @@ -663,10 +662,19 @@ def list_versions(purls, output, file, command_name): continue # TODO: Add to warnings and test it as well. - if command_name == "versions" and versions_purl == "error_fetching_purl": - print(f"Error fetching '{purl}'") + if command_name == "versions" and versions_purl == "validation_error": + print(f"'{purl}' encountered a validation error") continue + # TODO: Is this needed to catch the intermittent fetchcode/package_versions.py versions()/get_response() `Error while fetching` error? I don't think so. + # if command_name == "versions" and versions_purl == "error_fetching_purl": + # print(f"Error fetching '{purl}'") + + # TODO: Is the subsumed by the preceding `validation_error`? I think YES. + # if versions(purl) is None: + # print(f"{purl} encountered a versions(purl) error") + # continue + for package_version_object in list(versions(purl)): purl_version_data = {} purl_version = package_version_object.to_dict()["value"] @@ -707,10 +715,19 @@ def check_versions_purl(purl): "pypi", ] """ - results = validate_purls([purl])[0] - - if results is None: - return "error_fetching_purl" + check_validation = validate_purls([purl]) + if check_validation is None: + return "validation_error" + results = check_validation[0] + + # TODO: Is this needed to catch the intermittent fetchcode/package_versions.py versions()/get_response() `Error while fetching` error? No, it does not catch that error. + # 2024-02-27 Tuesday 16:43:54. Just got one: + # (venv) Tue Feb 27, 2024 04:40 PM /home/jmh/dev/nexb/purldb jmh (247-purlcli-update-validate-and-versions) + # $ python -m purldb_toolkit.purlcli versions --purl pkg:gem/bundler-sass --purl pkg:deb/debian/2ping --output - + # Error while fetching 'https://sources.debian.org/api/src/2ping': 503 + # Traceback (most recent call last): + # if results is None: + # return "error_fetching_purl" if results["valid"] == False: return "not_valid" diff --git a/purldb-toolkit/tests/data/purlcli/expected_metadata_output.json b/purldb-toolkit/tests/data/purlcli/expected_metadata_output.json index 8e87ed86..a237dae1 100644 --- a/purldb-toolkit/tests/data/purlcli/expected_metadata_output.json +++ b/purldb-toolkit/tests/data/purlcli/expected_metadata_output.json @@ -8,6 +8,9 @@ "--purl": [ "pkg:pypi/fetchcode", "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", "pkg:cargo/banquo", "pkg:nginx/nginx", "pkg:gem/rails", @@ -19,6 +22,9 @@ "purls": [ "pkg:pypi/fetchcode", "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", "pkg:cargo/banquo", "pkg:nginx/nginx", "pkg:gem/rails", @@ -26,6 +32,8 @@ ], "errors": [], "warnings": [ + "'pkg:pypi/fetchcode@0.3.0os=windows' does not exist in the upstream repo", + "'pkg:pypi/fetchcode@5.0.0' does not exist in the upstream repo", "'pkg:nginx/nginx' not supported with `metadata` command", "'pkg:gem/rails' not supported with `metadata` command" ] @@ -320,6 +328,152 @@ "repository_download_url": null, "api_data_url": null }, + { + "purl": "pkg:pypi/fetchcode@0.3.0?os=windows", + "type": "pypi", + "namespace": null, + "name": "fetchcode", + "version": "0.3.0", + "qualifiers": { + "os": "windows" + }, + "subpath": null, + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": "https://github.com/nexB/fetchcode", + "download_url": null, + "api_url": "https://pypi.org/pypi/fetchcode/json", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": "Apache-2.0", + "notice_text": null, + "root_path": null, + "dependencies": [], + "contains_source_code": null, + "source_packages": [], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null + }, + { + "purl": "pkg:pypi/fetchcode@0.1.0", + "type": "pypi", + "namespace": null, + "name": "fetchcode", + "version": "0.1.0", + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": "https://github.com/nexB/fetchcode", + "download_url": "https://files.pythonhosted.org/packages/19/a0/c90e5ba4d71ea1a1a89784f6d839ffb0dbf32d270cba04d5602188cb3713/fetchcode-0.1.0-py3-none-any.whl", + "api_url": "https://pypi.org/pypi/fetchcode/json", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": "Apache-2.0", + "notice_text": null, + "root_path": null, + "dependencies": [], + "contains_source_code": null, + "source_packages": [], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null + }, + { + "purl": "pkg:pypi/fetchcode@0.2.0", + "type": "pypi", + "namespace": null, + "name": "fetchcode", + "version": "0.2.0", + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": "https://github.com/nexB/fetchcode", + "download_url": "https://files.pythonhosted.org/packages/d7/e9/96e9302e84e326b3c10a40c1723f21f4db96b557a17c6871e7a4c6336906/fetchcode-0.2.0-py3-none-any.whl", + "api_url": "https://pypi.org/pypi/fetchcode/json", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": "Apache-2.0", + "notice_text": null, + "root_path": null, + "dependencies": [], + "contains_source_code": null, + "source_packages": [], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null + }, + { + "purl": "pkg:pypi/fetchcode@0.3.0", + "type": "pypi", + "namespace": null, + "name": "fetchcode", + "version": "0.3.0", + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": "https://github.com/nexB/fetchcode", + "download_url": "https://files.pythonhosted.org/packages/8d/fb/e45da0abf63504c3f88ad02537dc9dc64ea5206b09ce29cfb8191420d678/fetchcode-0.3.0-py3-none-any.whl", + "api_url": "https://pypi.org/pypi/fetchcode/json", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": "Apache-2.0", + "notice_text": null, + "root_path": null, + "dependencies": [], + "contains_source_code": null, + "source_packages": [], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null + }, { "purl": "pkg:cargo/banquo", "type": "cargo", diff --git a/purldb-toolkit/tests/data/purlcli/expected_metadata_output_unique.json b/purldb-toolkit/tests/data/purlcli/expected_metadata_output_unique.json index 8694c373..eaf57f8c 100644 --- a/purldb-toolkit/tests/data/purlcli/expected_metadata_output_unique.json +++ b/purldb-toolkit/tests/data/purlcli/expected_metadata_output_unique.json @@ -8,6 +8,9 @@ "--purl": [ "pkg:pypi/fetchcode", "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", "pkg:cargo/banquo", "pkg:nginx/nginx", "pkg:gem/rails", @@ -20,6 +23,9 @@ "purls": [ "pkg:pypi/fetchcode", "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", "pkg:cargo/banquo", "pkg:nginx/nginx", "pkg:gem/rails", @@ -28,6 +34,9 @@ "errors": [], "warnings": [ "input PURL: 'pkg:pypi/fetchcode@0.3.0' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@0.3.0?os=windows' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@0.3.0os=windows' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@5.0.0' normalized to 'pkg:pypi/fetchcode'", "'pkg:nginx/nginx' not supported with `metadata` command", "'pkg:gem/rails' not supported with `metadata` command" ] diff --git a/purldb-toolkit/tests/data/purlcli/expected_urls_output_unique.json b/purldb-toolkit/tests/data/purlcli/expected_urls_output_unique.json index 5eaaa483..193b8096 100644 --- a/purldb-toolkit/tests/data/purlcli/expected_urls_output_unique.json +++ b/purldb-toolkit/tests/data/purlcli/expected_urls_output_unique.json @@ -66,17 +66,8 @@ "input PURL: 'pkg:nginx/nginx@0.8.9?os=windows' normalized to 'pkg:nginx/nginx'", "input PURL: 'pkg:nuget/auth0-aspnet@1.1.0' normalized to 'pkg:nuget/auth0-aspnet'", "'pkg:pypi/fetchcode' not fully supported with `urls` command", - "'pkg:pypi/fetchcode@0.3.0' not fully supported with `urls` command", - "'pkg:pypi/fetchcode@5.0.0' does not exist in the upstream repo", "'pkg:pypi/dejacode' not fully supported with `urls` command", - "'pkg:pypi/dejacode@5.0.0' not fully supported with `urls` command", - "'pkg:pypi/dejacode@5.0.0?os=windows' not fully supported with `urls` command", - "'pkg:pypi/dejacode@5.0.0os=windows' does not exist in the upstream repo", - "'pkg:pypi/dejacode@5.0.0?how_is_the_weather=rainy' not fully supported with `urls` command", - "'pkg:pypi/dejacode@5.0.0#how/are/you' not fully supported with `urls` command", - "'pkg:pypi/dejacode@10.0.0' does not exist in the upstream repo", "'pkg:nginx/nginx' not supported with `urls` command", - "'pkg:nginx/nginx@0.8.9?os=windows' not supported with `urls` command", "'pkg:pypi/matchcode' does not exist in the upstream repo", "'abcdefg' not valid", "'pkg/abc' not valid" diff --git a/purldb-toolkit/tests/test_purlcli.py b/purldb-toolkit/tests/test_purlcli.py index 94d766a7..6aa3b13f 100644 --- a/purldb-toolkit/tests/test_purlcli.py +++ b/purldb-toolkit/tests/test_purlcli.py @@ -13,6 +13,7 @@ import click import pytest +import requests from click.testing import CliRunner from commoncode.testcase import FileDrivenTesting from purldb_toolkit import cli_test_utils, purlcli @@ -40,6 +41,12 @@ def test_metadata_cli(self): "--purl", "pkg:pypi/fetchcode@0.3.0", "--purl", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "--purl", + "pkg:pypi/fetchcode@0.3.0os=windows", + "--purl", + "pkg:pypi/fetchcode@5.0.0", + "--purl", "pkg:cargo/banquo", "--purl", "pkg:nginx/nginx", @@ -121,6 +128,12 @@ def test_metadata_cli_unique(self): "--purl", "pkg:pypi/fetchcode@0.3.0", "--purl", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "--purl", + "pkg:pypi/fetchcode@0.3.0os=windows", + "--purl", + "pkg:pypi/fetchcode@5.0.0", + "--purl", "pkg:cargo/banquo", "--purl", "pkg:nginx/nginx", @@ -642,41 +655,58 @@ def test_check_metadata_purl(self, test_input, expected): assert purl_metadata == expected @pytest.mark.parametrize( - "test_input,expected", + "test_input,expected_input_purls,expected_normalized_purls", [ ( - ["pkg:pypi/fetchcode"], - ("pkg:pypi/fetchcode", "pkg:pypi/fetchcode"), + [["pkg:pypi/fetchcode"]], + (["pkg:pypi/fetchcode"]), + ([("pkg:pypi/fetchcode", "pkg:pypi/fetchcode")]), ), ( - ["pkg:pypi/fetchcode@1.2.3"], - ("pkg:pypi/fetchcode@1.2.3", "pkg:pypi/fetchcode"), + [["pkg:pypi/fetchcode@1.2.3"]], + (["pkg:pypi/fetchcode"]), + ([("pkg:pypi/fetchcode@1.2.3", "pkg:pypi/fetchcode")]), ), ( - ["pkg:pypi/fetchcode@1.2.3?howistheweather=rainy"], + [["pkg:pypi/fetchcode@1.2.3?howistheweather=rainy"]], + (["pkg:pypi/fetchcode"]), ( - "pkg:pypi/fetchcode@1.2.3?howistheweather=rainy", - "pkg:pypi/fetchcode", + [ + ( + "pkg:pypi/fetchcode@1.2.3?howistheweather=rainy", + "pkg:pypi/fetchcode", + ) + ] ), ), ( - ["pkg:pypi/fetchcode?howistheweather=rainy"], - ("pkg:pypi/fetchcode?howistheweather=rainy", "pkg:pypi/fetchcode"), + [["pkg:pypi/fetchcode?howistheweather=rainy"]], + (["pkg:pypi/fetchcode"]), + ([("pkg:pypi/fetchcode?howistheweather=rainy", "pkg:pypi/fetchcode")]), ), ( - ["pkg:pypi/fetchcode#this/is/a/path"], - ("pkg:pypi/fetchcode#this/is/a/path", "pkg:pypi/fetchcode"), + [["pkg:pypi/fetchcode#this/is/a/path"]], + (["pkg:pypi/fetchcode"]), + ([("pkg:pypi/fetchcode#this/is/a/path", "pkg:pypi/fetchcode")]), ), ( - ["pkg:pypi/?fetchcode"], - ("pkg:pypi/?fetchcode", "pkg:pypi/"), + [["pkg:pypi/?fetchcode"]], + (["pkg:pypi/"]), + ([("pkg:pypi/?fetchcode", "pkg:pypi/")]), ), - (["zzzzz"], ("zzzzz", "zzzzz")), ], ) - def test_normalize_purl(self, test_input, expected): - normalized_purl = purlcli.normalize_purl(test_input[0]) - assert normalized_purl == expected + def test_normalize_purls( + self, test_input, expected_input_purls, expected_normalized_purls + ): + input_purls = [] + normalized_purls = [] + input_purls, normalized_purls = purlcli.normalize_purls( + test_input[0], input_purls, normalized_purls + ) + + assert input_purls == expected_input_purls + assert normalized_purls == expected_normalized_purls @pytest.mark.parametrize( "test_input,expected", @@ -727,6 +757,7 @@ def test_construct_headers(self, test_input, expected): head=None, normalized_purls=None, unique=None, + purl_warnings={"pkg:gem/bundler-sass": "valid_but_not_supported"}, ) cli_test_utils.streamline_headers(expected) cli_test_utils.streamline_headers(metadata_headers) @@ -790,6 +821,7 @@ def test_construct_headers_unique(self, test_input, expected): ("pkg:pypi/fetchcode@0.2.0", "pkg:pypi/fetchcode"), ], unique=True, + purl_warnings={"pkg:gem/bundler-sass": "valid_but_not_supported"}, ) cli_test_utils.streamline_headers(expected) cli_test_utils.streamline_headers(metadata_headers) From bd3b2cab4a9c211167ce166f3dcbee2b3dc30e66 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Tue, 5 Mar 2024 15:22:06 -0800 Subject: [PATCH 3/6] Add logging, finish data structure update and refactor code and tests #247 Reference: https://github.com/nexB/purldb/issues/247 Signed-off-by: John M. Horan --- purldb-toolkit/src/purldb_toolkit/purlcli.py | 340 ++++--- .../purlcli/expected_validate_output.json | 81 ++ .../expected_validate_output_unique.json | 63 ++ .../purlcli/expected_versions_output.json | 176 ++++ .../expected_versions_output_unique.json | 140 +++ purldb-toolkit/tests/test_purlcli.py | 927 +++++++++++++----- 6 files changed, 1356 insertions(+), 371 deletions(-) create mode 100644 purldb-toolkit/tests/data/purlcli/expected_validate_output.json create mode 100644 purldb-toolkit/tests/data/purlcli/expected_validate_output_unique.json create mode 100644 purldb-toolkit/tests/data/purlcli/expected_versions_output.json create mode 100644 purldb-toolkit/tests/data/purlcli/expected_versions_output_unique.json diff --git a/purldb-toolkit/src/purldb_toolkit/purlcli.py b/purldb-toolkit/src/purldb_toolkit/purlcli.py index dfc185e2..c08a86de 100644 --- a/purldb-toolkit/src/purldb_toolkit/purlcli.py +++ b/purldb-toolkit/src/purldb_toolkit/purlcli.py @@ -8,8 +8,11 @@ # import json +import logging +import os import re from importlib.metadata import version +from pathlib import Path import click import requests @@ -18,8 +21,6 @@ from packageurl import PackageURL from packageurl.contrib import purl2url -from packagedb.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE - @click.group() def purlcli(): @@ -84,14 +85,9 @@ def get_metadata_details(purls, output, file, unique, command_name): metadata_warnings = {} - input_purls = [] - normalized_purls = [] - if unique: - input_purls, normalized_purls = normalize_purls( - purls, input_purls, normalized_purls - ) - else: - input_purls = purls + input_purls, normalized_purls = normalize_purls(purls, unique) + + clear_log_file() for purl in input_purls: purl = purl.strip() @@ -133,10 +129,10 @@ def check_metadata_purl(purl): `warnings` field of the `header` section of the JSON object returned by the `metadata` command. """ - check_validation = validate_purls([purl]) + check_validation = validate_purl(purl) if check_validation is None: return "validation_error" - results = check_validation[0] + results = check_validation if results["valid"] == False: return "not_valid" @@ -159,16 +155,20 @@ def check_metadata_purl(purl): return "not_in_upstream_repo" -def normalize_purls(purls, input_purls, normalized_purls): - for purl in purls: - input_purl = purl - purl = purl.strip() - purl = re.split("[@,?,#,]+", purl)[0] - normalized_purl = purl - - normalized_purls.append((input_purl, normalized_purl)) - if normalized_purl not in input_purls: - input_purls.append(normalized_purl) +def normalize_purls(purls, unique): + input_purls = [] + normalized_purls = [] + if unique: + for purl in purls: + input_purl = purl + purl = purl.strip() + purl = re.split("[@,?,#,]+", purl)[0] + normalized_purl = purl + normalized_purls.append((input_purl, normalized_purl)) + if normalized_purl not in input_purls: + input_purls.append(normalized_purl) + else: + input_purls = purls return input_purls, normalized_purls @@ -219,7 +219,7 @@ def construct_headers( headers_content["options"] = options headers_content["purls"] = purls - if (command_name in ["metadata", "urls"]) and unique: + if (command_name in ["metadata", "urls", "validate", "versions"]) and unique: for purl in normalized_purls: if purl[0] != purl[1]: warnings.append(f"input PURL: '{purl[0]}' normalized to '{purl[1]}'") @@ -229,6 +229,7 @@ def construct_headers( continue warning_text = { + "error_fetching_purl": f"'error fetching {purl}'", "validation_error": f"'{purl}' encountered a validation error", "not_valid": f"'{purl}' not valid", "valid_but_not_supported": f"'{purl}' not supported with `{command_name}` command", @@ -236,24 +237,18 @@ def construct_headers( "not_in_upstream_repo": f"'{purl}' does not exist in the upstream repo", } - # `metadata` warnings: - if command_name == "metadata": - purl_warning = purl_warnings.get(purl, None) - if purl_warning: - warnings.append(warning_text[purl_warning]) - print(warning_text[purl_warning]) - continue - - # `urls` warnings: - if command_name == "urls": + if command_name in ["metadata", "urls", "validate", "versions"]: purl_warning = purl_warnings.get(purl, None) if purl_warning: warnings.append(warning_text[purl_warning]) print(warning_text[purl_warning]) continue - # add `versions` warnings here - # it's not yet clear whether `validate` will have any similar warnings + log_file = Path("purldb-toolkit/src/purldb_toolkit/app.log") + if log_file.is_file(): + with open(log_file, "r") as f: + for line in f: + errors.append(line) headers_content["errors"] = errors headers_content["warnings"] = warnings @@ -324,14 +319,9 @@ def get_urls_details(purls, output, file, unique, head, command_name): urls_warnings = {} - input_purls = [] - normalized_purls = [] - if unique: - input_purls, normalized_purls = normalize_purls( - purls, input_purls, normalized_purls - ) - else: - input_purls = purls + input_purls, normalized_purls = normalize_purls(purls, unique) + + clear_log_file() for purl in input_purls: url_detail = {} @@ -451,10 +441,10 @@ def check_urls_purl(purl): or its type is not supported (or not fully supported) by `urls`, or it does not exist in the upstream repo. """ - check_validation = validate_purls([purl]) + check_validation = validate_purl(purl) if check_validation is None: return "validation_error" - results = check_validation[0] + results = check_validation if results["valid"] == False: return "not_valid" @@ -512,7 +502,6 @@ def check_urls_purl(purl): return "valid_but_not_fully_supported" -# Not yet converted to a SCTK-like data structure. @purlcli.command(name="validate") @click.option( "--purl", @@ -534,22 +523,100 @@ def check_urls_purl(purl): required=False, help="Read a list of PURLs from a FILE, one per line.", ) -def validate(purls, output, file): +@click.option( + "--unique", + is_flag=True, + required=False, + help="Return data only for unique PURLs.", +) +def validate(purls, output, file, unique): """ - Check the syntax of one or more PURLs. + Check the syntax and upstream repo status of one or more PURLs. """ check_for_duplicate_input_sources(purls, file) if file: purls = file.read().splitlines(False) - validated_purls = validate_purls(purls) + context = click.get_current_context() + command_name = context.command.name + + validated_purls = get_validate_details(purls, output, file, unique, command_name) json.dump(validated_purls, output, indent=4) -def validate_purls(purls): +def get_validate_details(purls, output, file, unique, command_name): + """ + Return a dictionary containing validation data for each PURL in the `purls` + input list. + """ + validate_details = {} + validate_details["headers"] = [] + + validate_warnings = {} + + input_purls, normalized_purls = normalize_purls(purls, unique) + + validate_details["packages"] = [] + + clear_log_file() + + for purl in input_purls: + purl = purl.strip() + if not purl: + continue + + validated_purl = check_validate_purl(purl) + + if command_name == "urls" and validated_purl in [ + "validation_error", + "not_valid", + "valid_but_not_supported", + "not_in_upstream_repo", + ]: + validate_warnings[purl] = validated_purl + continue + + if validated_purl: + validate_details["packages"].append(validate_purl(purl)) + + validate_details["headers"] = construct_headers( + purls=purls, + output=output, + file=file, + command_name=command_name, + normalized_purls=normalized_purls, + unique=unique, + purl_warnings=validate_warnings, + ) + + return validate_details + + +def check_validate_purl(purl): + """ + As applicable, return a variable indicating that the input PURL is + valid/invalid or does not exist in the upstream repo. + """ + check_validation = validate_purl(purl) + if check_validation is None: + return "validation_error" + results = check_validation + + if results["valid"] == False: + return "not_valid" + + if results["exists"] == False: + return "not_in_upstream_repo" + + if results["exists"] == True: + return check_validation + + +def validate_purl(purl): """ - Return a JSON object containing data regarding the validity of the input PURL. + Return a JSON object containing data from the PurlDB `validate` endpoint + regarding the validity of the input PURL. Based on packagedb.package_managers VERSION_API_CLASSES_BY_PACKAGE_TYPE and packagedb/api.py class PurlValidateViewSet(viewsets.ViewSet) @@ -567,31 +634,36 @@ def validate_purls(purls): "nuget", "pypi", """ + logger = logging.getLogger(__name__) + api_query = "https://public.purldb.io/api/validate/" - validated_purls = [] - for purl in purls: - purl = purl.strip() - if not purl: - continue - request_body = {"purl": purl, "check_existence": True} - response = requests.get(api_query, params=request_body) - try: - results = response.json() - # print(f"response - {response}") - # print(f"response.text - {response.text}") - # print(f"response.json() - {response.json()}") - except Exception as e: - print(f"'validate' endpoint error for '{purl}': {e}") - # print(f"response - {response}") - # print(f"response.text - {response.text}") - # print(f"response.json() - {response.json()}") - return - validated_purls.append(results) - - return validated_purls - - -# Not yet converted to a SCTK-like data structure. + request_body = {"purl": purl, "check_existence": True} + + try: + response = requests.get(api_query, params=request_body).json() + + except json.decoder.JSONDecodeError as e: + + print(f"validate_purl(): json.decoder.JSONDecodeError for '{purl}': {e}") + + logging.basicConfig( + filename="purldb-toolkit/src/purldb_toolkit/app.log", + level=logging.ERROR, + format="%(levelname)s - %(message)s", + filemode="w", + ) + + logger.error(f"validate_purl(): json.decoder.JSONDecodeError for '{purl}': {e}") + + except Exception as e: + print(f"'validate' endpoint error for '{purl}': {e}") + + else: + if response is None: + print(f"'{purl}' -- response.status_code for None = {response.status_code}") + return response + + @purlcli.command(name="versions") @click.option( "--purl", @@ -613,7 +685,13 @@ def validate_purls(purls): required=False, help="Read a list of PURLs from a FILE, one per line.", ) -def get_versions(purls, output, file): +@click.option( + "--unique", + is_flag=True, + required=False, + help="Return data only for unique PURLs.", +) +def get_versions(purls, output, file, unique): """ Given one or more PURLs, return a list of all known versions for each PURL. """ @@ -625,78 +703,86 @@ def get_versions(purls, output, file): context = click.get_current_context() command_name = context.command.name - purl_versions = list_versions(purls, output, file, command_name) + purl_versions = get_versions_details(purls, output, file, unique, command_name) json.dump(purl_versions, output, indent=4) -# construct_headers() has not yet been implemented for this `versions` command -# -- or for the `validate` command. -def list_versions(purls, output, file, command_name): +def get_versions_details(purls, output, file, unique, command_name): """ Return a list of dictionaries containing version-related data for each PURL in the `purls` input list. `check_versions_purl()` will print an error message to the console (also displayed in the JSON output) when necessary. """ - purl_versions = [] - for purl in purls: - purl_data = {} - purl_data["purl"] = purl - purl_data["versions"] = [] + versions_details = {} + versions_details["headers"] = [] + versions_details["packages"] = [] - purl = purl.strip() - if not purl: - continue + versions_warnings = {} - versions_purl = check_versions_purl(purl) + input_purls, normalized_purls = normalize_purls(purls, unique) - if command_name == "versions" and versions_purl == "not_valid": - print(f"'{purl}' not valid") - continue - - if command_name == "versions" and versions_purl == "valid_but_not_supported": - print(f"'{purl}' not supported with `versions` command") - continue + clear_log_file() - if command_name == "versions" and versions_purl == "not_in_upstream_repo": - print(f"'{purl}' does not exist in the upstream repo") + for purl in input_purls: + purl = purl.strip() + if not purl: continue - # TODO: Add to warnings and test it as well. - if command_name == "versions" and versions_purl == "validation_error": - print(f"'{purl}' encountered a validation error") - continue + purl_data = {} + purl_data["purl"] = purl + purl_data["versions"] = [] - # TODO: Is this needed to catch the intermittent fetchcode/package_versions.py versions()/get_response() `Error while fetching` error? I don't think so. - # if command_name == "versions" and versions_purl == "error_fetching_purl": - # print(f"Error fetching '{purl}'") + versions_purl = check_versions_purl(purl) - # TODO: Is the subsumed by the preceding `validation_error`? I think YES. - # if versions(purl) is None: - # print(f"{purl} encountered a versions(purl) error") - # continue + if command_name == "versions" and versions_purl: + versions_warnings[purl] = versions_purl + continue - for package_version_object in list(versions(purl)): + for package_version in list(versions(purl)): purl_version_data = {} - purl_version = package_version_object.to_dict()["value"] - nested_purl = purl + "@" + f"{purl_version}" + purl_version = package_version.to_dict()["value"] + + # We use `versions()` from fetchcode/package_versions.py, which + # keeps the version (if any) of the input PURL in its output, so + # "pkg:pypi/fetchcode@0.3.0" is returned as + # "pkg:pypi/fetchcode@0.3.0@0.1.0", "pkg:pypi/fetchcode@0.3.0@0.2.0" + # etc. Thus, we remove any string starting with `@` first. + raw_purl = purl = re.split("[@,]+", purl)[0] + nested_purl = raw_purl + "@" + f"{purl_version}" purl_version_data["purl"] = nested_purl purl_version_data["version"] = f"{purl_version}" purl_version_data["release_date"] = ( - f'{package_version_object.to_dict()["release_date"]}' + f'{package_version.to_dict()["release_date"]}' ) purl_data["versions"].append(purl_version_data) - purl_versions.append(purl_data) + versions_details["packages"].append(purl_data) - return purl_versions + versions_details["headers"] = construct_headers( + purls=purls, + output=output, + file=file, + command_name=command_name, + normalized_purls=normalized_purls, + unique=unique, + purl_warnings=versions_warnings, + ) + + return versions_details def check_versions_purl(purl): """ - Return a message for printing to the console if the input PURL is invalid, - its type is not supported by `versions` or its existence was not validated. + Return a variable identifying the message for printing to the console by + get_versions_details() if (1) the input PURL is invalid, (2) its type is not + supported by `versions` or (3) its existence was not validated (e.g., + "does not exist in the upstream repo"). + + This message will also be reported by construct_headers() in the + `warnings` field of the `header` section of the JSON object returned by + the `versions` command. Note for dev purposes: SUPPORTED_ECOSYSTEMS (imported from fetchcode.package_versions) comprises the following types: @@ -715,25 +801,15 @@ def check_versions_purl(purl): "pypi", ] """ - check_validation = validate_purls([purl]) + check_validation = validate_purl(purl) if check_validation is None: return "validation_error" - results = check_validation[0] - - # TODO: Is this needed to catch the intermittent fetchcode/package_versions.py versions()/get_response() `Error while fetching` error? No, it does not catch that error. - # 2024-02-27 Tuesday 16:43:54. Just got one: - # (venv) Tue Feb 27, 2024 04:40 PM /home/jmh/dev/nexb/purldb jmh (247-purlcli-update-validate-and-versions) - # $ python -m purldb_toolkit.purlcli versions --purl pkg:gem/bundler-sass --purl pkg:deb/debian/2ping --output - - # Error while fetching 'https://sources.debian.org/api/src/2ping': 503 - # Traceback (most recent call last): - # if results is None: - # return "error_fetching_purl" + results = check_validation if results["valid"] == False: return "not_valid" supported = SUPPORTED_ECOSYSTEMS - versions_purl = PackageURL.from_string(purl) if versions_purl.type not in supported: @@ -757,5 +833,11 @@ def check_for_duplicate_input_sources(purls, file): raise click.UsageError("Use either purls or file.") +def clear_log_file(): + log_file = Path("purldb-toolkit/src/purldb_toolkit/app.log") + if log_file.is_file(): + os.remove(log_file) + + if __name__ == "__main__": purlcli() diff --git a/purldb-toolkit/tests/data/purlcli/expected_validate_output.json b/purldb-toolkit/tests/data/purlcli/expected_validate_output.json new file mode 100644 index 00000000..928822f8 --- /dev/null +++ b/purldb-toolkit/tests/data/purlcli/expected_validate_output.json @@ -0,0 +1,81 @@ +{ + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "validate", + "--purl": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", + "pkg:cargo/banquo", + "pkg:nginx/nginx", + "pkg:gem/rails", + "pkg:rubygems/rails" + ], + "--file": null, + "--output": "" + }, + "purls": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", + "pkg:cargo/banquo", + "pkg:nginx/nginx", + "pkg:gem/rails", + "pkg:rubygems/rails" + ], + "errors": [], + "warnings": [] + } + ], + "packages": [ + { + "valid": true, + "exists": true, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:pypi/fetchcode" + }, + { + "valid": true, + "exists": true, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:pypi/fetchcode@0.3.0" + }, + { + "valid": true, + "exists": true, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:pypi/fetchcode@0.3.0?os=windows" + }, + { + "valid": true, + "exists": false, + "message": "The provided PackageURL is valid, but does not exist in the upstream repo.", + "purl": "pkg:pypi/fetchcode@0.3.0os=windows" + }, + { + "valid": true, + "exists": false, + "message": "The provided PackageURL is valid, but does not exist in the upstream repo.", + "purl": "pkg:pypi/fetchcode@5.0.0" + }, + { + "valid": true, + "exists": true, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:cargo/banquo" + }, + { + "valid": true, + "exists": true, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:gem/rails" + } + ] +} diff --git a/purldb-toolkit/tests/data/purlcli/expected_validate_output_unique.json b/purldb-toolkit/tests/data/purlcli/expected_validate_output_unique.json new file mode 100644 index 00000000..bd3349d5 --- /dev/null +++ b/purldb-toolkit/tests/data/purlcli/expected_validate_output_unique.json @@ -0,0 +1,63 @@ +{ + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "validate", + "--purl": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", + "pkg:cargo/banquo", + "pkg:nginx/nginx", + "pkg:gem/rails", + "pkg:rubygems/rails" + ], + "--file": null, + "--unique": true, + "--output": "" + }, + "purls": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", + "pkg:cargo/banquo", + "pkg:nginx/nginx", + "pkg:gem/rails", + "pkg:rubygems/rails" + ], + "errors": [], + "warnings": [ + "input PURL: 'pkg:pypi/fetchcode@0.3.0' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@0.3.0?os=windows' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@0.3.0os=windows' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@5.0.0' normalized to 'pkg:pypi/fetchcode'" + ] + } + ], + "packages": [ + { + "valid": true, + "exists": true, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:pypi/fetchcode" + }, + { + "valid": true, + "exists": true, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:cargo/banquo" + }, + { + "valid": true, + "exists": true, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:gem/rails" + } + ] +} diff --git a/purldb-toolkit/tests/data/purlcli/expected_versions_output.json b/purldb-toolkit/tests/data/purlcli/expected_versions_output.json new file mode 100644 index 00000000..10443de7 --- /dev/null +++ b/purldb-toolkit/tests/data/purlcli/expected_versions_output.json @@ -0,0 +1,176 @@ +{ + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", + "pkg:cargo/banquo", + "pkg:nginx/nginx", + "pkg:hex/coherence@0.1.0" + ], + "--file": null, + "--output": "" + }, + "purls": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", + "pkg:cargo/banquo", + "pkg:nginx/nginx", + "pkg:hex/coherence@0.1.0" + ], + "errors": [], + "warnings": [ + "'pkg:pypi/fetchcode@0.3.0os=windows' does not exist in the upstream repo", + "'pkg:pypi/fetchcode@5.0.0' does not exist in the upstream repo", + "'pkg:nginx/nginx' not supported with `versions` command" + ] + } + ], + "packages": [ + { + "purl": "pkg:pypi/fetchcode", + "versions": [ + { + "purl": "pkg:pypi/fetchcode@0.1.0", + "version": "0.1.0", + "release_date": "2021-08-25T15:15:15.265015+00:00" + }, + { + "purl": "pkg:pypi/fetchcode@0.2.0", + "version": "0.2.0", + "release_date": "2022-09-14T16:36:02.242182+00:00" + }, + { + "purl": "pkg:pypi/fetchcode@0.3.0", + "version": "0.3.0", + "release_date": "2023-12-18T20:49:45.840364+00:00" + } + ] + }, + { + "purl": "pkg:pypi/fetchcode@0.3.0", + "versions": [ + { + "purl": "pkg:pypi/fetchcode@0.1.0", + "version": "0.1.0", + "release_date": "2021-08-25T15:15:15.265015+00:00" + }, + { + "purl": "pkg:pypi/fetchcode@0.2.0", + "version": "0.2.0", + "release_date": "2022-09-14T16:36:02.242182+00:00" + }, + { + "purl": "pkg:pypi/fetchcode@0.3.0", + "version": "0.3.0", + "release_date": "2023-12-18T20:49:45.840364+00:00" + } + ] + }, + { + "purl": "pkg:pypi/fetchcode@0.3.0?os=windows", + "versions": [ + { + "purl": "pkg:pypi/fetchcode@0.1.0", + "version": "0.1.0", + "release_date": "2021-08-25T15:15:15.265015+00:00" + }, + { + "purl": "pkg:pypi/fetchcode@0.2.0", + "version": "0.2.0", + "release_date": "2022-09-14T16:36:02.242182+00:00" + }, + { + "purl": "pkg:pypi/fetchcode@0.3.0", + "version": "0.3.0", + "release_date": "2023-12-18T20:49:45.840364+00:00" + } + ] + }, + { + "purl": "pkg:cargo/banquo", + "versions": [ + { + "purl": "pkg:cargo/banquo@0.1.0", + "version": "0.1.0", + "release_date": "2024-02-07T23:21:50.548891+00:00" + } + ] + }, + { + "purl": "pkg:hex/coherence@0.1.0", + "versions": [ + { + "purl": "pkg:hex/coherence@0.8.0", + "version": "0.8.0", + "release_date": "2023-09-22T18:28:36.224103+00:00" + }, + { + "purl": "pkg:hex/coherence@0.5.2", + "version": "0.5.2", + "release_date": "2018-09-03T23:52:38.161321+00:00" + }, + { + "purl": "pkg:hex/coherence@0.5.1", + "version": "0.5.1", + "release_date": "2018-08-28T01:33:14.565151+00:00" + }, + { + "purl": "pkg:hex/coherence@0.5.0", + "version": "0.5.0", + "release_date": "2017-08-02T06:23:12.948525+00:00" + }, + { + "purl": "pkg:hex/coherence@0.4.0", + "version": "0.4.0", + "release_date": "2017-07-03T21:55:56.591426+00:00" + }, + { + "purl": "pkg:hex/coherence@0.3.1", + "version": "0.3.1", + "release_date": "2016-11-27T05:30:34.553920+00:00" + }, + { + "purl": "pkg:hex/coherence@0.3.0", + "version": "0.3.0", + "release_date": "2016-08-28T19:04:10.794525+00:00" + }, + { + "purl": "pkg:hex/coherence@0.2.0", + "version": "0.2.0", + "release_date": "2016-07-30T21:07:45.377540+00:00" + }, + { + "purl": "pkg:hex/coherence@0.1.3", + "version": "0.1.3", + "release_date": "2016-07-19T03:33:09.185782+00:00" + }, + { + "purl": "pkg:hex/coherence@0.1.2", + "version": "0.1.2", + "release_date": "2016-07-12T18:41:27.084599+00:00" + }, + { + "purl": "pkg:hex/coherence@0.1.1", + "version": "0.1.1", + "release_date": "2016-07-11T13:56:26.388096+00:00" + }, + { + "purl": "pkg:hex/coherence@0.1.0", + "version": "0.1.0", + "release_date": "2016-07-11T06:52:43.545719+00:00" + } + ] + } + ] +} diff --git a/purldb-toolkit/tests/data/purlcli/expected_versions_output_unique.json b/purldb-toolkit/tests/data/purlcli/expected_versions_output_unique.json new file mode 100644 index 00000000..0a7b0869 --- /dev/null +++ b/purldb-toolkit/tests/data/purlcli/expected_versions_output_unique.json @@ -0,0 +1,140 @@ +{ + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", + "pkg:cargo/banquo", + "pkg:nginx/nginx", + "pkg:hex/coherence@0.1.0" + ], + "--file": null, + "--unique": true, + "--output": "" + }, + "purls": [ + "pkg:pypi/fetchcode", + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "pkg:pypi/fetchcode@0.3.0os=windows", + "pkg:pypi/fetchcode@5.0.0", + "pkg:cargo/banquo", + "pkg:nginx/nginx", + "pkg:hex/coherence@0.1.0" + ], + "errors": [], + "warnings": [ + "input PURL: 'pkg:pypi/fetchcode@0.3.0' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@0.3.0?os=windows' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@0.3.0os=windows' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:pypi/fetchcode@5.0.0' normalized to 'pkg:pypi/fetchcode'", + "input PURL: 'pkg:hex/coherence@0.1.0' normalized to 'pkg:hex/coherence'", + "'pkg:nginx/nginx' not supported with `versions` command" + ] + } + ], + "packages": [ + { + "purl": "pkg:pypi/fetchcode", + "versions": [ + { + "purl": "pkg:pypi/fetchcode@0.1.0", + "version": "0.1.0", + "release_date": "2021-08-25T15:15:15.265015+00:00" + }, + { + "purl": "pkg:pypi/fetchcode@0.2.0", + "version": "0.2.0", + "release_date": "2022-09-14T16:36:02.242182+00:00" + }, + { + "purl": "pkg:pypi/fetchcode@0.3.0", + "version": "0.3.0", + "release_date": "2023-12-18T20:49:45.840364+00:00" + } + ] + }, + { + "purl": "pkg:cargo/banquo", + "versions": [ + { + "purl": "pkg:cargo/banquo@0.1.0", + "version": "0.1.0", + "release_date": "2024-02-07T23:21:50.548891+00:00" + } + ] + }, + { + "purl": "pkg:hex/coherence", + "versions": [ + { + "purl": "pkg:hex/coherence@0.8.0", + "version": "0.8.0", + "release_date": "2023-09-22T18:28:36.224103+00:00" + }, + { + "purl": "pkg:hex/coherence@0.5.2", + "version": "0.5.2", + "release_date": "2018-09-03T23:52:38.161321+00:00" + }, + { + "purl": "pkg:hex/coherence@0.5.1", + "version": "0.5.1", + "release_date": "2018-08-28T01:33:14.565151+00:00" + }, + { + "purl": "pkg:hex/coherence@0.5.0", + "version": "0.5.0", + "release_date": "2017-08-02T06:23:12.948525+00:00" + }, + { + "purl": "pkg:hex/coherence@0.4.0", + "version": "0.4.0", + "release_date": "2017-07-03T21:55:56.591426+00:00" + }, + { + "purl": "pkg:hex/coherence@0.3.1", + "version": "0.3.1", + "release_date": "2016-11-27T05:30:34.553920+00:00" + }, + { + "purl": "pkg:hex/coherence@0.3.0", + "version": "0.3.0", + "release_date": "2016-08-28T19:04:10.794525+00:00" + }, + { + "purl": "pkg:hex/coherence@0.2.0", + "version": "0.2.0", + "release_date": "2016-07-30T21:07:45.377540+00:00" + }, + { + "purl": "pkg:hex/coherence@0.1.3", + "version": "0.1.3", + "release_date": "2016-07-19T03:33:09.185782+00:00" + }, + { + "purl": "pkg:hex/coherence@0.1.2", + "version": "0.1.2", + "release_date": "2016-07-12T18:41:27.084599+00:00" + }, + { + "purl": "pkg:hex/coherence@0.1.1", + "version": "0.1.1", + "release_date": "2016-07-11T13:56:26.388096+00:00" + }, + { + "purl": "pkg:hex/coherence@0.1.0", + "version": "0.1.0", + "release_date": "2016-07-11T06:52:43.545719+00:00" + } + ] + } + ] +} diff --git a/purldb-toolkit/tests/test_purlcli.py b/purldb-toolkit/tests/test_purlcli.py index 6aa3b13f..ebc276e6 100644 --- a/purldb-toolkit/tests/test_purlcli.py +++ b/purldb-toolkit/tests/test_purlcli.py @@ -11,9 +11,7 @@ import os from collections import OrderedDict -import click import pytest -import requests from click.testing import CliRunner from commoncode.testcase import FileDrivenTesting from purldb_toolkit import cli_test_utils, purlcli @@ -97,19 +95,14 @@ def test_metadata_cli(self): output_data["headers"][0]["options"]["--file"], expected_data["headers"][0]["options"]["--file"], ), - (output_data["packages"], expected_data["packages"]), ] for output, expected in result_objects: assert output == expected - """ - QUESTION: Is this a better way to test the contents of `packages`? - We already remove some dynamic fields like `download_url`, but - `metadata` also adds new versions as they appear. The below approach - avoids an error from a new version while checking whether the existing - expected versions still appear in the result data. - """ + # NOTE: To avoid errors from the addition of new versions, we exclude + # `(output_data["packages"], expected_data["packages"])` from the + # result_objects list above and handle here. for expected in expected_data["packages"]: assert expected in output_data["packages"] @@ -189,16 +182,12 @@ def test_metadata_cli_unique(self): output_data["headers"][0]["options"]["--unique"], expected_data["headers"][0]["options"]["--unique"], ), - (output_data["packages"], expected_data["packages"]), ] for output, expected in result_objects: assert output == expected - """ - QUESTION: Is this a better way to test the contents of `packages`? - See point under test_metadata_cli() re addition of new versions. - """ + # See note under test_metadata_cli() re addition of new versions. for expected in expected_data["packages"]: assert expected in output_data["packages"] @@ -606,12 +595,7 @@ def test_metadata_details(self, test_input, expected): cli_test_utils.streamline_headers(expected["headers"]) streamline_metadata_packages(expected["packages"]) - assert purl_metadata == expected - - """ - QUESTION: Is this a better way to test the contents of `packages`? - See note under test_metadata_cli() re addition of new versions. - """ + # See note under test_metadata_cli() re addition of new versions. assert purl_metadata["headers"] == expected["headers"] for expected in expected["packages"]: @@ -694,16 +678,61 @@ def test_check_metadata_purl(self, test_input, expected): (["pkg:pypi/"]), ([("pkg:pypi/?fetchcode", "pkg:pypi/")]), ), + ( + [ + [ + "pkg:pypi/fetchcode@0.3.0", + "pkg:pypi/fetchcode@5.0.0", + "pkg:pypi/dejacode", + "pkg:pypi/dejacode@5.0.0", + "pkg:pypi/dejacode@5.0.0?os=windows", + "pkg:pypi/dejacode@5.0.0os=windows", + "pkg:pypi/dejacode@5.0.0?how_is_the_weather=rainy", + "pkg:pypi/dejacode@5.0.0#how/are/you", + "pkg:pypi/dejacode@10.0.0", + "pkg:cargo/banquo", + "pkg:cargo/socksprox", + "pkg:nginx/nginx", + "pkg:nginx/nginx@0.8.9?os=windows", + ] + ], + ( + [ + "pkg:pypi/fetchcode", + "pkg:pypi/dejacode", + "pkg:cargo/banquo", + "pkg:cargo/socksprox", + "pkg:nginx/nginx", + ] + ), + ( + [ + ("pkg:pypi/fetchcode@0.3.0", "pkg:pypi/fetchcode"), + ("pkg:pypi/fetchcode@5.0.0", "pkg:pypi/fetchcode"), + ("pkg:pypi/dejacode", "pkg:pypi/dejacode"), + ("pkg:pypi/dejacode@5.0.0", "pkg:pypi/dejacode"), + ("pkg:pypi/dejacode@5.0.0?os=windows", "pkg:pypi/dejacode"), + ("pkg:pypi/dejacode@5.0.0os=windows", "pkg:pypi/dejacode"), + ( + "pkg:pypi/dejacode@5.0.0?how_is_the_weather=rainy", + "pkg:pypi/dejacode", + ), + ("pkg:pypi/dejacode@5.0.0#how/are/you", "pkg:pypi/dejacode"), + ("pkg:pypi/dejacode@10.0.0", "pkg:pypi/dejacode"), + ("pkg:cargo/banquo", "pkg:cargo/banquo"), + ("pkg:cargo/socksprox", "pkg:cargo/socksprox"), + ("pkg:nginx/nginx", "pkg:nginx/nginx"), + ("pkg:nginx/nginx@0.8.9?os=windows", "pkg:nginx/nginx"), + ] + ), + ), ], ) def test_normalize_purls( self, test_input, expected_input_purls, expected_normalized_purls ): - input_purls = [] - normalized_purls = [] - input_purls, normalized_purls = purlcli.normalize_purls( - test_input[0], input_purls, normalized_purls - ) + unique = True + input_purls, normalized_purls = purlcli.normalize_purls(test_input[0], unique) assert input_purls == expected_input_purls assert normalized_purls == expected_normalized_purls @@ -833,10 +862,6 @@ class TestPURLCLI_urls(object): def test_urls_cli(self): """ Test the `urls` command with actual and expected JSON output files. - - Note that we can't simply compare the actual and expected JSON files - because the `--output` values (paths) differ due to the use of - temporary files, and therefore we test a list of relevant key-value pairs. """ expected_result_file = test_env.get_test_loc( "purlcli/expected_urls_output.json" @@ -933,10 +958,6 @@ def test_urls_cli(self): def test_urls_cli_unique(self): """ Test the `urls` command with actual and expected JSON output files. - - Note that we can't simply compare the actual and expected JSON files - because the `--output` values (paths) differ due to the use of - temporary files, and therefore we test a list of relevant key-value pairs. """ expected_result_file = test_env.get_test_loc( "purlcli/expected_urls_output_unique.json" @@ -1034,10 +1055,6 @@ def test_urls_cli_unique(self): def test_urls_cli_head(self): """ Test the `urls` command with actual and expected JSON output files. - - Note that we can't simply compare the actual and expected JSON files - because the `--output` values (paths) differ due to the use of - temporary files, and therefore we test a list of relevant key-value pairs. """ expected_result_file = test_env.get_test_loc( "purlcli/expected_urls_output_head.json" @@ -1504,261 +1521,687 @@ def test_make_head_request(self, test_input, expected): assert purl_status_code == expected -# TODO: not yet converted to a SCTK-like data structure. class TestPURLCLI_validate(object): - @pytest.mark.parametrize( - "test_input,expected", - [ + def test_validate_cli(self): + """ + Test the `validate` command with actual and expected JSON output files. + """ + expected_result_file = test_env.get_test_loc( + "purlcli/expected_validate_output.json" + ) + actual_result_file = test_env.get_temp_file("actual_validate_output.json") + options = [ + "--purl", + "pkg:pypi/fetchcode", + "--purl", + "pkg:pypi/fetchcode@0.3.0", + "--purl", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "--purl", + "pkg:pypi/fetchcode@0.3.0os=windows", + "--purl", + "pkg:pypi/fetchcode@5.0.0", + "--purl", + "pkg:cargo/banquo", + "--purl", + "pkg:nginx/nginx", + "--purl", + "pkg:gem/rails", + "--purl", + "pkg:rubygems/rails", + "--output", + actual_result_file, + ] + runner = CliRunner() + result = runner.invoke(purlcli.validate, options, catch_exceptions=False) + assert result.exit_code == 0 + + f_output = open(actual_result_file) + output_data = json.load(f_output) + + f_expected = open(expected_result_file) + expected_data = json.load(f_expected) + + result_objects = [ ( - ["pkg:pypi/fetchcode@0.2.0"], - [ - { - "valid": True, - "exists": True, - "message": "The provided Package URL is valid, and the package exists in the upstream repo.", - "purl": "pkg:pypi/fetchcode@0.2.0", - } - ], + output_data["headers"][0]["tool_name"], + expected_data["headers"][0]["tool_name"], ), + (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), ( - ["pkg:pypi/fetchcode@10.2.0"], - [ - { - "valid": True, - "exists": False, - "message": "The provided PackageURL is valid, but does not exist in the upstream repo.", - "purl": "pkg:pypi/fetchcode@10.2.0", - } - ], + output_data["headers"][0]["warnings"], + expected_data["headers"][0]["warnings"], ), ( - ["pkg:nginx/nginx@0.8.9?os=windows"], - [ - { - "valid": True, - "exists": None, - "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", - "purl": "pkg:nginx/nginx@0.8.9?os=windows", - } - ], + output_data["headers"][0]["errors"], + expected_data["headers"][0]["errors"], ), ( - ["pkg:gem/bundler-sass"], - [ - { - "valid": True, - "exists": True, - "message": "The provided Package URL is valid, and the package exists in the upstream repo.", - "purl": "pkg:gem/bundler-sass", - } - ], + output_data["headers"][0]["options"]["command"], + expected_data["headers"][0]["options"]["command"], ), ( - ["pkg:rubygems/bundler-sass"], - [ - { - "valid": True, - "exists": None, - "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", - "purl": "pkg:rubygems/bundler-sass", - } - ], + output_data["headers"][0]["options"]["--purl"], + expected_data["headers"][0]["options"]["--purl"], ), ( - ["pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.14.0-rc1"], - [ - { - "valid": True, - "exists": True, - "message": "The provided Package URL is valid, and the package exists in the upstream repo.", - "purl": "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.14.0-rc1", - } - ], + output_data["headers"][0]["options"]["--file"], + expected_data["headers"][0]["options"]["--file"], ), - ], - ) - def test_validate_purl(self, test_input, expected): - validated_purls = purlcli.validate_purls(test_input) - assert validated_purls == expected + (output_data["packages"], expected_data["packages"]), + ] - def test_validate_purl_empty(self): - test_purls = [] - validated_purls = purlcli.validate_purls(test_purls) - expected_results = [] - assert validated_purls == expected_results + for output, expected in result_objects: + assert output == expected - @pytest.mark.parametrize( - "test_input,expected", - [ + def test_validate_cli_unique(self): + """ + Test the `validate` command with actual and expected JSON output files + with the `--unique` flag included in the command. + """ + expected_result_file = test_env.get_test_loc( + "purlcli/expected_validate_output_unique.json" + ) + actual_result_file = test_env.get_temp_file("actual_validate_output.json") + options = [ + "--purl", + "pkg:pypi/fetchcode", + "--purl", + "pkg:pypi/fetchcode@0.3.0", + "--purl", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "--purl", + "pkg:pypi/fetchcode@0.3.0os=windows", + "--purl", + "pkg:pypi/fetchcode@5.0.0", + "--purl", + "pkg:cargo/banquo", + "--purl", + "pkg:nginx/nginx", + "--purl", + "pkg:gem/rails", + "--purl", + "pkg:rubygems/rails", + "--output", + actual_result_file, + "--unique", + ] + runner = CliRunner() + result = runner.invoke(purlcli.validate, options, catch_exceptions=False) + assert result.exit_code == 0 + + f_output = open(actual_result_file) + output_data = json.load(f_output) + + f_expected = open(expected_result_file) + expected_data = json.load(f_expected) + + result_objects = [ ( - ["pkg:pypi/fetchcode@0.2.0"], - [ - { - "valid": True, - "exists": True, - "message": "The provided Package URL is valid, and the package exists in the upstream repo.", - "purl": "pkg:pypi/fetchcode@0.2.0", - } - ], + output_data["headers"][0]["tool_name"], + expected_data["headers"][0]["tool_name"], ), + (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), ( - ["pkg:pypi/fetchcode@0.2.0?"], - [ - { - "valid": True, - "exists": True, - "message": "The provided Package URL is valid, and the package exists in the upstream repo.", - "purl": "pkg:pypi/fetchcode@0.2.0?", - } - ], + output_data["headers"][0]["warnings"], + expected_data["headers"][0]["warnings"], ), ( - ["pkg:pypi/fetchcode@?0.2.0"], - [ - { - "valid": False, - "exists": None, - "message": "The provided PackageURL is not valid.", - "purl": "pkg:pypi/fetchcode@?0.2.0", - } - ], + output_data["headers"][0]["errors"], + expected_data["headers"][0]["errors"], ), ( - ["foo"], - [ - { - "valid": False, - "exists": None, - "message": "The provided PackageURL is not valid.", - "purl": "foo", - } - ], + output_data["headers"][0]["options"]["command"], + expected_data["headers"][0]["options"]["command"], ), - ], - ) - def test_validate_purl_invalid(self, test_input, expected): - validated_purls = purlcli.validate_purls(test_input) - assert validated_purls == expected - - @pytest.mark.parametrize( - "test_input,expected", - [ ( - ["pkg:nginx/nginx@0.8.9?os=windows"], - [ - { - "valid": True, - "exists": None, - "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", - "purl": "pkg:nginx/nginx@0.8.9?os=windows", - }, - ], + output_data["headers"][0]["options"]["--purl"], + expected_data["headers"][0]["options"]["--purl"], ), ( - [" pkg:nginx/nginx@0.8.9?os=windows"], - [ - { - "valid": True, - "exists": None, - "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", - "purl": "pkg:nginx/nginx@0.8.9?os=windows", - }, - ], + output_data["headers"][0]["options"]["--file"], + expected_data["headers"][0]["options"]["--file"], ), ( - ["pkg:nginx/nginx@0.8.9?os=windows "], - [ - { - "valid": True, - "exists": None, - "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", - "purl": "pkg:nginx/nginx@0.8.9?os=windows", - } - ], + output_data["headers"][0]["options"]["--unique"], + expected_data["headers"][0]["options"]["--unique"], ), - ], - ) - def test_validate_purl_strip(self, test_input, expected): - validated_purls = purlcli.validate_purls(test_input) - assert validated_purls == expected + (output_data["packages"], expected_data["packages"]), + ] + for output, expected in result_objects: + assert output == expected -# TODO: not yet converted to a SCTK-like data structure. -class TestPURLCLI_versions(object): @pytest.mark.parametrize( "test_input,expected", [ ( - ["pkg:pypi/fetchcode"], - [ - { - "purl": "pkg:pypi/fetchcode", - "versions": [ - { - "purl": "pkg:pypi/fetchcode@0.1.0", - "version": "0.1.0", - "release_date": "2021-08-25T15:15:15.265015+00:00", - }, - { - "purl": "pkg:pypi/fetchcode@0.2.0", - "version": "0.2.0", - "release_date": "2022-09-14T16:36:02.242182+00:00", - }, - { - "purl": "pkg:pypi/fetchcode@0.3.0", - "version": "0.3.0", - "release_date": "2023-12-18T20:49:45.840364+00:00", - }, - ], - }, - ], - ), - ( - ["pkg:gem/bundler-sass"], - [ - { - "purl": "pkg:gem/bundler-sass", - "versions": [ - { - "purl": "pkg:gem/bundler-sass@0.1.2", - "release_date": "2013-12-11T00:27:10.097000+00:00", - "version": "0.1.2", - }, - ], - }, - ], + "pkg:pypi/fetchcode@0.2.0", + { + "valid": True, + "exists": True, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:pypi/fetchcode@0.2.0", + }, ), ( - ["pkg:rubygems/bundler-sass"], - [], + "pkg:pypi/fetchcode@10.2.0", + { + "valid": True, + "exists": False, + "message": "The provided PackageURL is valid, but does not exist in the upstream repo.", + "purl": "pkg:pypi/fetchcode@10.2.0", + }, ), ( - ["pkg:nginx/nginx"], - [], + "pkg:nginx/nginx@0.8.9?os=windows", + { + "valid": True, + "exists": None, + "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", + "purl": "pkg:nginx/nginx@0.8.9?os=windows", + }, ), ( - ["pkg:pypi/zzzzz"], - [], + "pkg:gem/bundler-sass", + { + "valid": True, + "exists": True, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:gem/bundler-sass", + }, ), ( - ["pkg:pypi/?fetchcode"], - [], + "pkg:rubygems/bundler-sass", + { + "valid": True, + "exists": None, + "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", + "purl": "pkg:rubygems/bundler-sass", + }, ), ( - ["zzzzz"], - [], + "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.14.0-rc1", + { + "valid": True, + "exists": True, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:maven/com.fasterxml.jackson.core/jackson-databind@2.14.0-rc1", + }, ), ], ) - def test_versions(self, test_input, expected): - # TODO: not yet updated to SCTK-like structure. - output = "" + def test_validate_purl(self, test_input, expected): + validated_purl = purlcli.validate_purl(test_input) + assert validated_purl == expected + + def test_validate_purl_empty(self): + test_input = None + validated_purl = purlcli.validate_purl(test_input) + expected_results = {"errors": {"purl": ["This field is required."]}} + assert validated_purl == expected_results + + @pytest.mark.parametrize( + "test_input,expected", + [ + ( + "pkg:pypi/fetchcode@0.2.0", + { + "valid": True, + "exists": True, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:pypi/fetchcode@0.2.0", + }, + ), + ( + "pkg:pypi/fetchcode@0.2.0?", + { + "valid": True, + "exists": True, + "message": "The provided Package URL is valid, and the package exists in the upstream repo.", + "purl": "pkg:pypi/fetchcode@0.2.0?", + }, + ), + ( + "pkg:pypi/fetchcode@?0.2.0", + { + "valid": False, + "exists": None, + "message": "The provided PackageURL is not valid.", + "purl": "pkg:pypi/fetchcode@?0.2.0", + }, + ), + ( + "foo", + { + "valid": False, + "exists": None, + "message": "The provided PackageURL is not valid.", + "purl": "foo", + }, + ), + ], + ) + def test_validate_purl_invalid(self, test_input, expected): + validated_purl = purlcli.validate_purl(test_input) + assert validated_purl == expected + + @pytest.mark.parametrize( + "test_input,expected", + [ + ( + "pkg:nginx/nginx@0.8.9?os=windows", + { + "valid": True, + "exists": None, + "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", + "purl": "pkg:nginx/nginx@0.8.9?os=windows", + }, + ), + ( + " pkg:nginx/nginx@0.8.9?os=windows", + { + "valid": True, + "exists": None, + "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", + "purl": "pkg:nginx/nginx@0.8.9?os=windows", + }, + ), + ( + "pkg:nginx/nginx@0.8.9?os=windows ", + { + "valid": True, + "exists": None, + "message": "The provided PackageURL is valid, but `check_existence` is not supported for this package type.", + "purl": "pkg:nginx/nginx@0.8.9?os=windows", + }, + ), + ], + ) + def test_validate_purl_strip(self, test_input, expected): + validated_purl = purlcli.validate_purl(test_input) + assert validated_purl == expected + + +class TestPURLCLI_versions(object): + def test_versions_cli(self): + """ + Test the `versions` command with actual and expected JSON output files. + """ + expected_result_file = test_env.get_test_loc( + "purlcli/expected_versions_output.json" + ) + actual_result_file = test_env.get_temp_file("actual_versions_output.json") + options = [ + "--purl", + "pkg:pypi/fetchcode", + "--purl", + "pkg:pypi/fetchcode@0.3.0", + "--purl", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "--purl", + "pkg:pypi/fetchcode@0.3.0os=windows", + "--purl", + "pkg:pypi/fetchcode@5.0.0", + "--purl", + "pkg:cargo/banquo", + "--purl", + "pkg:nginx/nginx", + "--purl", + "pkg:hex/coherence@0.1.0", + "--output", + actual_result_file, + ] + runner = CliRunner() + result = runner.invoke(purlcli.get_versions, options, catch_exceptions=False) + assert result.exit_code == 0 + + f_output = open(actual_result_file) + output_data = json.load(f_output) + cli_test_utils.streamline_headers(output_data["headers"]) + + f_expected = open(expected_result_file) + expected_data = json.load(f_expected) + cli_test_utils.streamline_headers(expected_data["headers"]) + + result_objects = [ + ( + output_data["headers"][0]["tool_name"], + expected_data["headers"][0]["tool_name"], + ), + (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), + ( + output_data["headers"][0]["warnings"], + expected_data["headers"][0]["warnings"], + ), + ( + output_data["headers"][0]["errors"], + expected_data["headers"][0]["errors"], + ), + ( + output_data["headers"][0]["options"]["command"], + expected_data["headers"][0]["options"]["command"], + ), + ( + output_data["headers"][0]["options"]["--purl"], + expected_data["headers"][0]["options"]["--purl"], + ), + ( + output_data["headers"][0]["options"]["--file"], + expected_data["headers"][0]["options"]["--file"], + ), + ] + + for output, expected in result_objects: + assert output == expected + + # NOTE: To avoid errors from the addition of new versions, we exclude + # `(output_data["packages"], expected_data["packages"])` from the + # result_objects list above and handle here. + expected_versions = [] + output_versions = [] + for expected in expected_data["packages"]: + expected_versions = expected["versions"] + for output in output_data["packages"]: + output_versions = output["versions"] + + assert [i for i in expected_versions if i not in output_versions] == [] + + def test_versions_cli_unique(self): + """ + Test the `versions` command with actual and expected JSON output files + with the `--unique` flag included in the command. + """ + expected_result_file = test_env.get_test_loc( + "purlcli/expected_versions_output_unique.json" + ) + actual_result_file = test_env.get_temp_file("actual_versions_output.json") + options = [ + "--purl", + "pkg:pypi/fetchcode", + "--purl", + "pkg:pypi/fetchcode@0.3.0", + "--purl", + "pkg:pypi/fetchcode@0.3.0?os=windows", + "--purl", + "pkg:pypi/fetchcode@0.3.0os=windows", + "--purl", + "pkg:pypi/fetchcode@5.0.0", + "--purl", + "pkg:cargo/banquo", + "--purl", + "pkg:nginx/nginx", + "--purl", + "pkg:hex/coherence@0.1.0", + "--output", + actual_result_file, + "--unique", + ] + runner = CliRunner() + result = runner.invoke(purlcli.get_versions, options, catch_exceptions=False) + assert result.exit_code == 0 + + f_output = open(actual_result_file) + output_data = json.load(f_output) + cli_test_utils.streamline_headers(output_data["headers"]) + + f_expected = open(expected_result_file) + expected_data = json.load(f_expected) + cli_test_utils.streamline_headers(expected_data["headers"]) + + result_objects = [ + ( + output_data["headers"][0]["tool_name"], + expected_data["headers"][0]["tool_name"], + ), + (output_data["headers"][0]["purls"], expected_data["headers"][0]["purls"]), + ( + output_data["headers"][0]["warnings"], + expected_data["headers"][0]["warnings"], + ), + ( + output_data["headers"][0]["errors"], + expected_data["headers"][0]["errors"], + ), + ( + output_data["headers"][0]["options"]["command"], + expected_data["headers"][0]["options"]["command"], + ), + ( + output_data["headers"][0]["options"]["--purl"], + expected_data["headers"][0]["options"]["--purl"], + ), + ( + output_data["headers"][0]["options"]["--file"], + expected_data["headers"][0]["options"]["--file"], + ), + ( + output_data["headers"][0]["options"]["--unique"], + expected_data["headers"][0]["options"]["--unique"], + ), + ] + + for output, expected in result_objects: + assert output == expected + + # See note under test_versions_cli() re addition of new versions. + expected_versions = [] + output_versions = [] + for expected in expected_data["packages"]: + expected_versions = expected["versions"] + for output in output_data["packages"]: + output_versions = output["versions"] + + assert [i for i in expected_versions if i not in output_versions] == [] + + @pytest.mark.parametrize( + "test_input,expected", + [ + ( + ["pkg:pypi/fetchcode"], + { + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": ["pkg:pypi/fetchcode"], + "--file": None, + "--output": "", + }, + "purls": ["pkg:pypi/fetchcode"], + "errors": [], + "warnings": [], + } + ], + "packages": [ + { + "purl": "pkg:pypi/fetchcode", + "versions": [ + { + "purl": "pkg:pypi/fetchcode@0.1.0", + "version": "0.1.0", + "release_date": "2021-08-25T15:15:15.265015+00:00", + }, + { + "purl": "pkg:pypi/fetchcode@0.2.0", + "version": "0.2.0", + "release_date": "2022-09-14T16:36:02.242182+00:00", + }, + { + "purl": "pkg:pypi/fetchcode@0.3.0", + "version": "0.3.0", + "release_date": "2023-12-18T20:49:45.840364+00:00", + }, + ], + } + ], + }, + ), + ( + ["pkg:gem/bundler-sass"], + { + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": ["pkg:gem/bundler-sass"], + "--file": None, + "--output": "", + }, + "purls": ["pkg:gem/bundler-sass"], + "errors": [], + "warnings": [], + } + ], + "packages": [ + { + "purl": "pkg:gem/bundler-sass", + "versions": [ + { + "purl": "pkg:gem/bundler-sass@0.1.2", + "version": "0.1.2", + "release_date": "2013-12-11T00:27:10.097000+00:00", + } + ], + } + ], + }, + ), + ( + ["pkg:rubygems/bundler-sass"], + { + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": ["pkg:rubygems/bundler-sass"], + "--file": None, + "--output": "", + }, + "purls": ["pkg:rubygems/bundler-sass"], + "errors": [], + "warnings": [ + "'pkg:rubygems/bundler-sass' not supported with `versions` command" + ], + } + ], + "packages": [], + }, + ), + ( + ["pkg:nginx/nginx"], + { + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": ["pkg:nginx/nginx"], + "--file": None, + "--output": "", + }, + "purls": ["pkg:nginx/nginx"], + "errors": [], + "warnings": [ + "'pkg:nginx/nginx' not supported with `versions` command" + ], + } + ], + "packages": [], + }, + ), + ( + ["pkg:pypi/zzzzz"], + { + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": ["pkg:pypi/zzzzz"], + "--file": None, + "--output": "", + }, + "purls": ["pkg:pypi/zzzzz"], + "errors": [], + "warnings": [ + "'pkg:pypi/zzzzz' does not exist in the upstream repo" + ], + } + ], + "packages": [], + }, + ), + ( + ["pkg:pypi/?fetchcode"], + { + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": ["pkg:pypi/?fetchcode"], + "--file": None, + "--output": "", + }, + "purls": ["pkg:pypi/?fetchcode"], + "errors": [], + "warnings": ["'pkg:pypi/?fetchcode' not valid"], + } + ], + "packages": [], + }, + ), + ( + ["zzzzz"], + { + "headers": [ + { + "tool_name": "purlcli", + "tool_version": "0.2.0", + "options": { + "command": "versions", + "--purl": ["zzzzz"], + "--file": None, + "--output": "", + }, + "purls": ["zzzzz"], + "errors": [], + "warnings": ["'zzzzz' not valid"], + } + ], + "packages": [], + }, + ), + ], + ) + def test_versions_details(self, test_input, expected): + output = "" file = "" command_name = "versions" + unique = False - purl_versions = purlcli.list_versions(test_input, output, file, command_name) - # TODO: consider `expected in purl_versions` instead of `purl_versions == expected` ==> handles dynamic data in the result better. - assert purl_versions == expected + purl_versions = purlcli.get_versions_details( + test_input, output, file, unique, command_name + ) + + cli_test_utils.streamline_headers(purl_versions["headers"]) + cli_test_utils.streamline_headers(expected["headers"]) + + # See note under test_versions_cli() re addition of new versions. + assert purl_versions["headers"] == expected["headers"] + + for expected in expected["packages"]: + assert expected in purl_versions["packages"] @pytest.mark.parametrize( "test_input,expected", From 4020e1bd28ad7550a21864cda918e8f68056216b Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Wed, 6 Mar 2024 17:06:47 -0800 Subject: [PATCH 4/6] Address PR comments #247 Reference: https://github.com/nexB/purldb/pull/305 Reference: https://github.com/nexB/purldb/issues/247 Signed-off-by: John M. Horan --- purldb-toolkit/src/purldb_toolkit/purlcli.py | 43 +++++++++++-------- .../purlcli/expected_versions_output.json | 24 +++++------ .../expected_versions_output_unique.json | 24 +++++------ purldb-toolkit/tests/test_purlcli.py | 8 ++-- 4 files changed, 54 insertions(+), 45 deletions(-) diff --git a/purldb-toolkit/src/purldb_toolkit/purlcli.py b/purldb-toolkit/src/purldb_toolkit/purlcli.py index c08a86de..9c09fb3d 100644 --- a/purldb-toolkit/src/purldb_toolkit/purlcli.py +++ b/purldb-toolkit/src/purldb_toolkit/purlcli.py @@ -156,6 +156,13 @@ def check_metadata_purl(purl): def normalize_purls(purls, unique): + """ + If the command includes the `--unique` flag, take the list of input PURLs, + remove the portion of the PURL that starts with a PURL separator (`@`, `?` + or `#`), and return a deduplicated list of the resulting PURLs (in + `input_purls`) and a list of tuples of each pair of the original input PURL + and the normalized PURL (in `normalized_purls`). + """ input_purls = [] normalized_purls = [] if unique: @@ -220,9 +227,11 @@ def construct_headers( headers_content["purls"] = purls if (command_name in ["metadata", "urls", "validate", "versions"]) and unique: - for purl in normalized_purls: - if purl[0] != purl[1]: - warnings.append(f"input PURL: '{purl[0]}' normalized to '{purl[1]}'") + for input_purl, normalized_purl in normalized_purls: + if input_purl != normalized_purl: + warnings.append( + f"input PURL: '{input_purl}' normalized to '{normalized_purl}'" + ) for purl in purls: if not purl: @@ -240,11 +249,12 @@ def construct_headers( if command_name in ["metadata", "urls", "validate", "versions"]: purl_warning = purl_warnings.get(purl, None) if purl_warning: - warnings.append(warning_text[purl_warning]) - print(warning_text[purl_warning]) + warning = warning_text[purl_warning] + warnings.append(warning) + print(warning) continue - log_file = Path("purldb-toolkit/src/purldb_toolkit/app.log") + log_file = Path(log_file=os.path.join(os.path.expanduser("~"), "app.log")) if log_file.is_file(): with open(log_file, "r") as f: for line in f: @@ -331,19 +341,19 @@ def get_urls_details(purls, output, file, unique, head, command_name): if not purl: continue - urls_purl = check_urls_purl(purl) + purl_status = check_urls_purl(purl) - if command_name == "urls" and urls_purl in [ + if command_name == "urls" and purl_status in [ "validation_error", "not_valid", "valid_but_not_supported", "not_in_upstream_repo", ]: - urls_warnings[purl] = urls_purl + urls_warnings[purl] = purl_status continue - if command_name == "urls" and urls_purl in ["valid_but_not_fully_supported"]: - urls_warnings[purl] = urls_purl + if command_name == "urls" and purl_status in ["valid_but_not_fully_supported"]: + urls_warnings[purl] = purl_status # Add the URLs. url_purl = PackageURL.from_string(purl) @@ -647,7 +657,7 @@ def validate_purl(purl): print(f"validate_purl(): json.decoder.JSONDecodeError for '{purl}': {e}") logging.basicConfig( - filename="purldb-toolkit/src/purldb_toolkit/app.log", + filename=os.path.join(os.path.expanduser("~"), "app.log"), level=logging.ERROR, format="%(levelname)s - %(message)s", filemode="w", @@ -740,7 +750,7 @@ def get_versions_details(purls, output, file, unique, command_name): for package_version in list(versions(purl)): purl_version_data = {} - purl_version = package_version.to_dict()["value"] + purl_version = package_version.value # We use `versions()` from fetchcode/package_versions.py, which # keeps the version (if any) of the input PURL in its output, so @@ -752,9 +762,7 @@ def get_versions_details(purls, output, file, unique, command_name): purl_version_data["purl"] = nested_purl purl_version_data["version"] = f"{purl_version}" - purl_version_data["release_date"] = ( - f'{package_version.to_dict()["release_date"]}' - ) + purl_version_data["release_date"] = f"{package_version.release_date}" purl_data["versions"].append(purl_version_data) @@ -834,7 +842,8 @@ def check_for_duplicate_input_sources(purls, file): def clear_log_file(): - log_file = Path("purldb-toolkit/src/purldb_toolkit/app.log") + log_file = Path(log_file=os.path.join(os.path.expanduser("~"), "app.log")) + if log_file.is_file(): os.remove(log_file) diff --git a/purldb-toolkit/tests/data/purlcli/expected_versions_output.json b/purldb-toolkit/tests/data/purlcli/expected_versions_output.json index 10443de7..da5d9942 100644 --- a/purldb-toolkit/tests/data/purlcli/expected_versions_output.json +++ b/purldb-toolkit/tests/data/purlcli/expected_versions_output.json @@ -113,62 +113,62 @@ { "purl": "pkg:hex/coherence@0.8.0", "version": "0.8.0", - "release_date": "2023-09-22T18:28:36.224103+00:00" + "release_date": "2023-09-22 18:28:36.224103+00:00" }, { "purl": "pkg:hex/coherence@0.5.2", "version": "0.5.2", - "release_date": "2018-09-03T23:52:38.161321+00:00" + "release_date": "2018-09-03 23:52:38.161321+00:00" }, { "purl": "pkg:hex/coherence@0.5.1", "version": "0.5.1", - "release_date": "2018-08-28T01:33:14.565151+00:00" + "release_date": "2018-08-28 01:33:14.565151+00:00" }, { "purl": "pkg:hex/coherence@0.5.0", "version": "0.5.0", - "release_date": "2017-08-02T06:23:12.948525+00:00" + "release_date": "2017-08-02 06:23:12.948525+00:00" }, { "purl": "pkg:hex/coherence@0.4.0", "version": "0.4.0", - "release_date": "2017-07-03T21:55:56.591426+00:00" + "release_date": "2017-07-03 21:55:56.591426+00:00" }, { "purl": "pkg:hex/coherence@0.3.1", "version": "0.3.1", - "release_date": "2016-11-27T05:30:34.553920+00:00" + "release_date": "2016-11-27 05:30:34.553920+00:00" }, { "purl": "pkg:hex/coherence@0.3.0", "version": "0.3.0", - "release_date": "2016-08-28T19:04:10.794525+00:00" + "release_date": "2016-08-28 19:04:10.794525+00:00" }, { "purl": "pkg:hex/coherence@0.2.0", "version": "0.2.0", - "release_date": "2016-07-30T21:07:45.377540+00:00" + "release_date": "2016-07-30 21:07:45.377540+00:00" }, { "purl": "pkg:hex/coherence@0.1.3", "version": "0.1.3", - "release_date": "2016-07-19T03:33:09.185782+00:00" + "release_date": "2016-07-19 03:33:09.185782+00:00" }, { "purl": "pkg:hex/coherence@0.1.2", "version": "0.1.2", - "release_date": "2016-07-12T18:41:27.084599+00:00" + "release_date": "2016-07-12 18:41:27.084599+00:00" }, { "purl": "pkg:hex/coherence@0.1.1", "version": "0.1.1", - "release_date": "2016-07-11T13:56:26.388096+00:00" + "release_date": "2016-07-11 13:56:26.388096+00:00" }, { "purl": "pkg:hex/coherence@0.1.0", "version": "0.1.0", - "release_date": "2016-07-11T06:52:43.545719+00:00" + "release_date": "2016-07-11 06:52:43.545719+00:00" } ] } diff --git a/purldb-toolkit/tests/data/purlcli/expected_versions_output_unique.json b/purldb-toolkit/tests/data/purlcli/expected_versions_output_unique.json index 0a7b0869..249604dd 100644 --- a/purldb-toolkit/tests/data/purlcli/expected_versions_output_unique.json +++ b/purldb-toolkit/tests/data/purlcli/expected_versions_output_unique.json @@ -77,62 +77,62 @@ { "purl": "pkg:hex/coherence@0.8.0", "version": "0.8.0", - "release_date": "2023-09-22T18:28:36.224103+00:00" + "release_date": "2023-09-22 18:28:36.224103+00:00" }, { "purl": "pkg:hex/coherence@0.5.2", "version": "0.5.2", - "release_date": "2018-09-03T23:52:38.161321+00:00" + "release_date": "2018-09-03 23:52:38.161321+00:00" }, { "purl": "pkg:hex/coherence@0.5.1", "version": "0.5.1", - "release_date": "2018-08-28T01:33:14.565151+00:00" + "release_date": "2018-08-28 01:33:14.565151+00:00" }, { "purl": "pkg:hex/coherence@0.5.0", "version": "0.5.0", - "release_date": "2017-08-02T06:23:12.948525+00:00" + "release_date": "2017-08-02 06:23:12.948525+00:00" }, { "purl": "pkg:hex/coherence@0.4.0", "version": "0.4.0", - "release_date": "2017-07-03T21:55:56.591426+00:00" + "release_date": "2017-07-03 21:55:56.591426+00:00" }, { "purl": "pkg:hex/coherence@0.3.1", "version": "0.3.1", - "release_date": "2016-11-27T05:30:34.553920+00:00" + "release_date": "2016-11-27 05:30:34.553920+00:00" }, { "purl": "pkg:hex/coherence@0.3.0", "version": "0.3.0", - "release_date": "2016-08-28T19:04:10.794525+00:00" + "release_date": "2016-08-28 19:04:10.794525+00:00" }, { "purl": "pkg:hex/coherence@0.2.0", "version": "0.2.0", - "release_date": "2016-07-30T21:07:45.377540+00:00" + "release_date": "2016-07-30 21:07:45.377540+00:00" }, { "purl": "pkg:hex/coherence@0.1.3", "version": "0.1.3", - "release_date": "2016-07-19T03:33:09.185782+00:00" + "release_date": "2016-07-19 03:33:09.185782+00:00" }, { "purl": "pkg:hex/coherence@0.1.2", "version": "0.1.2", - "release_date": "2016-07-12T18:41:27.084599+00:00" + "release_date": "2016-07-12 18:41:27.084599+00:00" }, { "purl": "pkg:hex/coherence@0.1.1", "version": "0.1.1", - "release_date": "2016-07-11T13:56:26.388096+00:00" + "release_date": "2016-07-11 13:56:26.388096+00:00" }, { "purl": "pkg:hex/coherence@0.1.0", "version": "0.1.0", - "release_date": "2016-07-11T06:52:43.545719+00:00" + "release_date": "2016-07-11 06:52:43.545719+00:00" } ] } diff --git a/purldb-toolkit/tests/test_purlcli.py b/purldb-toolkit/tests/test_purlcli.py index ebc276e6..d42bc2a0 100644 --- a/purldb-toolkit/tests/test_purlcli.py +++ b/purldb-toolkit/tests/test_purlcli.py @@ -2022,17 +2022,17 @@ def test_versions_cli_unique(self): { "purl": "pkg:pypi/fetchcode@0.1.0", "version": "0.1.0", - "release_date": "2021-08-25T15:15:15.265015+00:00", + "release_date": "2021-08-25 15:15:15.265015+00:00", }, { "purl": "pkg:pypi/fetchcode@0.2.0", "version": "0.2.0", - "release_date": "2022-09-14T16:36:02.242182+00:00", + "release_date": "2022-09-14 16:36:02.242182+00:00", }, { "purl": "pkg:pypi/fetchcode@0.3.0", "version": "0.3.0", - "release_date": "2023-12-18T20:49:45.840364+00:00", + "release_date": "2023-12-18 20:49:45.840364+00:00", }, ], } @@ -2064,7 +2064,7 @@ def test_versions_cli_unique(self): { "purl": "pkg:gem/bundler-sass@0.1.2", "version": "0.1.2", - "release_date": "2013-12-11T00:27:10.097000+00:00", + "release_date": "2013-12-11 00:27:10.097000+00:00", } ], } From 888d8d811b87a7f28f1f3d9a3412864e2ee9cebb Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Wed, 6 Mar 2024 18:59:57 -0800 Subject: [PATCH 5/6] Update app.log path #247 Reference: https://github.com/nexB/purldb/pull/305 Reference: https://github.com/nexB/purldb/issues/247 Signed-off-by: John M. Horan --- purldb-toolkit/src/purldb_toolkit/purlcli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/purldb-toolkit/src/purldb_toolkit/purlcli.py b/purldb-toolkit/src/purldb_toolkit/purlcli.py index 9c09fb3d..6f9804f2 100644 --- a/purldb-toolkit/src/purldb_toolkit/purlcli.py +++ b/purldb-toolkit/src/purldb_toolkit/purlcli.py @@ -254,7 +254,7 @@ def construct_headers( print(warning) continue - log_file = Path(log_file=os.path.join(os.path.expanduser("~"), "app.log")) + log_file = Path(os.path.join(os.path.expanduser("~"), "app.log")) if log_file.is_file(): with open(log_file, "r") as f: for line in f: @@ -842,7 +842,7 @@ def check_for_duplicate_input_sources(purls, file): def clear_log_file(): - log_file = Path(log_file=os.path.join(os.path.expanduser("~"), "app.log")) + log_file = Path(os.path.join(os.path.expanduser("~"), "app.log")) if log_file.is_file(): os.remove(log_file) From 9d293e85218f928b64045bf24cd0266bbd9590d9 Mon Sep 17 00:00:00 2001 From: "John M. Horan" Date: Thu, 7 Mar 2024 12:51:26 -0800 Subject: [PATCH 6/6] Rename log file and convert location to constant #247 Reference: https://github.com/nexB/purldb/pull/305 Reference: https://github.com/nexB/purldb/issues/247 Signed-off-by: John M. Horan --- purldb-toolkit/src/purldb_toolkit/purlcli.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/purldb-toolkit/src/purldb_toolkit/purlcli.py b/purldb-toolkit/src/purldb_toolkit/purlcli.py index 6f9804f2..e1809f1d 100644 --- a/purldb-toolkit/src/purldb_toolkit/purlcli.py +++ b/purldb-toolkit/src/purldb_toolkit/purlcli.py @@ -21,6 +21,8 @@ from packageurl import PackageURL from packageurl.contrib import purl2url +LOG_FILE_LOCATION = os.path.join(os.path.expanduser("~"), "purlcli.log") + @click.group() def purlcli(): @@ -254,7 +256,7 @@ def construct_headers( print(warning) continue - log_file = Path(os.path.join(os.path.expanduser("~"), "app.log")) + log_file = Path(LOG_FILE_LOCATION) if log_file.is_file(): with open(log_file, "r") as f: for line in f: @@ -657,7 +659,7 @@ def validate_purl(purl): print(f"validate_purl(): json.decoder.JSONDecodeError for '{purl}': {e}") logging.basicConfig( - filename=os.path.join(os.path.expanduser("~"), "app.log"), + filename=LOG_FILE_LOCATION, level=logging.ERROR, format="%(levelname)s - %(message)s", filemode="w", @@ -842,7 +844,7 @@ def check_for_duplicate_input_sources(purls, file): def clear_log_file(): - log_file = Path(os.path.join(os.path.expanduser("~"), "app.log")) + log_file = Path(LOG_FILE_LOCATION) if log_file.is_file(): os.remove(log_file)