diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 8c2abfe..929abd7 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -1,4 +1,4 @@ -name: CI Documentation +name: CI Documentation and Code style on: [push, pull_request] @@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install Dependencies - run: pip install -e .[docs] + run: pip install -e .[docs,testing] - name: Check Sphinx Documentation build minimally working-directory: ./docs @@ -31,4 +31,5 @@ jobs: working-directory: ./docs run: ./scripts/doc8_style_check.sh - + - name: Check for Code style errors + run: make check-ci diff --git a/Makefile b/Makefile index 94451b3..9840741 100644 --- a/Makefile +++ b/Makefile @@ -33,11 +33,19 @@ valid: isort black check: @echo "-> Run pycodestyle (PEP8) validation" - @${ACTIVATE} pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,migrations,settings.py,.cache . + @${ACTIVATE} pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,scripts,tests,migrations,settings.py,.cache . @echo "-> Run isort imports ordering validation" - @${ACTIVATE} isort --sl --check-only -l 100 setup.py src tests . + @${ACTIVATE} isort --sl -l 100 src tests setup.py --check-only @echo "-> Run black validation" - @${ACTIVATE} black --check --check -l 100 src tests setup.py + @${ACTIVATE} black --check -l 100 src tests setup.py + +check-ci: + @echo "-> Run pycodestyle (PEP8) validation" + pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,scripts,tests,migrations,settings.py,.cache . 
+ @echo "-> Run isort imports ordering validation" + isort --sl -l 100 src tests setup.py --check-only + @echo "-> Run black validation" + black --check -l 100 src tests setup.py clean: @echo "-> Clean the Python env" diff --git a/src/rust_inspector/__init__.py b/src/rust_inspector/__init__.py index bcb2369..5cec191 100644 --- a/src/rust_inspector/__init__.py +++ b/src/rust_inspector/__init__.py @@ -7,4 +7,3 @@ # See https://github.com/nexB/rust-inspector for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # - diff --git a/src/rust_inspector/binary.py b/src/rust_inspector/binary.py index 627dfe4..1117765 100644 --- a/src/rust_inspector/binary.py +++ b/src/rust_inspector/binary.py @@ -8,11 +8,11 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -import os -import lief import json +import os import zlib +import lief from typecode import contenttype from typecode.contenttype import get_type @@ -67,7 +67,7 @@ def get_rust_packages_data(location): to the rust binary with data on packages and dependencies. See https://github.com/rust-secure-code/cargo-auditable for more info. - Code for parsing rust bianries to get package data is from + Code for parsing rust binaries to get package data is from https://github.com/rust-secure-code/cargo-auditable/blob/master/PARSING.md """ if not is_executable_binary(location): @@ -87,9 +87,12 @@ def get_rust_packages_data(location): return packages_data - def might_have_rust_symbols(string_with_symbols): - + """ + Given a demangled symbol string obtained from a rust binary, return True if + there are rust symbols present in the string which could be mapped to rust + source symbols potentially, return False otherwise. 
+ """ if not string_with_symbols: return False @@ -120,16 +123,20 @@ def might_have_rust_symbols(string_with_symbols): return True -def remove_standard_symbols(rust_symbols): - return [ - symbol - for symbol in rust_symbols - if symbol not in STANDARD_SYMBOLS_RUST - ] + +def remove_standard_symbols(rust_symbols, standard_symbols=STANDARD_SYMBOLS_RUST): + """ + Remove standard symbols usually found in rust binaries. Given a list of rust + symbol strings, return a list of symbol strings which are most likely non-standard. + """ + return [symbol for symbol in rust_symbols if symbol not in standard_symbols] def split_strings_by_char(split_strings, split_char): - + """ + Given a list of strings, return another list of strings with all + the substrings from each string, split by the `split_char`. + """ final_split_strings = [] for split_str in split_strings: if split_char in split_str: @@ -138,15 +145,16 @@ def split_strings_by_char(split_strings, split_char): else: final_split_strings.append(split_str) - return [ - split_string - for split_string in final_split_strings - if split_string - ] + return [split_string for split_string in final_split_strings if split_string] def split_strings_into_rust_symbols(strings_to_split, split_by_chars=SPLIT_CHARACTERS_RUST): - + """ + Given a list of strings containing a group of rust symbols, get a list + of strings with the extracted individual symbol strings, using a list of + `split_by_chars` which are common characters found between rust symbols in + demangled rust string containing multiple symbols. 
+ """ split_strings = [] split_strings_log = [] for split_char in split_by_chars: @@ -159,10 +167,17 @@ def split_strings_into_rust_symbols(strings_to_split, split_by_chars=SPLIT_CHARA return split_strings -def cleanup_symbols(split_symbols, include_stdlib=False, unique=True, sort_symbols=False): +def cleanup_symbols(symbols, include_stdlib=False, unique=True, sort_symbols=False): + """ + Given a list of `symbols` strings, return a list of cleaned up + symbol strings, removing strings which do not have symbols. + If `include_stdlib` is False, remove standard rust symbols. + If `unique` is True, only return unique symbol strings. + If `sort_symbols` is True, return a sorted list of symbols. + """ rust_symbols = [] - for split_string in split_symbols: + for split_string in symbols: if might_have_rust_symbols(split_string): rust_symbols.append(split_string) @@ -178,17 +193,22 @@ def cleanup_symbols(split_symbols, include_stdlib=False, unique=True, sort_symbo return rust_symbols -def extract_strings_with_symbols(symbols_data, include_stdlib=False, unique=True, sort_symbols=False): - +def extract_strings_with_symbols( + symbols_data, include_stdlib=False, unique=True, sort_symbols=False +): + """ + From a list of rust symbols data parsed and demangled from a binary, + return a list of individual symbols (after cleanup) found in the strings. 
+ """ strings_with_symbols = [] - + ignore_types = ["NOTYPE", "TLS"] for symbol_data in symbols_data: if not symbol_data.get("name"): continue - + if symbol_data.get("type") in ignore_types: continue @@ -202,14 +222,14 @@ def extract_strings_with_symbols(symbols_data, include_stdlib=False, unique=True # These are usually like the following: # `getrandom@GLIBC_2.25`, `__umodti3`, `_ITM_registerTMCloneTable` # So these doesn't have source symbols - if symbol_data.get("binding") == 'WEAK': + if symbol_data.get("binding") == "WEAK": continue # file/module names are also source symbols as they # are imported in source code files if symbol_data.get("type") == "FILE": file_string = symbol_data.get("name") - file_segments = file_string.split('.') + file_segments = file_string.split(".") if not file_segments: continue @@ -227,7 +247,7 @@ def extract_strings_with_symbols(symbols_data, include_stdlib=False, unique=True split_symbols = split_strings_into_rust_symbols(strings_to_split=strings_with_symbols) rust_symbols = cleanup_symbols( - split_symbols=split_symbols, + symbols=split_symbols, include_stdlib=include_stdlib, unique=unique, sort_symbols=sort_symbols, @@ -240,7 +260,6 @@ def collect_and_parse_rust_symbols(location, include_stdlib=False, sort_symbols= """ Return a mapping of Rust symbols of interest for the Rust binary file at ``location``. Return an empty mapping if there is no symbols or if this is not a binary. - Raise exceptions on errors. """ if not is_executable_binary(location): return @@ -254,11 +273,12 @@ def collect_and_parse_rust_symbols(location, include_stdlib=False, sort_symbols= ) -def collect_and_parse_rust_symbols_from_data(rust_data, include_stdlib=False, unique=True, sort_symbols=False, **kwargs): +def collect_and_parse_rust_symbols_from_data( + rust_data, include_stdlib=False, unique=True, sort_symbols=False, **kwargs +): """ Return a mapping of Rust symbols of interest for the mapping of Rust binary of ``rust_data``. 
Return an empty mapping if there is no symbols or if this is not a binary. - Raise exceptions on errors. """ if not rust_data: return {} diff --git a/src/rust_inspector/blint_binary.py b/src/rust_inspector/blint_binary.py index d1dbde5..5a3e01c 100644 --- a/src/rust_inspector/blint_binary.py +++ b/src/rust_inspector/blint_binary.py @@ -1,13 +1,29 @@ -import lief -from symbolic._lowlevel import ffi, lib -from symbolic.utils import encode_str, decode_str, rustcall +# +# Copyright (c) OWASP Foundation +# SPDX-License-Identifier: MIT +# +# Originally taken from +# https://github.com/owasp-dep-scan/blint/blob/1e1250a4bf6c25eccba8970bd877901ee56070c7/blint/lib/binary.py +# Used after minor modifications. +# +import lief +from symbolic._lowlevel import ffi +from symbolic._lowlevel import lib +from symbolic.utils import decode_str +from symbolic.utils import encode_str +from symbolic.utils import rustcall # TODO: Consider using blint as a dependency instead of vendoring def demangle_symbolic_name(symbol, lang=None, no_args=False): - """Demangles symbol using llvm demangle falling back to some heuristics. Covers legacy rust.""" + """ + Return a demangled symbol string, given a symbol string. + + Demangles symbols obtained from a rust binary using llvm demangle (using symbolic), + falling back to some heuristics. Also covers legacy rust. 
+ """ try: func = lib.symbolic_demangle_no_args if no_args else lib.symbolic_demangle lang_str = encode_str(lang) if lang else ffi.NULL @@ -27,7 +43,10 @@ def demangle_symbolic_name(symbol, lang=None, no_args=False): or symbol.startswith(".rdata$") or symbol.startswith(".refptr.") ): - symbol = f"__declspec(dllimport) {symbol.removeprefix('__imp_').removeprefix('.rdata$').removeprefix('.refptr.')}" + symbol_without_prefix = ( + symbol.removeprefix("__imp_").removeprefix(".rdata$").removeprefix(".refptr.") + ) + symbol = f"__declspec(dllimport) {symbol_without_prefix}" demangled_symbol = ( symbol.replace("..", "::") .replace("$SP$", "@") @@ -58,13 +77,8 @@ def demangle_symbolic_name(symbol, lang=None, no_args=False): def parse_symbols(symbols): """ - Parse symbols from a list of symbols. - - Args: - symbols (it_symbols): A list of symbols to parse. - - Returns: - tuple[list[dict], str]: A tuple containing the symbols_list and exe_type + Parse symbols from a list of symbol strings and get a list of symbol + data, with the demangled symbol string and other attributes for the symbol. 
""" symbols_list = [] diff --git a/src/rust_inspector/blint_binary.ABOUT b/src/rust_inspector/blint_binary.py.ABOUT similarity index 93% rename from src/rust_inspector/blint_binary.ABOUT rename to src/rust_inspector/blint_binary.py.ABOUT index 295f96b..a928701 100644 --- a/src/rust_inspector/blint_binary.ABOUT +++ b/src/rust_inspector/blint_binary.py.ABOUT @@ -7,4 +7,5 @@ download_url: https://github.com/owasp-dep-scan/blint/blob/1e1250a4bf6c25eccba89 license_expression: mit copyright: Copyright (c) OWASP Foundation package_url: pkg:pypi/blint@2.3.2 +notice_file: blint_binary.py.LICENSE notes: only a subset of functions from binary.py is used, after minor modifications diff --git a/src/rust_inspector/blint_binary.LICENSE b/src/rust_inspector/blint_binary.py.LICENSE similarity index 100% rename from src/rust_inspector/blint_binary.LICENSE rename to src/rust_inspector/blint_binary.py.LICENSE diff --git a/src/rust_inspector/config.py b/src/rust_inspector/config.py index 6323b1e..f7cfe6d 100644 --- a/src/rust_inspector/config.py +++ b/src/rust_inspector/config.py @@ -16,8 +16,9 @@ SPLIT_CHARACTERS_RUST = ["::", "_<", "<", ">", "(", ")", ",", " as ", " for "] - +# Standard symbols present in rust binaries which are not usually from rust +# source files, and sometimes they are standard library symbols STANDARD_SYMBOLS_RUST = [ "std", "vector", -] \ No newline at end of file +] diff --git a/src/rust_inspector/packages.py b/src/rust_inspector/packages.py index df7d681..50dd7f3 100644 --- a/src/rust_inspector/packages.py +++ b/src/rust_inspector/packages.py @@ -15,14 +15,15 @@ """ Extract packages information from Rust binaries using Lief. -This gets packages from +This gets packages from binaries which are built using `cargo-auditable`. +See https://github.com/rust-secure-code/cargo-auditable for more info +on `cargo-auditable`. """ def collect_rust_packages(location, package_only=False, **kwargs): """ Yield cargo PackageData found in the Rust binary file at ``location``. 
- Raise exceptions on errors. """ binary_data = get_rust_packages_data(location=location) yield from collect_rust_packages_from_data( @@ -34,8 +35,8 @@ def collect_rust_packages(location, package_only=False, **kwargs): def collect_rust_packages_from_data(binary_data, package_only=False, **kwargs): """ - Yield cargo PackageData found in the Rust binary file ``binary_data`` mapping extracted from - ``location``. Raise exceptions on errors. + Yield all the cargo PackageData with their dependencies found in the Rust binary file + from ``binary_data`` present in a rust binary. The data has this shape:: { @@ -81,7 +82,13 @@ def collect_rust_packages_from_data(binary_data, package_only=False, **kwargs): def get_rust_package_from_data(package_data, packages_by_index, package_only=False): + """ + Yield a PackageData with its dependencies from a data mapping `package_data` + containing package and dependencies information for a single cargo package. + `packages_by_index` is a mapping of DependentPackage objects by their index in + the list of packages present in the rust binary. 
+ """ from packagedcode.models import PackageData name = package_data.get("name") @@ -96,9 +103,11 @@ def get_rust_package_from_data(package_data, packages_by_index, package_only=Fal is_private = True elif package_data.get("source") == "crates.io": - repository_homepage_url = name and f'https://crates.io/crates/{name}' - repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download' - api_data_url = name and f'https://crates.io/api/v1/crates/{name}' + repository_homepage_url = name and f"https://crates.io/crates/{name}" + repository_download_url = ( + name and version and f"https://crates.io/api/v1/crates/{name}/{version}/download" + ) + api_data_url = name and f"https://crates.io/api/v1/crates/{name}" dependencies = [] for dependency_index in package_data.get("dependencies", []): @@ -120,7 +129,9 @@ def get_rust_package_from_data(package_data, packages_by_index, package_only=Fal def get_dependent_package(package_data): - + """ + Get a DependentPackage object from a cargo `package_data` mapping. + """ from packagedcode.models import DependentPackage name = package_data.get("name") @@ -140,6 +151,10 @@ def get_dependent_package(package_data): def get_rust_binary_handler(): + """ + Return `RustBinaryHandler` class to parse and get packages information from + rust binary files. 
+ """ from packagedcode import models class RustBinaryHandler(models.DatafileHandler): @@ -154,7 +169,9 @@ class RustBinaryHandler(models.DatafileHandler): default_package_type = "cargo" default_primary_language = "Rust" description = "Rust binary" - documentation_url = "https://github.com/rust-secure-code/cargo-auditable/blob/master/PARSING.md" + documentation_url = ( + "https://github.com/rust-secure-code/cargo-auditable/blob/master/PARSING.md" + ) @classmethod def is_datafile(cls, location): diff --git a/tests/test_binary.py b/tests/test_binary.py index 68807c9..8329667 100644 --- a/tests/test_binary.py +++ b/tests/test_binary.py @@ -9,19 +9,17 @@ # import json -import lief import os +import lief import pytest from commoncode.testcase import FileDrivenTesting from scancode.cli_test_utils import check_json from scancode_config import REGEN_TEST_FIXTURES from rust_inspector import binary - from rust_inspector.blint_binary import parse_symbols - test_env = FileDrivenTesting() test_env.test_data_dir = os.path.join(os.path.dirname(__file__), "data") @@ -72,13 +70,22 @@ def test_get_rust_packages_data_large(): check_json(expected, rust_packages_data, regen=REGEN_TEST_FIXTURES) - @pytest.mark.parametrize( "split_strings,split_char,expected_split_strings", [ ( - ["core::ptr::drop_in_place"], "::", - ["core", "ptr", "drop_in_place"] + ["core::ptr::drop_in_place"], + "::", + [ + "core", + "ptr", + "drop_in_place", + ], ), ], ) @@ -92,7 +99,17 @@ def test_split_strings_by_char(split_strings, split_char, expected_split_strings [ ( ["core::ptr::drop_in_place"], - ["core", "ptr", "drop_in_place", "cyclonedx_bom", "specs", "common", "bom", "v1_5", "Bom"] + [ + "core", + "ptr", + "drop_in_place", + "cyclonedx_bom", + "specs", + "common", + "bom", + "v1_5", + "Bom", + ], ), ], ) @@ -106,7 +123,7 @@ def test_split_strings_into_rust_symbols(strings_to_split, expected_split_string [ ( ["async_io::reactor::Reactor::process_timers::__CALLSITE::META"], - ["async_io", "reactor", 
"Reactor", "process_timers"] + ["async_io", "reactor", "Reactor", "process_timers"], ), ], ) @@ -119,10 +136,10 @@ def test_split_strings_into_cleaned_rust_symbols(strings_to_split, symbols): def test_might_have_rust_symbols(): strings_with_symbols = ["async_io::reactor::Reactor::process_timers::__CALLSITE::META"] final_split_strings = binary.split_strings_into_rust_symbols(strings_with_symbols) - assert sum([ - binary.might_have_rust_symbols(split_string) - for split_string in final_split_strings - ]) == 4 + assert ( + sum([binary.might_have_rust_symbols(split_string) for split_string in final_split_strings]) + == 4 + ) def test_extract_strings_with_symbols(): @@ -130,15 +147,20 @@ def test_extract_strings_with_symbols(): with open(symbols_data_file) as res: rust_symbols_data = json.load(res) - extracted_symbols = binary.extract_strings_with_symbols(symbols_data=rust_symbols_data, sort_symbols=True) + extracted_symbols = binary.extract_strings_with_symbols( + symbols_data=rust_symbols_data, sort_symbols=True + ) expected = test_env.get_test_loc("binary-with-deps/cargo_dependencies-symbols-cleaned.json") check_json(expected, extracted_symbols, regen=REGEN_TEST_FIXTURES) + def test_extract_strings_with_symbols_large(): symbols_data_file = test_env.get_test_loc("trustier/trustier-symbols.json") with open(symbols_data_file) as res: rust_symbols_data = json.load(res) - extracted_symbols = binary.extract_strings_with_symbols(symbols_data=rust_symbols_data, sort_symbols=True) + extracted_symbols = binary.extract_strings_with_symbols( + symbols_data=rust_symbols_data, sort_symbols=True + ) expected = test_env.get_test_loc("trustier/trustier-symbols-cleaned.json") - check_json(expected, extracted_symbols, regen=REGEN_TEST_FIXTURES) \ No newline at end of file + check_json(expected, extracted_symbols, regen=REGEN_TEST_FIXTURES) diff --git a/tests/test_packages.py b/tests/test_packages.py index 44a9b6e..f35bda4 100644 --- a/tests/test_packages.py +++ b/tests/test_packages.py 
@@ -18,12 +18,10 @@ from rust_inspector import packages - test_env = FileDrivenTesting() test_env.test_data_dir = os.path.join(os.path.dirname(__file__), "data") - def test_can_collect_rust_packages_from_data(): packages_data_file = test_env.get_test_loc("binary-with-deps/cargo_dependencies-packages.json") with open(packages_data_file) as res: diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 6765330..74024c0 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -10,7 +10,6 @@ import os - from commoncode.testcase import FileDrivenTesting from scancode.cli_test_utils import check_json_scan from scancode.cli_test_utils import run_scan_click @@ -27,4 +26,3 @@ def test_scancode_plugin_with_rust_symbol_option(): run_scan_click(args) expected = test_env.get_test_loc("binary-with-deps/cargo_dependencies-scancode.expected.json") check_json_scan(expected, result_file, regen=REGEN_TEST_FIXTURES) -