Skip to content

Commit

Permalink
Address feedback and add docstrings
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Dec 30, 2024
1 parent 2d05cfd commit 2b17cc9
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 79 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/docs-ci.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: CI Documentation
name: CI Documentation and Code style

on: [push, pull_request]

Expand All @@ -21,7 +21,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Install Dependencies
run: pip install -e .[docs]
run: pip install -e .[docs,testing]

- name: Check Sphinx Documentation build minimally
working-directory: ./docs
Expand All @@ -31,4 +31,5 @@ jobs:
working-directory: ./docs
run: ./scripts/doc8_style_check.sh


- name: Check for Code style errors
run: make check-ci
14 changes: 11 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,19 @@ valid: isort black

check:
@echo "-> Run pycodestyle (PEP8) validation"
@${ACTIVATE} pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,migrations,settings.py,.cache .
@${ACTIVATE} pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,scripts,tests,migrations,settings.py,.cache .
@echo "-> Run isort imports ordering validation"
@${ACTIVATE} isort --sl --check-only -l 100 setup.py src tests .
@${ACTIVATE} isort --sl -l 100 src tests setup.py --check-only
@echo "-> Run black validation"
@${ACTIVATE} black --check --check -l 100 src tests setup.py
@${ACTIVATE} black --check -l 100 src tests setup.py

check-ci:
@echo "-> Run pycodestyle (PEP8) validation"
pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,scripts,tests,migrations,settings.py,.cache .
@echo "-> Run isort imports ordering validation"
isort --sl -l 100 src tests setup.py --check-only
@echo "-> Run black validation"
black --check -l 100 src tests setup.py

clean:
@echo "-> Clean the Python env"
Expand Down
1 change: 0 additions & 1 deletion src/rust_inspector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@
# See https://github.com/nexB/rust-inspector for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

80 changes: 50 additions & 30 deletions src/rust_inspector/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os
import lief
import json
import os
import zlib

import lief
from typecode import contenttype
from typecode.contenttype import get_type

Expand Down Expand Up @@ -67,7 +67,7 @@ def get_rust_packages_data(location):
to the rust binary with data on packages and dependencies.
See https://github.com/rust-secure-code/cargo-auditable for more info.
Code for parsing rust bianries to get package data is from
Code for parsing rust binaries to get package data is from
https://github.com/rust-secure-code/cargo-auditable/blob/master/PARSING.md
"""
if not is_executable_binary(location):
Expand All @@ -87,9 +87,12 @@ def get_rust_packages_data(location):
return packages_data



def might_have_rust_symbols(string_with_symbols):

"""
Given a demangled symbol string obtained from a rust binary, return True if
there are rust symbols present in the string which could be mapped to rust
source symbols potentially, return False otherwise.
"""
if not string_with_symbols:
return False

Expand Down Expand Up @@ -120,16 +123,20 @@ def might_have_rust_symbols(string_with_symbols):

return True

def remove_standard_symbols(rust_symbols):
return [
symbol
for symbol in rust_symbols
if symbol not in STANDARD_SYMBOLS_RUST
]

def remove_standard_symbols(rust_symbols, standard_symbols=STANDARD_SYMBOLS_RUST):
"""
Remove standard symbols usually found in rust binaries. Given a list of rust
symbol strings, return a list of symbol strings which are most likely non-standard.
"""
return [symbol for symbol in rust_symbols if symbol not in standard_symbols]


def split_strings_by_char(split_strings, split_char):

"""
Given a list of strings, return another list of strings with all
the substrings from each string, split by the `split_char`.
"""
final_split_strings = []
for split_str in split_strings:
if split_char in split_str:
Expand All @@ -138,15 +145,16 @@ def split_strings_by_char(split_strings, split_char):
else:
final_split_strings.append(split_str)

return [
split_string
for split_string in final_split_strings
if split_string
]
return [split_string for split_string in final_split_strings if split_string]


def split_strings_into_rust_symbols(strings_to_split, split_by_chars=SPLIT_CHARACTERS_RUST):

"""
Given a list of strings containing a group of rust symbols, get a list
of strings with the extracted individual symbol strings, using a list of
`split_by_chars` which are common characters found between rust symbols in
demangled rust string containing multiple symbols.
"""
split_strings = []
split_strings_log = []
for split_char in split_by_chars:
Expand All @@ -159,10 +167,17 @@ def split_strings_into_rust_symbols(strings_to_split, split_by_chars=SPLIT_CHARA
return split_strings


def cleanup_symbols(split_symbols, include_stdlib=False, unique=True, sort_symbols=False):
def cleanup_symbols(symbols, include_stdlib=False, unique=True, sort_symbols=False):
"""
Given a list of `symbols` strings, return a list of cleaned up
symbol strings, removing strings which do not have symbols.
If `include_stdlib` is False, remove standard rust symbols.
If `unique` is True, only return unique symbol strings.
If `sort_symbols` is True, return a sorted list of symbols.
"""
rust_symbols = []
for split_string in split_symbols:
for split_string in symbols:
if might_have_rust_symbols(split_string):
rust_symbols.append(split_string)

Expand All @@ -178,17 +193,22 @@ def cleanup_symbols(split_symbols, include_stdlib=False, unique=True, sort_symbo
return rust_symbols


def extract_strings_with_symbols(symbols_data, include_stdlib=False, unique=True, sort_symbols=False):

def extract_strings_with_symbols(
symbols_data, include_stdlib=False, unique=True, sort_symbols=False
):
"""
From a list of rust symbols data parsed and demangled from a binary,
return a list of individual symbols (after cleanup) found in the strings.
"""
strings_with_symbols = []

ignore_types = ["NOTYPE", "TLS"]

for symbol_data in symbols_data:

if not symbol_data.get("name"):
continue

if symbol_data.get("type") in ignore_types:
continue

Expand All @@ -202,14 +222,14 @@ def extract_strings_with_symbols(symbols_data, include_stdlib=False, unique=True
# These are usually like the following:
# `getrandom@GLIBC_2.25`, `__umodti3`, `_ITM_registerTMCloneTable`
# So these don't have source symbols
if symbol_data.get("binding") == 'WEAK':
if symbol_data.get("binding") == "WEAK":
continue

# file/module names are also source symbols as they
# are imported in source code files
if symbol_data.get("type") == "FILE":
file_string = symbol_data.get("name")
file_segments = file_string.split('.')
file_segments = file_string.split(".")
if not file_segments:
continue

Expand All @@ -227,7 +247,7 @@ def extract_strings_with_symbols(symbols_data, include_stdlib=False, unique=True

split_symbols = split_strings_into_rust_symbols(strings_to_split=strings_with_symbols)
rust_symbols = cleanup_symbols(
split_symbols=split_symbols,
symbols=split_symbols,
include_stdlib=include_stdlib,
unique=unique,
sort_symbols=sort_symbols,
Expand All @@ -240,7 +260,6 @@ def collect_and_parse_rust_symbols(location, include_stdlib=False, sort_symbols=
"""
Return a mapping of Rust symbols of interest for the Rust binary file at ``location``.
Return an empty mapping if there are no symbols or if this is not a binary.
Raise exceptions on errors.
"""
if not is_executable_binary(location):
return
Expand All @@ -254,11 +273,12 @@ def collect_and_parse_rust_symbols(location, include_stdlib=False, sort_symbols=
)


def collect_and_parse_rust_symbols_from_data(rust_data, include_stdlib=False, unique=True, sort_symbols=False, **kwargs):
def collect_and_parse_rust_symbols_from_data(
rust_data, include_stdlib=False, unique=True, sort_symbols=False, **kwargs
):
"""
Return a mapping of Rust symbols of interest for the mapping of Rust binary of ``rust_data``.
Return an empty mapping if there are no symbols or if this is not a binary.
Raise exceptions on errors.
"""
if not rust_data:
return {}
Expand Down
38 changes: 26 additions & 12 deletions src/rust_inspector/blint_binary.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,29 @@
import lief
from symbolic._lowlevel import ffi, lib
from symbolic.utils import encode_str, decode_str, rustcall
#
# Copyright (c) OWASP Foundation
# SPDX-License-Identifier: MIT
#
# Originally taken from
# https://github.com/owasp-dep-scan/blint/blob/1e1250a4bf6c25eccba8970bd877901ee56070c7/blint/lib/binary.py
# Used after minor modifications.
#

import lief
from symbolic._lowlevel import ffi
from symbolic._lowlevel import lib
from symbolic.utils import decode_str
from symbolic.utils import encode_str
from symbolic.utils import rustcall

# TODO: Consider using blint as a dependency instead of vendoring


def demangle_symbolic_name(symbol, lang=None, no_args=False):
"""Demangles symbol using llvm demangle falling back to some heuristics. Covers legacy rust."""
"""
Return a demangled symbol string, given a symbol string.
Demangles symbols obtained from a rust binary using llvm demangle (using symbolic),
falling back to some heuristics. Also covers legacy rust.
"""
try:
func = lib.symbolic_demangle_no_args if no_args else lib.symbolic_demangle
lang_str = encode_str(lang) if lang else ffi.NULL
Expand All @@ -27,7 +43,10 @@ def demangle_symbolic_name(symbol, lang=None, no_args=False):
or symbol.startswith(".rdata$")
or symbol.startswith(".refptr.")
):
symbol = f"__declspec(dllimport) {symbol.removeprefix('__imp_').removeprefix('.rdata$').removeprefix('.refptr.')}"
symbol_without_prefix = (
symbol.removeprefix("__imp_").removeprefix(".rdata$").removeprefix(".refptr.")
)
symbol = f"__declspec(dllimport) {symbol_without_prefix}"
demangled_symbol = (
symbol.replace("..", "::")
.replace("$SP$", "@")
Expand Down Expand Up @@ -58,13 +77,8 @@ def demangle_symbolic_name(symbol, lang=None, no_args=False):

def parse_symbols(symbols):
"""
Parse symbols from a list of symbols.
Args:
symbols (it_symbols): A list of symbols to parse.
Returns:
tuple[list[dict], str]: A tuple containing the symbols_list and exe_type
Parse symbols from a list of symbol strings and get a list of symbol
data, with the demangled symbol string and other attributes for the symbol.
"""
symbols_list = []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ download_url: https://github.com/owasp-dep-scan/blint/blob/1e1250a4bf6c25eccba89
license_expression: mit
copyright: Copyright (c) OWASP Foundation
package_url: pkg:pypi/[email protected]
notice_file: blint_binary.py.LICENSE
notes: only a subset of functions from binary.py is used, after minor modifications
File renamed without changes.
5 changes: 3 additions & 2 deletions src/rust_inspector/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
SPLIT_CHARACTERS_RUST = ["::", "_<", "<", ">", "(", ")", ",", " as ", " for "]



# Standard symbols present in rust binaries which are not usually from rust
# source files, and sometimes they are standard library symbols
STANDARD_SYMBOLS_RUST = [
"std",
"vector",
]
]
Loading

0 comments on commit 2b17cc9

Please sign in to comment.