diff --git a/README.md b/README.md index 0fcd2e8..31bbf2e 100644 --- a/README.md +++ b/README.md @@ -2,41 +2,21 @@ Parsing AST with solidity source code and get contract information. +## Quickstart -## Usage +### Installation -Using combined json, support for contract with multiple source files is limited: +Clone this repository and install it with pip: -``` python -from solc_json_parser.combined_json_parser import CombinedJsonParser - -# The input can be a file path or source code -parser = CombinedJsonParser('contracts/BlackScholesEstimate_8.sol') -parser.all_contract_names - -# List all functions in contract -parser.functions_in_contract_by_name('BlackScholesEstimate', name_only=True) - -# Get source code by program counter -parser.source_by_pc('BlackScholesEstimate', 92) -{'pc': 92, - 'fragment': 'function retBasedBlackScholesEstimate(\n uint256[] memory _numbers,\n uint256 _underlying,\n uint256 _time\n ) public pure {\n uint _vol = stddev(_numbers);\n blackScholesEstimate(_vol, _underlying, _time);\n }', - 'begin': 2633, - 'end': 2877, - 'linenums': (69, 76), - 'source_idx': 0, - 'source_path': 'BlackScholesEstimate_8.sol'} - - -# Get deployment code by contract name -parser.get_deploy_bin_by_contract_name('BlackScholesEstimate') - -# Get literal values by contract name -parser.get_literals('BlackScholesEstimate', True) -{'number': {0, 1, 2, 40}, 'string': set(), 'address': set(), 'other': set()} +``` bash +git clone https://github.com/sbip-sg/solc-json-parser.git +cd solc-json-parser +pip install . 
``` -Using [standard json](https://docs.soliditylang.org/en/v0.8.17/using-the-compiler.html#compiler-input-and-output-json-description): +### Usage + +Example using [standard json](https://docs.soliditylang.org/en/v0.8.17/using-the-compiler.html#compiler-input-and-output-json-description): ``` python import json @@ -47,10 +27,69 @@ with open('contracts/standard_json/75b8.standard-input.json') as f: version = '0.8.4' parser = StandardJsonParser(input_json, version) -# Other usages are the same as combined json parser +# Get all contract names +parser.all_contract_names +# ['IERC1271', +# ... +# 'ContractKeys', +# 'NFTfiSigningUtils', +# 'NftReceiver', +# 'Ownable'] + +# Get source code by PC +source = parser.source_by_pc('DirectLoanFixedOffer', 13232) +source +# {'pc': 13232, +# 'linenums': [921, 924], +# 'fragment': 'LoanChecksAndCalculations.computeRevenueShare(\n adminFee,\n loanExtras.revenueShareInBasisPoints\n )', +# 'fid': 'contracts/loans/direct/loanTypes/DirectLoanBaseMinimal.sol', +# 'begin': 45007, +# 'end': 45134, +# 'source_idx': 26, +# 'source_path': 'contracts/loans/direct/loanTypes/DirectLoanBaseMinimal.sol'} + +# Get function AST unit by PC +func = parser.function_unit_by_pc('DirectLoanFixedOffer', 13232) +# Parameter names of this function +[n.get('name') for n in func.get('parameters').get('parameters')] +# ['_loanId', '_borrower', '_lender', '_loan'] +# Function selector, available only for external or public functions +func.get('functionSelector') + +# Get the innermost AST unit by PC +parser.ast_unit_by_pc('DirectLoanFixedOffer', 13232) +``` + +## Command line tools + +``` bash +solc-json-parser --help +``` + +Decode binary to opcodes: + +``` bash +❯ solc-json-parser dp 0x60806040525f80fdfea26469706673582212200466fd4ed0d73499199c39545f7019da158defa354cc0051afe02754ec8e32b464736f6c63430008180033 +PUSH1 0x80 +PUSH1 0x40 +MSTORE +PUSH0 +DUP1 +REVERT +INVALID +LOG2 +PUSH5 0x6970667358 +0X22 +SLT +SHA3 +DIV +PUSH7 0xfd4ed0d7349919 +SWAP13 
+... ``` + ## Note - This library only supports detecting Solidity version newer than or equal to diff --git a/setup.py b/setup.py index 6a8ee28..bd8f434 100644 --- a/setup.py +++ b/setup.py @@ -20,4 +20,9 @@ setup_requires=['pytest-runner'], tests_require=['pytest>=4.4.1'], test_suite='tests', + entry_points={ + 'console_scripts': [ + 'solc-json-parser=solc_json_parser.cli:main', + ], + }, ) diff --git a/solc_json_parser/cli.py b/solc_json_parser/cli.py new file mode 100644 index 0000000..4219832 --- /dev/null +++ b/solc_json_parser/cli.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +import argparse +from . import opcodes + +def main(): + parser = argparse.ArgumentParser(description='CLI tool description.') + subparsers = parser.add_subparsers(dest='command', help='Subcommands') + decode_parser = subparsers.add_parser('decode_binary', aliases=['dp'], help='Decode binary data') + decode_parser.add_argument('data', type=str, help='Binary data to decode') + + args = parser.parse_args() + + if args.command in ['decode_binary', 'dp']: + opcodes.decode_and_print(args.data) + else: + parser.print_help() + +if __name__ == "__main__": + main() diff --git a/solc_json_parser/opcodes.py b/solc_json_parser/opcodes.py index 67f94ec..1f2ea4d 100644 --- a/solc_json_parser/opcodes.py +++ b/solc_json_parser/opcodes.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict +from typing import Dict name_to_byte: Dict[str, int] = dict( # @@ -99,6 +99,7 @@ # # Push Operations # + PUSH0 = 0x5f, PUSH1 = 0x60, PUSH2 = 0x61, PUSH3 = 0x62, @@ -196,7 +197,36 @@ CREATE2 = 0xf5, STATICCALL = 0xfa, REVERT = 0xfd, + INVALID = 0xfe, SELFDESTRUCT = 0xff, ) byte_to_name: Dict[int, str] = {v : k for k, v in name_to_byte.items()} + +# convert and print 0x60606040526000357c010 to human readable opcodes +def decode_and_print(binary_hex): + if binary_hex[:2].lower() == '0x': + binary_hex = binary_hex[2:] + i = 0 + while i < len(binary_hex): + offset = i + 2 + opcode = int(binary_hex[i:i+2], 16) + opcode_name = 
byte_to_name.get(opcode) + if opcode_name is None: + # raise ValueError(f"Unknown opcode: {opcode:02x}, from: {opcodes[i:]}") + print(f"0x{opcode:02x}".upper()) + else: + if opcode_name.startswith('PUSH'): + length = int(opcode_name[4:]) + offset += length*2 + if length > 0: + print(f"{opcode_name} 0x{binary_hex[i+2:offset]}") + else: + print(opcode_name) + else: + print(opcode_name) + i = offset + +# convert opcodes to binary hex and print +def encode_and_print(opcodes): + pass diff --git a/solc_json_parser/standard_json_parser.py b/solc_json_parser/standard_json_parser.py index 1137b10..d133b48 100644 --- a/solc_json_parser/standard_json_parser.py +++ b/solc_json_parser/standard_json_parser.py @@ -9,6 +9,15 @@ from .base_parser import BaseParser from .fields import Field, Function, ContractData, Modifier, Event, Literal +def node_contains(src_str: str, pc_source: dict) -> bool: + """ + Check if the source code contains the given pc_source + """ + if not src_str: + return False + offset, length, _fidx = list(map(int, src_str.split(':'))) + return offset <= pc_source['begin'] and offset + length >= pc_source['end'] + def compile_standard(version: str, input_json: dict, solc_bin_resolver: Callable[[str], str] = solc_bin, cwd: Optional[str]=None): ''' Compile standard input json and parse output as json. 
@@ -181,11 +190,22 @@ def override_settings(input_json): class StandardJsonParser(BaseParser): - def __init__(self, input_json: Union[dict, str], version: str, solc_bin_resolver: Callable[[str], str] = solc_bin, cwd: Optional[str] = None): + def __init__(self, input_json: Union[dict, str], version: str, solc_bin_resolver: Callable[[str], str] = solc_bin, cwd: Optional[str] = None, + retry_num: Optional[int]=0, + try_install_solc: Optional[bool]=False, + solc_options: Optional[Dict] = {}): + if retry_num is not None and retry_num > 0: + raise Exception('StandardJsonParser does not support retry') + super().__init__() self.file_path = None self.solc_version: str = version - self.input_json: dict = input_json if isinstance(input_json, dict) else json.loads(input_json) + try: + # try parse as json + self.input_json: dict = input_json if isinstance(input_json, dict) else json.loads(input_json) + except json.JSONDecodeError: + # try use input as a plain source file + self.input_json = StandardJsonParser.__prepare_standard_input(input_json) self.input_json = override_settings(self.input_json) @@ -195,11 +215,40 @@ def __init__(self, input_json: Union[dict, str], version: str, solc_bin_resolver self.cwd = cwd self.output_json = compile_standard(version, self.input_json, solc_bin_resolver, cwd) + if has_compilation_error(self.output_json): raise SolidityAstError(f"Compile failed: {self.output_json.get('errors')}" ) self.post_configure_compatible_fields() + @staticmethod + def __prepare_standard_input(source: str) -> Dict: + if '\n' not in source: + with open(source, 'r') as f: + source = f.read() + + input_json = { + 'language': 'Solidity', + 'sources': { + 'source.sol': { + 'content': source + } + }, + 'settings': { + 'optimizer': { + 'enabled': False, + }, + 'evmVersion': 'istanbul', + 'outputSelection': { + '*': { + '*': [ '*' ], + '': ['ast'] + } + } + } + } + return input_json + def prepare_by_version(self): super().prepare_by_version() @@ -271,6 +320,12 @@ def 
post_configure_compatible_fields(self): def source_by_pc(self, contract_name: str, pc: int, deploy=False) -> Optional[dict]: + """ + Get source code by program counter(pc) in a contract. + - `contract_name`: contract name in string + - `pc`: program counter in integer + - `deploy`: set to True if the PC is from the deployment code instead of runtime code. Default is False + """ evms = evms_by_contract_name(self.output_json, contract_name) for _, evm in evms: code, pc2idx, *_ = self.__build_pc2idx(evm, deploy) @@ -279,6 +334,58 @@ def source_by_pc(self, contract_name: str, pc: int, deploy=False) -> Optional[di return result return None + def __extract_node(self, pred: Callable, root_node: List[Dict], first_only=True) -> List[Dict]: + to_visit = [root_node] + found = [] + while True: + if not to_visit: + break + + node = to_visit.pop(0) + + if type(node) not in {dict, list}: + continue + + if type(node) == list: + to_visit += node + continue + + children = list(node.values()) + + if children: + to_visit += children + if pred(node): + found.append(node) + if first_only: + break + + return found + + def ast_units_by_pc(self, contract_name: str, pc: int, node_type: Optional[str], deploy=False, first_only=False) -> List[Dict]: + """ + Get all AST units by PC + """ + pc_source = self.source_by_pc(contract_name, pc, deploy) + if not pc_source: + return [] + pred = lambda node: node and (node_type is None or node.get('nodeType') == node_type) and node_contains(node.get('src'), pc_source) + return self.__extract_node(pred, self.output_json['sources'][pc_source['fid']]['ast'], first_only=first_only) + + def function_unit_by_pc(self, contract_name: str, pc: int, deploy=False) -> Optional[Dict]: + """ + Get the function AST unit containing the PC + """ + units = self.ast_units_by_pc(contract_name, pc, 'FunctionDefinition', deploy, first_only=True) + return units[0] if units else None + + def ast_unit_by_pc(self, contract_name: str, pc: int, deploy=False) -> Optional[Dict]: + 
""" + Get the smallest AST unit containing the PC + """ + units = self.ast_units_by_pc(contract_name, pc, node_type=None, deploy=deploy, first_only=False) + return units[-1] if units else None + + def __build_pc2idx(self, evm: dict, deploy: bool = False) -> Tuple[list, dict, dict]: return build_pc2idx(evm, deploy) diff --git a/tests/test_standard_json_parser_multifile.py b/tests/test_standard_json_parser_multifile.py index 92aee3e..dc38a95 100644 --- a/tests/test_standard_json_parser_multifile.py +++ b/tests/test_standard_json_parser_multifile.py @@ -52,6 +52,17 @@ def test_standard_json_source_mapping(self): self.assertEqual(e, expected) + def test_function_unit_by_pc(self): + expected_data = [ + {'pc': 427, 'function': 'withdraw', 'selector': '3ccfd60b'}, + ] + + for expected in expected_data: + pc = expected['pc'] + actual = self.parser.function_unit_by_pc(self.main_contract, pc, False) + + self.assertEqual(expected['function'], actual['name']) + self.assertEqual(expected['selector'], actual['functionSelector']) def test_all_contract_name(self): expected_contract_names = {'A', 'B', 'Main'} diff --git a/tests/test_standard_json_parser_v6_contract.py b/tests/test_standard_json_parser_v6_contract.py index a19302f..c874a97 100644 --- a/tests/test_standard_json_parser_v6_contract.py +++ b/tests/test_standard_json_parser_v6_contract.py @@ -20,17 +20,45 @@ def setUp(self): def test_source_by_pc(self): tests = [ - # pc or function name -> (line start and end) - ('approve', (463, 466),), - ('_approve', (601, 607),), (3012, (538, 538),), (3698, (284, 284),), ] for (fname_or_pc, lines) in tests: - if type(fname_or_pc) == str: - func = self.parser.function_by_name(self.main_contract, fname_or_pc) - assert lines == tuple(func.line_num), 'Start and end line numbers of the function setRule is not correct' - else: - result = self.parser.source_by_pc(self.main_contract, fname_or_pc) or {} - assert lines == tuple(result.get('linenums')), 'Start and end line numbers of the 
function setRule is not correct' + result = self.parser.source_by_pc(self.main_contract, fname_or_pc) or {} + assert lines == tuple(result.get('linenums')), 'Start and end line numbers of the function setRule is not correct' + + def test_function_by_name(self): + tests = [ + ('approve', (463, 466),), + ('_approve', (601, 607),), + ] + + for (fname, lines) in tests: + func = self.parser.function_by_name(self.main_contract, fname) + assert lines == tuple(func.line_num), 'Start and end line numbers of the function setRule is not correct' + + + def test_function_ast_unit_by_pc(self): + tests = [ + ('_transfer', 3012,), + ('sub', 3698,), + ] + + for (fname, pc) in tests: + result = self.parser.function_unit_by_pc(self.main_contract, pc) or {} + assert fname == result.get('name'), 'Function name is not correct' + + def test_ast_unit_by_pc(self): + tests = [ + ('address', ['commonType', 'typeString'], 3012,), + ('bool', ['typeDescriptions', 'typeString'], 3698,), + ] + + for (fname, keys, pc) in tests: + result = self.parser.ast_unit_by_pc(self.main_contract, pc) or {} + val = result + source = self.parser.source_by_pc(self.main_contract, pc) + for k in keys: + val = val.get(k) + assert fname == val, f'Unexpected unit found, ast unit: {result} source: {source}'