Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AST unit from PC #83

Merged
merged 6 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 70 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,21 @@

Parse the AST of Solidity source code and extract contract information.

## Quickstart

## Usage
### Installation

Using combined json, support for contract with multiple source files is limited:
Clone this repository and install it with pip:

``` python
from solc_json_parser.combined_json_parser import CombinedJsonParser

# The input can be a file path or source code
parser = CombinedJsonParser('contracts/BlackScholesEstimate_8.sol')
parser.all_contract_names

# List all functions in contract
parser.functions_in_contract_by_name('BlackScholesEstimate', name_only=True)

# Get source code by program counter
parser.source_by_pc('BlackScholesEstimate', 92)
{'pc': 92,
'fragment': 'function retBasedBlackScholesEstimate(\n uint256[] memory _numbers,\n uint256 _underlying,\n uint256 _time\n ) public pure {\n uint _vol = stddev(_numbers);\n blackScholesEstimate(_vol, _underlying, _time);\n }',
'begin': 2633,
'end': 2877,
'linenums': (69, 76),
'source_idx': 0,
'source_path': 'BlackScholesEstimate_8.sol'}


# Get deployment code by contract name
parser.get_deploy_bin_by_contract_name('BlackScholesEstimate')

# Get literal values by contract name
parser.get_literals('BlackScholesEstimate', True)
{'number': {0, 1, 2, 40}, 'string': set(), 'address': set(), 'other': set()}
``` bash
git clone https://github.com/sbip-sg/solc-json-parser.git
cd solc-json-parser
pip install .
```

Using [standard json](https://docs.soliditylang.org/en/v0.8.17/using-the-compiler.html#compiler-input-and-output-json-description):
### Usage

Example usage with [standard json](https://docs.soliditylang.org/en/v0.8.17/using-the-compiler.html#compiler-input-and-output-json-description):

``` python
import json
Expand All @@ -47,10 +27,69 @@ with open('contracts/standard_json/75b8.standard-input.json') as f:
version = '0.8.4'
parser = StandardJsonParser(input_json, version)

# Other usages are the same as combined json parser
# Get all contract names
parser.all_contract_names
# ['IERC1271',
# ...
# 'ContractKeys',
# 'NFTfiSigningUtils',
# 'NftReceiver',
# 'Ownable']

# Get source code by PC
source = parser.source_by_pc('DirectLoanFixedOffer', 13232)
source
# {'pc': 13232,
# 'linenums': [921, 924],
# 'fragment': 'LoanChecksAndCalculations.computeRevenueShare(\n adminFee,\n loanExtras.revenueShareInBasisPoints\n )',
# 'fid': 'contracts/loans/direct/loanTypes/DirectLoanBaseMinimal.sol',
# 'begin': 45007,
# 'end': 45134,
# 'source_idx': 26,
# 'source_path': 'contracts/loans/direct/loanTypes/DirectLoanBaseMinimal.sol'}

# Get function AST unit by PC
func = parser.function_unit_by_pc('DirectLoanFixedOffer', 13232)
# Parameter names of this function
[n.get('name') for n in func.get('parameters').get('parameters')]
# ['_loanId', '_borrower', '_lender', '_loan']
# Function selector, available only for external or public functions
func.get('functionSelector')

# Get the innermost AST unit by PC
parser.ast_unit_by_pc('DirectLoanFixedOffer', 13232)
```

## Command line tools

``` bash
solc-json-parser --help
```

Decode binary to opcodes:

``` bash
❯ solc-json-parser dp 0x60806040525f80fdfea26469706673582212200466fd4ed0d73499199c39545f7019da158defa354cc0051afe02754ec8e32b464736f6c63430008180033
PUSH1 0x80
PUSH1 0x40
MSTORE
PUSH0
DUP1
REVERT
INVALID
LOG2
PUSH5 0x6970667358
0X22
SLT
SHA3
DIV
PUSH7 0xfd4ed0d7349919
SWAP13
...
```



## Note

- This library only supports detecting Solidity version newer than or equal to
Expand Down
5 changes: 5 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@
setup_requires=['pytest-runner'],
tests_require=['pytest>=4.4.1'],
test_suite='tests',
entry_points={
'console_scripts': [
'solc-json-parser=solc_json_parser.cli:main',
],
},
)
19 changes: 19 additions & 0 deletions solc_json_parser/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env python3
import argparse
from . import opcodes

def main(argv=None):
    """Entry point of the ``solc-json-parser`` command line tool.

    Args:
        argv: Arguments to parse, without the program name. ``None`` (the
            default, which is what the console-script entry point uses)
            makes argparse read ``sys.argv[1:]``.

    The ``decode_binary`` subcommand (alias ``dp``) forwards its ``data``
    argument to ``opcodes.decode_and_print``. When no subcommand is given
    the help text is printed instead.
    """
    parser = argparse.ArgumentParser(
        description='Command line tools for solc-json-parser.',
    )
    subparsers = parser.add_subparsers(dest='command', help='Subcommands')
    decode_parser = subparsers.add_parser(
        'decode_binary', aliases=['dp'], help='Decode binary data',
    )
    decode_parser.add_argument('data', type=str, help='Binary data to decode')

    args = parser.parse_args(argv)

    # Both the full subcommand name and its alias are accepted here.
    if args.command in ('decode_binary', 'dp'):
        opcodes.decode_and_print(args.data)
    else:
        parser.print_help()

if __name__ == "__main__":
main()
32 changes: 31 additions & 1 deletion solc_json_parser/opcodes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Dict
from typing import Dict

name_to_byte: Dict[str, int] = dict(
#
Expand Down Expand Up @@ -99,6 +99,7 @@
#
# Push Operations
#
PUSH0 = 0x5f,
PUSH1 = 0x60,
PUSH2 = 0x61,
PUSH3 = 0x62,
Expand Down Expand Up @@ -196,7 +197,36 @@
CREATE2 = 0xf5,
STATICCALL = 0xfa,
REVERT = 0xfd,
INVALID = 0xfe,
SELFDESTRUCT = 0xff,
)

# Reverse lookup table: opcode byte value -> mnemonic. When several mnemonics
# in name_to_byte share a byte value, the one that appears last wins.
byte_to_name: Dict[int, str] = {
    byte_value: mnemonic for mnemonic, byte_value in name_to_byte.items()
}

def decode_and_print(binary_hex):
    """Print the opcodes encoded in a hex string, one per line.

    For example ``0x60606040526000357c010`` is turned into human readable
    opcodes. An optional ``0x``/``0X`` prefix is dropped first. Every opcode
    takes two hex characters; a ``PUSHn`` opcode additionally consumes the
    next ``n`` bytes, which are printed after the mnemonic as ``0x...``.
    Byte values missing from ``byte_to_name`` are printed as an upper-cased
    hex literal (prefix included, e.g. ``0X22``).
    """
    hex_body = binary_hex[2:] if binary_hex[:2].lower() == '0x' else binary_hex
    total = len(hex_body)
    cursor = 0
    while cursor < total:
        byte_value = int(hex_body[cursor:cursor + 2], 16)
        next_cursor = cursor + 2
        mnemonic = byte_to_name.get(byte_value)

        if mnemonic is None:
            # Unknown byte: shown as-is instead of raising.
            line = f"0x{byte_value:02x}".upper()
        elif not mnemonic.startswith('PUSH'):
            line = mnemonic
        else:
            # The digits after "PUSH" give the immediate size in bytes.
            width = int(mnemonic[4:])
            next_cursor += width * 2
            if width > 0:
                line = f"{mnemonic} 0x{hex_body[cursor + 2:next_cursor]}"
            else:
                line = mnemonic

        print(line)
        cursor = next_cursor

def encode_and_print(opcodes):
    """Convert opcodes to binary hex and print it (not implemented yet).

    Placeholder: the argument is currently ignored, nothing is printed and
    ``None`` is returned.
    """
    return None
111 changes: 109 additions & 2 deletions solc_json_parser/standard_json_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
from .base_parser import BaseParser
from .fields import Field, Function, ContractData, Modifier, Event, Literal

def node_contains(src_str: str, pc_source: dict) -> bool:
    """
    Tell whether an AST ``src`` range fully covers the range in ``pc_source``.

    - `src_str`: range in the form ``offset:length:file_index``; an empty or
      missing value never matches
    - `pc_source`: dict holding the integer keys ``begin`` and ``end``

    The file index part of ``src_str`` is parsed but not compared.
    """
    if src_str:
        start, span, _file_index = (int(part) for part in src_str.split(':'))
        return start <= pc_source['begin'] and start + span >= pc_source['end']
    return False

def compile_standard(version: str, input_json: dict, solc_bin_resolver: Callable[[str], str] = solc_bin, cwd: Optional[str]=None):
'''
Compile standard input json and parse output as json.
Expand Down Expand Up @@ -181,11 +190,22 @@ def override_settings(input_json):


class StandardJsonParser(BaseParser):
def __init__(self, input_json: Union[dict, str], version: str, solc_bin_resolver: Callable[[str], str] = solc_bin, cwd: Optional[str] = None):
def __init__(self, input_json: Union[dict, str], version: str, solc_bin_resolver: Callable[[str], str] = solc_bin, cwd: Optional[str] = None,
retry_num: Optional[int]=0,
try_install_solc: Optional[bool]=False,
solc_options: Optional[Dict] = {}):
if retry_num is not None and retry_num > 0:
raise Exception('StandardJsonParser does not support retry')

super().__init__()
self.file_path = None
self.solc_version: str = version
self.input_json: dict = input_json if isinstance(input_json, dict) else json.loads(input_json)
try:
# try parse as json
self.input_json: dict = input_json if isinstance(input_json, dict) else json.loads(input_json)
except json.JSONDecodeError:
# try use input as a plain source file
self.input_json = StandardJsonParser.__prepare_standard_input(input_json)

self.input_json = override_settings(self.input_json)

Expand All @@ -195,11 +215,40 @@ def __init__(self, input_json: Union[dict, str], version: str, solc_bin_resolver
self.cwd = cwd

self.output_json = compile_standard(version, self.input_json, solc_bin_resolver, cwd)

if has_compilation_error(self.output_json):
raise SolidityAstError(f"Compile failed: {self.output_json.get('errors')}" )

self.post_configure_compatible_fields()

@staticmethod
def __prepare_standard_input(source: str) -> Dict:
if '\n' not in source:
with open(source, 'r') as f:
source = f.read()

input_json = {
'language': 'Solidity',
'sources': {
'source.sol': {
'content': source
}
},
'settings': {
'optimizer': {
'enabled': False,
},
'evmVersion': 'istanbul',
'outputSelection': {
'*': {
'*': [ '*' ],
'': ['ast']
}
}
}
}
return input_json


def prepare_by_version(self):
super().prepare_by_version()
Expand Down Expand Up @@ -271,6 +320,12 @@ def post_configure_compatible_fields(self):


def source_by_pc(self, contract_name: str, pc: int, deploy=False) -> Optional[dict]:
"""
Get source code by program counter(pc) in a contract.
- `contract_name`: contract name in string
- `pc`: program counter in integer
- `deploy`: set to True if the PC is from the deployment code instead of runtime code. Default is False
"""
evms = evms_by_contract_name(self.output_json, contract_name)
for _, evm in evms:
code, pc2idx, *_ = self.__build_pc2idx(evm, deploy)
Expand All @@ -279,6 +334,58 @@ def source_by_pc(self, contract_name: str, pc: int, deploy=False) -> Optional[di
return result
return None

def __extract_node(self, pred: Callable, root_node: List[Dict], first_only=True) -> List[Dict]:
to_visit = [root_node]
found = []
while True:
if not to_visit:
break

node = to_visit.pop(0)

if type(node) not in {dict, list}:
continue

if type(node) == list:
to_visit += node
continue

children = list(node.values())

if children:
to_visit += children
if pred(node):
found.append(node)
if first_only:
break

return found

def ast_units_by_pc(self, contract_name: str, pc: int, node_type: Optional[str], deploy=False, first_only=False) -> List[Dict]:
    """
    Get the AST units whose source range covers the code located at a PC.
    - `contract_name`: contract name in string
    - `pc`: program counter in integer
    - `node_type`: keep only units with this `nodeType`; `None` keeps every type
    - `deploy`: passed on to `source_by_pc`
    - `first_only`: stop at the first matching unit

    Returns an empty list when `source_by_pc` finds nothing for the PC.
    """
    located = self.source_by_pc(contract_name, pc, deploy)
    if not located:
        return []

    def is_match(candidate):
        if not candidate:
            return False
        if node_type is not None and candidate.get('nodeType') != node_type:
            return False
        return node_contains(candidate.get('src'), located)

    # Search only the AST of the file the PC maps to.
    ast_root = self.output_json['sources'][located['fid']]['ast']
    return self.__extract_node(is_match, ast_root, first_only=first_only)

def function_unit_by_pc(self, contract_name: str, pc: int, deploy=False) -> Optional[Dict]:
    """
    Get the `FunctionDefinition` AST unit containing the PC, or `None`
    when `ast_units_by_pc` reports no such unit.
    """
    matches = self.ast_units_by_pc(contract_name, pc, 'FunctionDefinition', deploy, first_only=True)
    if not matches:
        return None
    return matches[0]

def ast_unit_by_pc(self, contract_name: str, pc: int, deploy=False) -> Optional[Dict]:
    """
    Get the smallest AST unit containing the PC.

    Takes the last unit returned by `ast_units_by_pc` without a node type
    filter, or `None` when nothing matches.
    """
    matches = self.ast_units_by_pc(contract_name, pc, node_type=None, deploy=deploy, first_only=False)
    if not matches:
        return None
    return matches[-1]


def __build_pc2idx(self, evm: dict, deploy: bool = False) -> Tuple[list, dict, dict]:
    """Forward to the module-level ``build_pc2idx`` and return its result.

    - `evm`: passed through unchanged
    - `deploy`: passed through unchanged
    """
    pc_index = build_pc2idx(evm, deploy)
    return pc_index

Expand Down
11 changes: 11 additions & 0 deletions tests/test_standard_json_parser_multifile.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ def test_standard_json_source_mapping(self):

self.assertEqual(e, expected)

def test_function_unit_by_pc(self):
expected_data = [
{'pc': 427, 'function': 'withdraw', 'selector': '3ccfd60b'},
]

for expected in expected_data:
pc = expected['pc']
actual = self.parser.function_unit_by_pc(self.main_contract, pc, False)

self.assertEqual(expected['function'], actual['name'])
self.assertEqual(expected['selector'], actual['functionSelector'])

def test_all_contract_name(self):
expected_contract_names = {'A', 'B', 'Main'}
Expand Down
Loading
Loading