From e4a4ef7a89f2c0ef5a286019730c6df6ef45c199 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Wed, 18 Sep 2024 16:40:52 -0700 Subject: [PATCH] fix: concretize calldatacopy (#364) Co-authored-by: karmacoma --- src/halmos/bytevec.py | 44 ++++++++++++++++++++++++++++++++++++++++++- src/halmos/config.py | 16 ++++++++++++++-- src/halmos/sevm.py | 7 +++++-- 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/src/halmos/bytevec.py b/src/halmos/bytevec.py index 99cedfdd..c060c979 100644 --- a/src/halmos/bytevec.py +++ b/src/halmos/bytevec.py @@ -4,7 +4,7 @@ from typing import Any, ForwardRef from sortedcontainers import SortedDict -from z3 import BitVecRef, If, eq, is_bool, is_bv, is_bv_value, simplify +from z3 import BitVecRef, If, eq, is_bool, is_bv, is_bv_value, simplify, substitute from .utils import ( Byte, @@ -142,6 +142,33 @@ def __getitem__(self, key) -> Byte | ForwardRef("Chunk"): def __len__(self) -> int: return self.length + def concretize(self, substitution: dict[BitVecRef, BitVecRef]) -> "Chunk": + """ + Replace all symbols in the data with their corresponding concrete values, + if they exist in the given substitution mapping. + + Return a new object with concrete values, or self if no substitution is made. + """ + + if not isinstance(self, SymbolicChunk): + return self + + data = self.data + # TODO: call substitute() only when the data contains variables to be substituted + new_data = substitute(data, *substitution.items()) + + # NOTE: `new_data is data` doesn't work, because substitute() always returns a new object even if no substitution is made + if eq(new_data, data): + return self + + new_data = simplify(new_data) + + return ( + ConcreteChunk(bv_value_to_bytes(new_data), self.start, self.length) + if is_bv_value(new_data) + else SymbolicChunk(new_data, self.start, self.length) + ) + @abstractmethod def get_byte(self, offset) -> Byte: raise NotImplementedError @@ -610,6 +637,21 @@ def set_word(self, offset: int, value: Word) -> None: self.set_slice(offset, offset + 32, value) + def concretize(self, substitution: dict[BitVecRef, BitVecRef]) -> "ByteVec": + """ + Replace all symbols in the chunks with their corresponding concrete values, + if they exist in the given substitution mapping. + + Return a new ByteVec object even if no substitution is made. + """ + + result = ByteVec() + + for chunk in self.chunks.values(): + result.append(chunk.concretize(substitution)) + + return result + ### read operations def slice(self, start: int, stop: int) -> "ByteVec": diff --git a/src/halmos/config.py b/src/halmos/config.py index fad3b0d3..c9d0f963 100644 --- a/src/halmos/config.py +++ b/src/halmos/config.py @@ -188,7 +188,7 @@ class Config: default_bytes_lengths: str = arg( help="set the default length candidates for bytes and string not specified in --array-lengths", - global_default="0,32,1024,65", # 65 is ECDSA signature size + global_default="0,1024,65", # 65 is ECDSA signature size metavar="LENGTH1,LENGTH2,...", action=ParseCSV, ) @@ -549,7 +549,19 @@ def parse_dict(self, parsed: dict, source: str = "halmos.toml") -> dict: ) sys.exit(2) - return {k.replace("-", "_"): v for k, v in data.items()} + # gather custom actions + actions = { + field.name: field.metadata["action"] + for field in fields(Config) + if "action" in field.metadata + } + + result = {} + for key, value in data.items(): + key = key.replace("-", "_") + action = actions.get(key) + result[key] = action.parse(value) if action else value + return result def _create_default_config() -> "Config": diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index 28a731c6..cfd7da4d 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -644,8 +644,10 @@ class Concretization: In the future, it may be used for other purposes, such as concrete hash reasoning. """ - substitution: dict = field(default_factory=dict) # symbol -> constant - candidates: dict = field(default_factory=dict) # symbol -> set of constants + # symbol -> constant + substitution: dict[BitVecRef, BitVecRef] = field(default_factory=dict) + # symbol -> set of constants + candidates: dict[BitVecRef, list[int]] = field(default_factory=dict) def process_cond(self, cond): if not is_eq(cond): @@ -2864,6 +2866,7 @@ def finalize(ex: Exec): if size > 0: data: ByteVec = ex.calldata().slice(offset, offset + size) + data = data.concretize(ex.path.concretization.substitution) ex.st.memory.set_slice(loc, end_loc, data) elif opcode == EVM.CODECOPY: