Small optimizations
- cache bytecode length
- cache instruction access: this one is a memory-compute tradeoff, but I expect it to be favorable most of the time because program bytecode is small but accessed frequently. We can switch to an lru_cache if memory consumption turns out to be a problem (see the sketch below this message)
- separate __getitem__ and .slice so that __getitem__ can be cached
- return cached jump destinations instead of redecoding on the fly in jumpi_id

Altogether, these changes improve the runtime of some tests by about 5%.
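
For illustration, here is a minimal sketch of the caching pattern described above. It uses a toy Bytecode class rather than the actual halmos Contract class; the class name, the zero-padding of out-of-range reads, and the sample bytecode are assumptions made for this sketch. The length is computed once in __init__, functools.cache memoizes per-byte __getitem__ lookups, and slicing lives in a separate, uncached method, mirroring the __getitem__/.slice split above.

from functools import cache


class Bytecode:
    """Toy stand-in for contract bytecode; not the halmos Contract class."""

    def __init__(self, rawcode: bytes) -> None:
        self._rawcode = rawcode
        # cache the length once, since the bytecode never changes after init
        self._length = len(rawcode)

    def __len__(self) -> int:
        return self._length

    # unbounded cache keyed on (self, offset); could become
    # functools.lru_cache(maxsize=...) if memory consumption is a problem
    @cache
    def __getitem__(self, offset: int) -> int:
        # per-byte reads are frequent and cheap to memoize; out-of-range reads
        # return 0 here (an assumption mimicking zero-padded EVM code reads)
        return self._rawcode[offset] if 0 <= offset < self._length else 0

    def slice(self, start: int, stop: int) -> bytes:
        # slicing stays separate and uncached, mirroring the __getitem__/.slice
        # split: only cheap per-byte lookups get memoized
        return bytes(self[i] for i in range(start, stop))


code = Bytecode(bytes.fromhex("6001600101"))  # PUSH1 01 PUSH1 01 ADD
assert code[0] == 0x60 and len(code) == 5
assert code.slice(1, 4) == b"\x01\x60\x01"

Note that functools.cache on an instance method keys entries on (self, offset), so the cache also keeps instances alive; bounding it with functools.lru_cache(maxsize=...) is the escape hatch mentioned in the commit message.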
karmacoma-eth committed Jan 23, 2024
1 parent 59fa228 commit b3185b1
Showing 1 changed file with 16 additions and 22 deletions.
38 changes: 16 additions & 22 deletions src/halmos/sevm.py
@@ -1,13 +1,12 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 import json
 import math
 import re
 
 from copy import deepcopy
 from collections import defaultdict
 from dataclasses import dataclass, field
-from functools import reduce
+from functools import cache, reduce
 from typing import (
     Any,
     Callable,
@@ -533,6 +532,9 @@ def __init__(self, rawcode: UnionType[bytes, BitVecRef, str]) -> None:
 
         self._rawcode = rawcode
 
+        # cache length since it doesn't change after initialization
+        self._length = byte_length(rawcode)
+
     def __init_jumpdests(self):
         self.jumpdests = set()
 
@@ -563,7 +565,7 @@ def decode_instruction(self, pc: int) -> Instruction:
         opcode = int_of(self[pc])
 
         if EVM.PUSH1 <= opcode <= EVM.PUSH32:
-            operand = self[pc + 1 : pc + opcode - EVM.PUSH0 + 1]
+            operand = self.slice(pc + 1, pc + opcode - EVM.PUSH0 + 1)
             return Instruction(opcode, pc=pc, operand=operand)
 
         return Instruction(opcode, pc=pc)
@@ -572,16 +574,10 @@ def next_pc(self, pc):
         opcode = self[pc]
         return pc + instruction_length(opcode)
 
-    def __getslice__(self, slice):
-        step = 1 if slice.step is None else slice.step
-        if step != 1:
-            return ValueError(f"slice step must be 1 but got {slice}")
-
+    def slice(self, start, stop) -> UnionType[bytes, BitVecRef]:
         # symbolic
         if is_bv(self._rawcode):
-            extracted = extract_bytes(
-                self._rawcode, slice.start, slice.stop - slice.start
-            )
+            extracted = extract_bytes(self._rawcode, start, stop - start)
 
             # check if that part of the code is concrete
             if is_bv_value(extracted):
@@ -591,16 +587,15 @@ def __getslice__(self, slice):
             return extracted
 
         # concrete
-        size = slice.stop - slice.start
-        data = padded_slice(self._rawcode, slice.start, size, default=0)
+        size = stop - start
+        data = padded_slice(self._rawcode, start, size, default=0)
         return bytes(data)
 
-    def __getitem__(self, key) -> UnionType[int, BitVecRef]:
+    @cache
+    def __getitem__(self, offset: int) -> UnionType[int, BitVecRef]:
         """Returns the byte at the given offset."""
-        if isinstance(key, slice):
-            return self.__getslice__(key)
-
-        offset = int_of(key, "symbolic index into contract bytecode")
+        if not isinstance(offset, int):
+            raise ValueError("unexpected program offset {offset!r}")
 
         # support for negative indexing, e.g. contract[-1]
         if offset < 0:
@@ -622,7 +617,7 @@ def __getitem__(self, key) -> UnionType[int, BitVecRef]:
 
     def __len__(self) -> int:
         """Returns the length of the bytecode in bytes."""
-        return byte_length(self._rawcode)
+        return self._length
 
     def valid_jump_destinations(self) -> set:
         """Returns the set of valid jump destinations."""
@@ -1047,8 +1042,7 @@ def is_jumpdest(self, x: Word) -> bool:
         if pc < 0:
             raise ValueError(pc)
 
-        opcode = unbox_int(self.pgm[pc])
-        return opcode == EVM.JUMPDEST
+        return pc in self.pgm.valid_jump_destinations()
 
     def jumpi_id(self) -> str:
         return f"{self.pc}:" + ",".join(
@@ -2507,7 +2501,7 @@ def finalize(ex: Exec):
                 size: int = int_of(ex.st.pop(), "symbolic CODECOPY size")
                 wextend(ex.st.memory, loc, size)
 
-                codeslice = ex.pgm[offset : offset + size]
+                codeslice = ex.pgm.slice(offset, offset + size)
 
                 actual_size = byte_length(codeslice)
                 if actual_size != size:
