From af8d7d4ec80bbe0c7a38d57562f28b25c344159b Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <alexandre.iooss@ledger.fr>
Date: Wed, 6 Apr 2022 10:15:26 +0200
Subject: [PATCH 1/8] Document disassemble_single method

---
 rainbow/rainbow.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/rainbow/rainbow.py b/rainbow/rainbow.py
index 102dd83..7cd96db 100644
--- a/rainbow/rainbow.py
+++ b/rainbow/rainbow.py
@@ -18,11 +18,10 @@
 
 
 import math
-import os
 import weakref
+from typing import Tuple
 import capstone as cs
 import colorama
-import lief
 import unicorn as uc
 from pygments import highlight
 from pygments.formatters import TerminalFormatter as formatter
@@ -330,13 +329,20 @@ def trace_mem(self, uci, access, address, size, value, user_data):
                 val = color("CYAN", f"{val:8x}")
                 print(f"  {val} <- [{addr}]", end=" ")
 
-    def disassemble_single(self, addr, size):
-        """ Disassemble a single instruction at address """
+    def disassemble_single(self, addr: int, size: int) -> Tuple[int, int, str, str]:
+        """Disassemble a single instruction using Capstone lite
+
+        This returns the address, size, mnemonic, and operands of the
+        instruction at the specified address and size (in bytes).
+
+        If you want more information, you should use disassemble_single_detailed
+        method, but is 30% slower according to Capstone documentation.
+        """
         instruction = self.emu.mem_read(addr, size)
         return next(self.disasm.disasm_lite(bytes(instruction), addr, 1))
 
-    def disassemble_single_detailed(self, addr, size):
-        """ Disassemble a single instruction at addr """
+    def disassemble_single_detailed(self, addr: int, size: int) -> cs.CsInsn:
+        """Disassemble a single instruction using Capstone"""
         instruction = self.emu.mem_read(addr, 2 * size)
         return next(self.disasm.disasm(bytes(instruction), addr, 1))
 

From c6ad7392fbadcefb100eaa2338d606e6636a86e3 Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <alexandre.iooss@ledger.fr>
Date: Wed, 6 Apr 2022 11:52:57 +0200
Subject: [PATCH 2/8] Add LRU cache on Capstone calls

---
 rainbow/rainbow.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/rainbow/rainbow.py b/rainbow/rainbow.py
index 7cd96db..54aa879 100644
--- a/rainbow/rainbow.py
+++ b/rainbow/rainbow.py
@@ -17,9 +17,10 @@
 # Copyright 2019 Victor Servant, Ledger SAS
 
 
+import functools
 import math
 import weakref
-from typing import Tuple
+from typing import Callable, Tuple
 import capstone as cs
 import colorama
 import unicorn as uc
@@ -329,6 +330,12 @@ def trace_mem(self, uci, access, address, size, value, user_data):
                 val = color("CYAN", f"{val:8x}")
                 print(f"  {val} <- [{addr}]", end=" ")
 
+    # Least-recently used cache for Capstone calls to disasm or disasm_lite
+    @staticmethod
+    @functools.lru_cache(maxsize=4096)
+    def _disassemble_cache(call: Callable, instruction: bytes, addr: int):
+        return next(call(instruction, addr, 1))
+
     def disassemble_single(self, addr: int, size: int) -> Tuple[int, int, str, str]:
         """Disassemble a single instruction using Capstone lite
 
@@ -338,13 +345,13 @@ def disassemble_single(self, addr: int, size: int) -> Tuple[int, int, str, str]:
         If you want more information, you should use disassemble_single_detailed
         method, but is 30% slower according to Capstone documentation.
         """
-        instruction = self.emu.mem_read(addr, size)
-        return next(self.disasm.disasm_lite(bytes(instruction), addr, 1))
+        insn = self.emu.mem_read(addr, size)
+        return self._disassemble_cache(self.disasm.disasm_lite, bytes(insn), addr)
 
     def disassemble_single_detailed(self, addr: int, size: int) -> cs.CsInsn:
         """Disassemble a single instruction using Capstone"""
-        instruction = self.emu.mem_read(addr, 2 * size)
-        return next(self.disasm.disasm(bytes(instruction), addr, 1))
+        insn = self.emu.mem_read(addr, 2 * size)
+        return self._disassemble_cache(self.disasm.disasm, bytes(insn), addr)
 
     def print_asmline(self, adr, ins, op_str):
         """ Pretty-print assembly using pygments syntax highlighting """

From 6538cdbdafc62c93e2dd48df846c6de7a55a01d7 Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <alexandre.iooss@ledger.fr>
Date: Wed, 6 Apr 2022 18:25:04 +0200
Subject: [PATCH 3/8] Add LRU cache to get accessed registers

---
 rainbow/tracers.py | 57 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/rainbow/tracers.py b/rainbow/tracers.py
index 03d909d..ebc2352 100644
--- a/rainbow/tracers.py
+++ b/rainbow/tracers.py
@@ -1,16 +1,59 @@
+# This file is part of rainbow
+#
+# rainbow is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# Copyright 2020 Victor Servant, Ledger SAS
+
+import functools
+from typing import List, Tuple
+import capstone as cs
+
 from .utils import hw
 
-def regs_hw_sum_trace(rbw, address, size, data):
+
+# Least-recently used cache for register access extraction
+@functools.lru_cache(maxsize=4096)
+def registers_accessed_by_instruction(insn: cs.CsInsn) -> Tuple[List[int], List[int]]:
+    """Return read and written registers by a single instruction
+
+    Registers are represented with Capstone identifiers which mostly map to
+    Unicorn identifiers.
+    """
+    return insn.regs_access()
+
+
+def regs_hw_sum_trace(rbw, address: int, size: int, _data):
+    """Trace written registers Hamming weight
+
+    For each instruction, this tracer sums the Hamming weight of all written
+    registers.
+
+    This tracer is hooked by default if sca_mode=True and sca_HD=False.
+    You may hook it with Unicorn as an `uc.UC_HOOK_CODE` hook.
+    """
     ins = rbw.reg_leak
     if ins is not None:
-      _, regs_written = ins.regs_access()
-      v = sum(hw(rbw.emu.reg_read(rbw.reg_map[ins.reg_name(i)])) for i in regs_written)
+        _, regs_written = registers_accessed_by_instruction(ins)
+        v = sum(hw(rbw.emu.reg_read(r)) for r in regs_written)
 
-      rbw.sca_address_trace.append( f"{address:8X} {ins.mnemonic:<6}  {ins.op_str}" )
-      rbw.sca_values_trace.append(v)
+        rbw.sca_address_trace.append(f"{ins.address:8X} {ins.mnemonic:<6}  {ins.op_str}")
+        rbw.sca_values_trace.append(v)
 
     rbw.reg_leak = rbw.disassemble_single_detailed(address, size)
 
+
 def wb_regs_trace(rbw, address, size, data):
     """One point per register value, and filter out uninteresting register accesses"""
     if rbw.reg_leak:
@@ -23,6 +66,6 @@ def wb_regs_trace(rbw, address, size, data):
     rbw.reg_leak = None
 
     ins = rbw.disassemble_single_detailed(address, size)
-    _regs_read, regs_written = ins.regs_access()
+    _regs_read, regs_written = registers_accessed_by_instruction(ins)
     if len(regs_written) > 0:
-        rbw.reg_leak = (ins, regs_written) 
\ No newline at end of file
+        rbw.reg_leak = (ins, regs_written)

From 11d5791c25c2bc420d4288522c059afa895b9ec1 Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <alexandre.iooss@ledger.fr>
Date: Thu, 7 Apr 2022 09:26:29 +0200
Subject: [PATCH 4/8] Fix SyntaxWarning due to 'is' with literal

---
 rainbow/rainbow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rainbow/rainbow.py b/rainbow/rainbow.py
index 54aa879..1e4becd 100644
--- a/rainbow/rainbow.py
+++ b/rainbow/rainbow.py
@@ -404,7 +404,7 @@ def code_trace(self, uci, address, size, data):
             while True:
                 s = input("Press Enter to continue, or Input an address and a size to display an address: ")
 
-                if s is '':
+                if s == '':
                     break
                 try:
                     address = eval("0x"+s.split(" ")[0])

From 56c21c232e600866d80c3cbd198aedd78e200938 Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <alexandre.iooss@ledger.fr>
Date: Thu, 7 Apr 2022 09:51:51 +0200
Subject: [PATCH 5/8] Import regs_hw_sum_trace outside of sca_code_trace

---
 rainbow/rainbow.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/rainbow/rainbow.py b/rainbow/rainbow.py
index 1e4becd..a8f1231 100644
--- a/rainbow/rainbow.py
+++ b/rainbow/rainbow.py
@@ -28,8 +28,9 @@
 from pygments.formatters import TerminalFormatter as formatter
 from pygments.lexers import NasmLexer
 
-from rainbow.color_functions import color
-from rainbow.loaders import load_selector
+from .color_functions import color
+from .loaders import load_selector
+from .tracers import regs_hw_sum_trace
 
 
 class HookWeakMethod:
@@ -362,10 +363,9 @@ def print_asmline(self, adr, ins, op_str):
         )
         print("\n" + color("YELLOW", f"{adr:8X}  ") + line, end=";")
 
-    def sca_code_trace(self, uci, address, size, data):
-        from .tracers import regs_hw_sum_trace
+    def sca_code_trace(self, _uci, address, size, data):
         regs_hw_sum_trace(self, address, size, data)
-          
+
     def sca_code_traceHD(self, uci, address, size, data):
         """
         Hook that traces modified register values in side-channel mode.

From b26abac0f08ba449df6665e9eaf093f553e4a1ba Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <alexandre.iooss@ledger.fr>
Date: Tue, 12 Apr 2022 14:00:21 +0200
Subject: [PATCH 6/8] Move regs_hd_sum_trace to tracers.py

---
 rainbow/rainbow.py | 24 ++----------------------
 rainbow/tracers.py | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/rainbow/rainbow.py b/rainbow/rainbow.py
index a8f1231..886d759 100644
--- a/rainbow/rainbow.py
+++ b/rainbow/rainbow.py
@@ -30,7 +30,7 @@
 
 from .color_functions import color
 from .loaders import load_selector
-from .tracers import regs_hw_sum_trace
+from .tracers import regs_hd_sum_trace, regs_hw_sum_trace
 
 
 class HookWeakMethod:
@@ -367,27 +367,7 @@ def sca_code_trace(self, _uci, address, size, data):
         regs_hw_sum_trace(self, address, size, data)
 
     def sca_code_traceHD(self, uci, address, size, data):
-        """
-        Hook that traces modified register values in side-channel mode.
-
-        Capstone 4's 'regs_access' method is used to find out which registers are explicitly modified by an instruction.
-        Once found, the information is stored in self.reg_leak to be stored at the next instruction, once the unicorn engine actually performed the current instruction.
-        """
-        if self.trace:
-            if self.reg_leak is not None:
-                for x in self.reg_leak[1]:
-                    if x not in self.TRACE_DISCARD:
-                        self.sca_address_trace.append(self.reg_leak[0])
-                        self.sca_values_trace.append(self.RegistersBackup[self.reg_map[x]] ^ uci.reg_read(self.reg_map[x]))
-                        self.RegistersBackup[self.reg_map[x]] = uci.reg_read(self.reg_map[x])
-
-            self.reg_leak = None
-
-            ins = self.disassemble_single_detailed(address, size)
-            _regs_read, regs_written = ins.regs_access()
-            if len(regs_written) > 0:
-                self.reg_leak = (f"{address:8X} {ins.mnemonic:<6}  {ins.op_str}",list(map(ins.reg_name, regs_written))
-                )
+        regs_hd_sum_trace(self, address, size, data)
 
     def code_trace(self, uci, address, size, data):
         """ 
diff --git a/rainbow/tracers.py b/rainbow/tracers.py
index ebc2352..29356df 100644
--- a/rainbow/tracers.py
+++ b/rainbow/tracers.py
@@ -45,12 +45,47 @@ def regs_hw_sum_trace(rbw, address: int, size: int, _data):
     """
     ins = rbw.reg_leak
     if ins is not None:
+        # Find out which registers are modified
         _, regs_written = registers_accessed_by_instruction(ins)
         v = sum(hw(rbw.emu.reg_read(r)) for r in regs_written)
 
         rbw.sca_address_trace.append(f"{ins.address:8X} {ins.mnemonic:<6}  {ins.op_str}")
         rbw.sca_values_trace.append(v)
 
+    # Information is stored to be used at the next instruction,
+    # once the unicorn engine actually performed the current instruction.
+    rbw.reg_leak = rbw.disassemble_single_detailed(address, size)
+
+
+def regs_hd_sum_trace(rbw, address: int, size: int, _data):
+    """Trace written registers Hamming distance
+
+    For each instruction, this tracer sums the Hamming distance of all written
+    registers with their last value.
+
+    You may filter out uninteresting register accesses using TRACE_DISCARD
+    attribute.
+
+    This tracer is hooked by default if sca_mode=True and sca_HD=True.
+    You may hook it with Unicorn as an `uc.UC_HOOK_CODE` hook.
+    """
+    ins = rbw.reg_leak
+    if ins is not None:
+        # Find out which registers are modified
+        _, regs_written = registers_accessed_by_instruction(ins)
+
+        v = 0
+        for r in regs_written:
+            if r in rbw.TRACE_DISCARD:
+                continue
+            v += rbw.RegistersBackup[r] ^ rbw.emu.reg_read(r)
+            rbw.RegistersBackup[r] = rbw.emu.reg_read(r)
+
+        rbw.sca_address_trace.append(f"{ins.address:8X} {ins.mnemonic:<6}  {ins.op_str}")
+        rbw.sca_values_trace.append(v)
+
+    # Information is stored to be used at the next instruction,
+    # once the unicorn engine actually performed the current instruction.
     rbw.reg_leak = rbw.disassemble_single_detailed(address, size)
 
 

From 4640f227619fb512c6e3191d396f9b08ef519fe1 Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <alexandre.iooss@ledger.fr>
Date: Tue, 12 Apr 2022 14:08:01 +0200
Subject: [PATCH 7/8] Remove useless reg_map lookup in wb_regs_trace

---
 rainbow/tracers.py | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/rainbow/tracers.py b/rainbow/tracers.py
index 29356df..1ad3d26 100644
--- a/rainbow/tracers.py
+++ b/rainbow/tracers.py
@@ -89,18 +89,26 @@ def regs_hd_sum_trace(rbw, address: int, size: int, _data):
     rbw.reg_leak = rbw.disassemble_single_detailed(address, size)
 
 
-def wb_regs_trace(rbw, address, size, data):
-    """One point per register value, and filter out uninteresting register accesses"""
-    if rbw.reg_leak:
-      ins = rbw.reg_leak[0]
-      for reg in map(ins.reg_name, rbw.reg_leak[1]):
-          if reg not in rbw.TRACE_DISCARD:
-            rbw.sca_address_trace.append(ins)
-            rbw.sca_values_trace.append(rbw.emu.reg_read(rbw.reg_map[reg]))
+def wb_regs_trace(rbw, address: int, size: int, _data):
+    """Trace written registers value
+
+    For each instruction, output one point per written register value.
+
+    You may filter out uninteresting register accesses using TRACE_DISCARD
+    attribute.
+    """
+    ins = rbw.reg_leak
+    if ins is not None:
+        # Find out which registers are modified
+        _, regs_written = registers_accessed_by_instruction(ins)
 
-    rbw.reg_leak = None
+        for r in regs_written:
+            if r in rbw.TRACE_DISCARD:
+                continue
 
-    ins = rbw.disassemble_single_detailed(address, size)
-    _regs_read, regs_written = registers_accessed_by_instruction(ins)
-    if len(regs_written) > 0:
-        rbw.reg_leak = (ins, regs_written)
+            rbw.sca_address_trace.append(ins)
+            rbw.sca_values_trace.append(rbw.emu.reg_read(r))
+
+    # Information is stored to be used at the next instruction,
+    # once the unicorn engine actually performed the current instruction.
+    rbw.reg_leak = rbw.disassemble_single_detailed(address, size)

From 13219cec3793e4afa82e8320410c13e36ed29f9c Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <alexandre.iooss@ledger.fr>
Date: Tue, 12 Apr 2022 14:53:31 +0200
Subject: [PATCH 8/8] Pass rainbow instance as hook data

---
 rainbow/rainbow.py | 12 +++---------
 rainbow/tracers.py | 15 ++++++++-------
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/rainbow/rainbow.py b/rainbow/rainbow.py
index 886d759..eef5e1d 100644
--- a/rainbow/rainbow.py
+++ b/rainbow/rainbow.py
@@ -271,12 +271,12 @@ def setup(self):
         self.block_hook = self.emu.hook_add(uc.UC_HOOK_BLOCK,
             HookWeakMethod(self.block_handler))
         if self.sca_mode:
-            if (self.sca_HD):
+            if self.sca_HD:
                 self.ct_hook = self.emu.hook_add(uc.UC_HOOK_CODE,
-                    HookWeakMethod(self.sca_code_traceHD))
+                    regs_hd_sum_trace, self)
             else:
                 self.ct_hook = self.emu.hook_add(uc.UC_HOOK_CODE,
-                    HookWeakMethod(self.sca_code_trace))
+                    regs_hw_sum_trace, self)
             self.tm_hook = self.emu.hook_add(
                 uc.UC_HOOK_MEM_READ | uc.UC_HOOK_MEM_WRITE,
                 HookWeakMethod(self.sca_trace_mem))
@@ -363,12 +363,6 @@ def print_asmline(self, adr, ins, op_str):
         )
         print("\n" + color("YELLOW", f"{adr:8X}  ") + line, end=";")
 
-    def sca_code_trace(self, _uci, address, size, data):
-        regs_hw_sum_trace(self, address, size, data)
-
-    def sca_code_traceHD(self, uci, address, size, data):
-        regs_hd_sum_trace(self, address, size, data)
-
     def code_trace(self, uci, address, size, data):
         """ 
         Hook that traces modified register values in side-channel mode. 
diff --git a/rainbow/tracers.py b/rainbow/tracers.py
index 1ad3d26..889d120 100644
--- a/rainbow/tracers.py
+++ b/rainbow/tracers.py
@@ -19,6 +19,7 @@
 import functools
 from typing import List, Tuple
 import capstone as cs
+import unicorn as uc
 
 from .utils import hw
 
@@ -34,7 +35,7 @@ def registers_accessed_by_instruction(insn: cs.CsInsn) -> Tuple[List[int], List[
     return insn.regs_access()
 
 
-def regs_hw_sum_trace(rbw, address: int, size: int, _data):
+def regs_hw_sum_trace(uci: uc.Uc, address: int, size: int, rbw):
     """Trace written registers Hamming weight
 
     For each instruction, this tracer sums the Hamming weight of all written
@@ -47,7 +48,7 @@ def regs_hw_sum_trace(rbw, address: int, size: int, _data):
     if ins is not None:
         # Find out which registers are modified
         _, regs_written = registers_accessed_by_instruction(ins)
-        v = sum(hw(rbw.emu.reg_read(r)) for r in regs_written)
+        v = sum(hw(uci.reg_read(r)) for r in regs_written)
 
         rbw.sca_address_trace.append(f"{ins.address:8X} {ins.mnemonic:<6}  {ins.op_str}")
         rbw.sca_values_trace.append(v)
@@ -57,7 +58,7 @@ def regs_hw_sum_trace(rbw, address: int, size: int, _data):
     rbw.reg_leak = rbw.disassemble_single_detailed(address, size)
 
 
-def regs_hd_sum_trace(rbw, address: int, size: int, _data):
+def regs_hd_sum_trace(uci: uc.Uc, address: int, size: int, rbw):
     """Trace written registers Hamming distance
 
     For each instruction, this tracer sums the Hamming distance of all written
@@ -78,8 +79,8 @@ def regs_hd_sum_trace(rbw, address: int, size: int, _data):
         for r in regs_written:
             if r in rbw.TRACE_DISCARD:
                 continue
-            v += rbw.RegistersBackup[r] ^ rbw.emu.reg_read(r)
-            rbw.RegistersBackup[r] = rbw.emu.reg_read(r)
+            v += hw(rbw.RegistersBackup[r] ^ uci.reg_read(r))
+            rbw.RegistersBackup[r] = uci.reg_read(r)
 
         rbw.sca_address_trace.append(f"{ins.address:8X} {ins.mnemonic:<6}  {ins.op_str}")
         rbw.sca_values_trace.append(v)
@@ -89,7 +90,7 @@ def regs_hd_sum_trace(rbw, address: int, size: int, _data):
     rbw.reg_leak = rbw.disassemble_single_detailed(address, size)
 
 
-def wb_regs_trace(rbw, address: int, size: int, _data):
+def wb_regs_trace(uci: uc.Uc, address: int, size: int, rbw):
     """Trace written registers value
 
     For each instruction, output one point per written register value.
@@ -107,7 +108,7 @@ def wb_regs_trace(rbw, address: int, size: int, _data):
                 continue
 
             rbw.sca_address_trace.append(ins)
-            rbw.sca_values_trace.append(rbw.emu.reg_read(r))
+            rbw.sca_values_trace.append(uci.reg_read(r))
 
     # Information is stored to be used at the next instruction,
     # once the unicorn engine actually performed the current instruction.