Tweaks

mandiant · Sep 26, 2023 · e083376 · e083376
1 parent cfeb127
commit e083376
Showing 1 changed file with 20 additions and 12 deletions.
diff --git a/floss/language/rust/decode_utf8.py b/floss/language/rust/decode_utf8.py
@@ -1,22 +1,28 @@
 # Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
-import pefile
+import sys
 import logging
-import argparse
 import pathlib
-import sys
+import argparse
+
+import pefile
 
 MIN_STR_LEN = 4
 
 logger = logging.getLogger(__name__)
 
+
 def get_rdata_section(pe: pefile.PE) -> pefile.SectionStructure:
     """
     Returns the first section in pe.sections whose Name starts with ".rdata"
     immediately followed by a NUL byte.

     Raises ValueError if no section matches.
     """
     for section in pe.sections:
         # the NUL byte is part of the prefix, so a longer name that merely
         # begins with ".rdata" is not accepted
         if section.Name.startswith(b".rdata\x00"):
             return section
 
     raise ValueError("no .rdata section found")
 
-def extract_utf8_strings(pe, min_length=MIN_STR_LEN):
+
+def extract_utf8_strings(pe: pefile.PE, min_length=MIN_STR_LEN):
+    """
+    Extracts UTF-8 strings from the .rdata section of a PE file.
+    """
     try:
         rdata_section = get_rdata_section(pe)
     except ValueError as e:
@@ -27,7 +33,7 @@ def extract_utf8_strings(pe, min_length=MIN_STR_LEN):
 
     character_and_index = []
 
-    # Reference: https://en.wikipedia.org/wiki/UTF-8 
+    # Reference: https://en.wikipedia.org/wiki/UTF-8
 
     for i in range(0, len(strings)):
         # for 1 byte
@@ -37,34 +43,36 @@ def extract_utf8_strings(pe, min_length=MIN_STR_LEN):
 
         # for 2 bytes
         elif strings[i] & 0xE0 == 0xC0:
-            temp = strings[i] << 8 | strings[i+1]
+            temp = strings[i] << 8 | strings[i + 1]
             character = temp.to_bytes(2, "big").decode("utf-8", "ignore")
             i += 1
             character_and_index.append([character, i, 2])
 
         # for 3 bytes
         elif strings[i] & 0xF0 == 0xE0:
-            temp = strings[i] << 16 | strings[i+1] << 8 | strings[i+2]
+            temp = strings[i] << 16 | strings[i + 1] << 8 | strings[i + 2]
             character = temp.to_bytes(3, "big").decode("utf-8", "ignore")
             i += 2
             character_and_index.append([character, i, 3])
 
         # for 4 bytes
         elif strings[i] & 0xF8 == 0xF0:
-            temp = strings[i] << 24 | strings[i+1] << 16 | strings[i+2] << 8 | strings[i+3]
+            temp = strings[i] << 24 | strings[i + 1] << 16 | strings[i + 2] << 8 | strings[i + 3]
             character = temp.to_bytes(4, "big").decode("utf-8", "ignore")
             i += 3
             character_and_index.append([character, i, 4])
 
-
-    strings = [] # string, start index, end index
+    strings = []  # string, start index, end index
 
     # check for consecutive characters and convert to string
     for i in range(0, len(character_and_index)):
         if i == 0:
             strings.append([character_and_index[i][0], character_and_index[i][1], character_and_index[i][1]])
         else:
-            if character_and_index[i-1][1] + character_and_index[i-1][2] == character_and_index[i][1] and character_and_index[i][0].isprintable() == True:
+            if (
+                character_and_index[i - 1][1] + character_and_index[i - 1][2] == character_and_index[i][1]
+                and character_and_index[i][0].isprintable() == True
+            ):
                 strings[-1][0] += character_and_index[i][0]
                 strings[-1][2] = character_and_index[i][1]
             else:
@@ -102,4 +110,4 @@ def main(argv=None):
 
 
 if __name__ == "__main__":
-    sys.exit(main())
+    sys.exit(main())