Minor changes

mandiant · Oct 2, 2023 · 4a54532 · 4a54532
1 parent e083376
commit 4a54532
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 20 deletions.
diff --git a/floss/language/rust/decode_utf8.py b/floss/language/rust/decode_utf8.py
@@ -3,6 +3,7 @@
 import logging
 import pathlib
 import argparse
+from typing import List, Tuple, Iterable, Optional
 
 import pefile
 
@@ -19,7 +20,7 @@ def get_rdata_section(pe: pefile.PE) -> pefile.SectionStructure:
     raise ValueError("no .rdata section found")
 
 
-def extract_utf8_strings(pe: pefile.PE, min_length=MIN_STR_LEN):
+def extract_utf8_strings(pe: pefile.PE, min_length=MIN_STR_LEN) -> List[Tuple[str, int, int]]:
     """
     Extracts UTF-8 strings from the .rdata section of a PE file.
     """
@@ -64,20 +65,18 @@ def extract_utf8_strings(pe: pefile.PE, min_length=MIN_STR_LEN):
 
     strings = []  # string, start index, end index
 
-    # check for consecutive characters and convert to string
+    prev = False
+
     for i in range(0, len(character_and_index)):
-        if i == 0:
-            strings.append([character_and_index[i][0], character_and_index[i][1], character_and_index[i][1]])
-        else:
-            if (
-                character_and_index[i - 1][1] + character_and_index[i - 1][2] == character_and_index[i][1]
-                and character_and_index[i][0].isprintable() == True
-            ):
+        if character_and_index[i][0].isprintable() == True:
+            if prev == False:
+                strings.append([character_and_index[i][0], character_and_index[i][1], character_and_index[i][1]])
+                prev = True
+            else:
                 strings[-1][0] += character_and_index[i][0]
                 strings[-1][2] = character_and_index[i][1]
-            else:
-                if character_and_index[i][0].isprintable() == True:
-                    strings.append([character_and_index[i][0], character_and_index[i][1], character_and_index[i][1]])
+        else:
+            prev = False
 
     # filter strings less than min length
     strings = [string for string in strings if len(string[0]) >= min_length]

diff --git a/floss/language/rust/extract.py b/floss/language/rust/extract.py
@@ -11,6 +11,7 @@
 
 from floss.results import StaticString, StringEncoding
 from floss.language.utils import find_lea_xrefs, find_mov_xrefs, find_push_xrefs, get_struct_string_candidates
+from floss.language.rust.decode_utf8 import extract_utf8_strings
 
 logger = logging.getLogger(__name__)
 
@@ -26,18 +27,14 @@ def get_rdata_section(pe: pefile.PE) -> pefile.SectionStructure:
 
 
 def filter_and_transform_utf8_strings(
-    strings: List[Tuple[str, str, Tuple[int, int], bool]],
+    strings: List[Tuple[str, int, int]],
     start_rdata: int,
 ) -> List[StaticString]:
     transformed_strings = []
 
     for string in strings:
         s = string[0]
-        string_type = string[1]
-        start = string[2][0] + start_rdata
-
-        if string_type != "UTF8":
-            continue
+        start = string[1] + start_rdata
 
         # our static algorithm does not extract new lines either
         s = s.replace("\n", "")
@@ -98,8 +95,8 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
     virtual_address = rdata_section.VirtualAddress
     pointer_to_raw_data = rdata_section.PointerToRawData
 
-    # extract utf-8 and wide strings, latter not needed here
-    strings = b2s.extract_all_strings(rdata_section.get_data(), min_length)
+    # extract utf-8 strings
+    strings = extract_utf8_strings(pe, min_length)
 
     # select only UTF-8 strings and adjust offset
     static_strings = filter_and_transform_utf8_strings(strings, start_rdata)