Skip to content

Commit

Permalink
Tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
Arker123 committed Sep 26, 2023
1 parent cfeb127 commit e083376
Showing 1 changed file with 20 additions and 12 deletions.
32 changes: 20 additions & 12 deletions floss/language/rust/decode_utf8.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
import pefile
import sys
import logging
import argparse
import pathlib
import sys
import argparse

import pefile

MIN_STR_LEN = 4

logger = logging.getLogger(__name__)


def get_rdata_section(pe: pefile.PE) -> pefile.SectionStructure:
    """
    Return the first section of `pe` whose name begins with `.rdata` followed by a NUL byte.

    Args:
        pe: parsed PE file whose `sections` are searched in order.

    Returns:
        The first matching section object.

    Raises:
        ValueError: if no section name starts with b".rdata\\x00".
    """
    # Matching on the trailing NUL ensures names that merely begin with
    # ".rdata" followed by other characters are not accepted.
    rdata = next(
        (candidate for candidate in pe.sections if candidate.Name.startswith(b".rdata\x00")),
        None,
    )
    if rdata is None:
        raise ValueError("no .rdata section found")

    return rdata

def extract_utf8_strings(pe, min_length=MIN_STR_LEN):

def extract_utf8_strings(pe: pefile.PE, min_length=MIN_STR_LEN):
"""
Extracts UTF-8 strings from the .rdata section of a PE file.
"""
try:
rdata_section = get_rdata_section(pe)
except ValueError as e:
Expand All @@ -27,7 +33,7 @@ def extract_utf8_strings(pe, min_length=MIN_STR_LEN):

character_and_index = []

# Reference: https://en.wikipedia.org/wiki/UTF-8
# Reference: https://en.wikipedia.org/wiki/UTF-8

for i in range(0, len(strings)):
# for 1 byte
Expand All @@ -37,34 +43,36 @@ def extract_utf8_strings(pe, min_length=MIN_STR_LEN):

# for 2 bytes
elif strings[i] & 0xE0 == 0xC0:
temp = strings[i] << 8 | strings[i+1]
temp = strings[i] << 8 | strings[i + 1]
character = temp.to_bytes(2, "big").decode("utf-8", "ignore")
i += 1
character_and_index.append([character, i, 2])

# for 3 bytes
elif strings[i] & 0xF0 == 0xE0:
temp = strings[i] << 16 | strings[i+1] << 8 | strings[i+2]
temp = strings[i] << 16 | strings[i + 1] << 8 | strings[i + 2]
character = temp.to_bytes(3, "big").decode("utf-8", "ignore")
i += 2
character_and_index.append([character, i, 3])

# for 4 bytes
elif strings[i] & 0xF8 == 0xF0:
temp = strings[i] << 24 | strings[i+1] << 16 | strings[i+2] << 8 | strings[i+3]
temp = strings[i] << 24 | strings[i + 1] << 16 | strings[i + 2] << 8 | strings[i + 3]
character = temp.to_bytes(4, "big").decode("utf-8", "ignore")
i += 3
character_and_index.append([character, i, 4])


strings = [] # string, start index, end index
strings = [] # string, start index, end index

# check for consecutive characters and convert to string
for i in range(0, len(character_and_index)):
if i == 0:
strings.append([character_and_index[i][0], character_and_index[i][1], character_and_index[i][1]])
else:
if character_and_index[i-1][1] + character_and_index[i-1][2] == character_and_index[i][1] and character_and_index[i][0].isprintable() == True:
if (
character_and_index[i - 1][1] + character_and_index[i - 1][2] == character_and_index[i][1]
and character_and_index[i][0].isprintable() == True
):
strings[-1][0] += character_and_index[i][0]
strings[-1][2] = character_and_index[i][1]
else:
Expand Down Expand Up @@ -102,4 +110,4 @@ def main(argv=None):


if __name__ == "__main__":
sys.exit(main())
sys.exit(main())

0 comments on commit e083376

Please sign in to comment.