Skip to content

Commit

Permalink
Tweaks
Browse files — browse the repository at this point in the history
  • Loading branch information
Arker123 committed Jun 23, 2024
1 parent 3105843 commit 7481274
Showing 1 changed file with 10 additions and 11 deletions.
21 changes: 10 additions & 11 deletions floss/language/rust/decode_utf8.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,53 +27,52 @@ def extract_utf8_strings_from_buffer(buf, min_length=MIN_STR_LEN) -> List[List[A

# Reference: https://en.wikipedia.org/wiki/UTF-8

character_and_index = []
strings = []

for i in range(0, len(buf)):
# 1-byte sequence: lead byte matches 0xxxxxxx (ASCII)
if buf[i] & 0x80 == 0x00:
character = buf[i].to_bytes(1, "big").decode("utf-8", "ignore")
strings.append([character, i])
character_and_index.append([character, i, 1])

# 2-byte sequence: lead byte matches 110xxxxx
elif buf[i] & 0xE0 == 0xC0:
temp = buf[i] << 8 | buf[i + 1]
character = temp.to_bytes(2, "big").decode("utf-8", "ignore")
i += 1
strings.append([character, i])
character_and_index.append([character, i, 2])

# 3-byte sequence: lead byte matches 1110xxxx
elif buf[i] & 0xF0 == 0xE0:
temp = buf[i] << 16 | buf[i + 1] << 8 | buf[i + 2]
character = temp.to_bytes(3, "big").decode("utf-8", "ignore")
i += 2
strings.append([character, i])
character_and_index.append([character, i, 3])

# 4-byte sequence: lead byte matches 11110xxx
elif buf[i] & 0xF8 == 0xF0:
temp = buf[i] << 24 | buf[i + 1] << 16 | buf[i + 2] << 8 | buf[i + 3]
character = temp.to_bytes(4, "big").decode("utf-8", "ignore")
i += 3
strings.append([character, i])
character_and_index.append([character, i, 4])

prev = False

for i in range(0, len(strings)):
if strings[i][0].isprintable() == True:
for i in range(0, len(character_and_index)):
if character_and_index[i][0].isprintable() == True:
if prev == False:
strings.append([strings[i][0], strings[i][1]])
strings.append([character_and_index[i][0], character_and_index[i][1], character_and_index[i][1]])
prev = True
else:
strings[-1][0] += strings[i][0]
strings[-1][1] = strings[i][1]
strings[-1][0] += character_and_index[i][0]
strings[-1][2] = character_and_index[i][1]
else:
prev = False

# drop strings shorter than min_length
strings = [string for string in strings if len(string[0]) >= min_length]

print(strings)

return strings


Expand Down

0 comments on commit 7481274

Please sign in to comment.