diff --git a/floss/language/rust/decode_utf8.py b/floss/language/rust/decode_utf8.py index d2992524f..11983fe62 100644 --- a/floss/language/rust/decode_utf8.py +++ b/floss/language/rust/decode_utf8.py @@ -27,53 +27,52 @@ def extract_utf8_strings_from_buffer(buf, min_length=MIN_STR_LEN) -> List[List[A # Reference: https://en.wikipedia.org/wiki/UTF-8 + character_and_index = [] strings = [] for i in range(0, len(buf)): # for 1 byte if buf[i] & 0x80 == 0x00: character = buf[i].to_bytes(1, "big").decode("utf-8", "ignore") - strings.append([character, i]) + character_and_index.append([character, i, 1]) # for 2 bytes elif buf[i] & 0xE0 == 0xC0: temp = buf[i] << 8 | buf[i + 1] character = temp.to_bytes(2, "big").decode("utf-8", "ignore") i += 1 - strings.append([character, i]) + character_and_index.append([character, i, 2]) # for 3 bytes elif buf[i] & 0xF0 == 0xE0: temp = buf[i] << 16 | buf[i + 1] << 8 | buf[i + 2] character = temp.to_bytes(3, "big").decode("utf-8", "ignore") i += 2 - strings.append([character, i]) + character_and_index.append([character, i, 3]) # for 4 bytes elif buf[i] & 0xF8 == 0xF0: temp = buf[i] << 24 | buf[i + 1] << 16 | buf[i + 2] << 8 | buf[i + 3] character = temp.to_bytes(4, "big").decode("utf-8", "ignore") i += 3 - strings.append([character, i]) + character_and_index.append([character, i, 4]) prev = False - for i in range(0, len(strings)): - if strings[i][0].isprintable() == True: + for i in range(0, len(character_and_index)): + if character_and_index[i][0].isprintable() == True: if prev == False: - strings.append([strings[i][0], strings[i][1]]) + strings.append([character_and_index[i][0], character_and_index[i][1], character_and_index[i][1]]) prev = True else: - strings[-1][0] += strings[i][0] - strings[-1][1] = strings[i][1] + strings[-1][0] += character_and_index[i][0] + strings[-1][2] = character_and_index[i][1] else: prev = False # filter strings less than min length strings = [string for string in strings if len(string[0]) >= min_length] - print(strings) - return strings