From fd86d0b9a2a386340ff4fe0dd048da1bde5378d5 Mon Sep 17 00:00:00 2001 From: BlackoutRoulette Date: Mon, 9 Oct 2023 19:46:15 +0200 Subject: [PATCH] Changed the settings string to base32768 encoding --- GUI/electron/src/modules/generator.js | 12 +-- Settings.py | 53 +++++++----- Unittest.py | 69 +++++++++++++++- pybase32k.py | 115 ++++++++++++++++++++++++++ 4 files changed, 221 insertions(+), 28 deletions(-) create mode 100644 pybase32k.py diff --git a/GUI/electron/src/modules/generator.js b/GUI/electron/src/modules/generator.js index da63e408a..66c1c7770 100644 --- a/GUI/electron/src/modules/generator.js +++ b/GUI/electron/src/modules/generator.js @@ -155,7 +155,7 @@ function romBuilding(pythonPath, randoPath, settings) { error = true; errorMsg = data.toString().replace("Please run with -h to see help for further information.", "").replace("Press Enter to exit.", "").trim(); - errorMsgFull = errorMsg; + errorMsgFull = errorMsg; if (romBuildingGenerator) treeKill(romBuildingGenerator.pid); @@ -164,12 +164,12 @@ function romBuilding(pythonPath, randoPath, settings) { romBuildingGenerator.stderr.on('data', data => { //console.log("stderr data", data.toString()); - handleMessage(data); + handleMessage(data); }); romBuildingGenerator.stdout.on('data', data => { //console.log("stdout data", data.toString()); - handleMessage(data); + handleMessage(data); }); module.exports.emit('patchJobProgress', { generationIndex: currentGeneration, progressCurrent: 0, progressTotal: 0, message: "Starting." 
}); @@ -205,7 +205,7 @@ function romBuilding(pythonPath, randoPath, settings) { } resolve(); - + }).catch((err) => { //Promise RomGeneration console.error('[romBuilding] Rom promise rejected: ' + err); reject({ short: err, long: err }); @@ -337,7 +337,7 @@ function parseSettings(pythonPath, randoPath) { reject(output); } else { - resolve(output.match(/([a-zA-Z0-9])\w+/g)[0]); + resolve(output); } }).catch(err => { @@ -397,4 +397,4 @@ module.exports.parseSettings = parseSettings; module.exports.romBuilding = romBuilding; module.exports.cancelRomBuilding = cancelRomBuilding; module.exports.testPythonPath = testPythonPath; -module.exports.getUpdatedDynamicSetting = getUpdatedDynamicSetting; \ No newline at end of file +module.exports.getUpdatedDynamicSetting = getUpdatedDynamicSetting; diff --git a/Settings.py b/Settings.py index c4cd53b30..cf4878ca0 100644 --- a/Settings.py +++ b/Settings.py @@ -10,6 +10,7 @@ import string import sys import textwrap +import pybase32k from collections.abc import Iterable from typing import Any, Optional @@ -33,33 +34,43 @@ def _get_help_string(self, action) -> Optional[str]: return textwrap.dedent(action.help) -# 32 characters -letters: str = "ABCDEFGHJKLMNPQRSTUVWXYZ23456789" -index_to_letter: dict[int, str] = {i: letters[i] for i in range(32)} -letter_to_index: dict[str, int] = {v: k for k, v in index_to_letter.items()} +def to_bytes(bits: list[int]) -> bytes: + """ + Converts a list of bits into a bytes object, 8 bits per byte + :param bits: a list of bits whose length is a multiple of 8 + :return: a bytes object + """ + assert len(bits) % 8 == 0 + s: str = "".join(map(str, bits)) + return bytes(int(s[i:i + 8], 2) for i in range(0, len(s), 8)) + + +def to_bits(bytes_: bytes) -> list[int]: + """ + Converts a bytes object into a list of bits + :param bytes_: a bytes object + :return: a list of bits, most significant bit of each byte first + """ + return [(byte >> i) & 1 for byte in bytes_ for i in range(7, -1, -1)] + + +def pad_list(li: list[int], chunk_size: int, padding: int): + mod: int = len(li) % chunk_size + if 
mod > 0: + return li + [padding] * (chunk_size - mod) + return li def bit_string_to_text(bits: list[int]) -> str: - # pad the bits array to be multiple of 5 - if len(bits) % 5 > 0: - bits += [0] * (5 - len(bits) % 5) - # convert to characters - result = "" - for i in range(0, len(bits), 5): - chunk = bits[i:i + 5] - value = 0 - for b in range(5): - value |= chunk[b] << b - result += index_to_letter[value] - return result + bits = pad_list(bits, 8, 0) + b: bytes = to_bytes(bits) + s: str = pybase32k.encode(b) + return s def text_to_bit_string(text: str) -> list[int]: - bits = [] - for c in text: - index = letter_to_index[c] - for b in range(5): - bits += [ (index >> b) & 1 ] + b: bytes = pybase32k.decode(text) + bits: list[int] = list(to_bits(b)) return bits diff --git a/Unittest.py b/Unittest.py index faec3a667..0f780a2d3 100644 --- a/Unittest.py +++ b/Unittest.py @@ -20,7 +20,7 @@ from LocationList import location_is_viewable from Main import main, resolve_settings, build_world_graphs from Messages import Message, read_messages, shuffle_messages -from Settings import Settings, get_preset_files +from Settings import Settings, get_preset_files, to_bytes, to_bits, pad_list from Spoiler import Spoiler from Rom import Rom @@ -863,3 +863,70 @@ def test_text_shuffle(self): messages = read_messages(rom) shuffle_messages(messages) shuffle_messages(messages, False) + + +class TestSettingsString(unittest.TestCase): + def test_bit_list_to_bytes_0(self): + li = [0] * 8 + result = to_bytes(li) + self.assertEqual(bytes(1), result) + + def test_bit_list_to_bytes_1(self): + li = [1, 0, 1, 0, 0, 0, 0, 1] + result = to_bytes(li) + self.assertEqual(bytes([161]), result) + + def test_bit_list_to_bytes_2(self): + li = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0] + result = to_bytes(li) + self.assertEqual(bytes([68, 48]), result) + + def test_bit_list_to_bytes_3(self): + li = [1] + try: + list(to_bytes(li)) + except: + return + self.assertTrue(False) + + def 
test_bytes_to_bits_0(self): + b = bytes(1) + result = to_bits(b) + self.assertEqual([0] * 8, result) + + def test_bytes_to_bits_1(self): + b = bytes([161]) + result = to_bits(b) + self.assertEqual([1, 0, 1, 0, 0, 0, 0, 1], result) + + def test_bytes_to_bits_2(self): + b = bytes([68, 48]) + result = to_bits(b) + self.assertEqual([0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0], result) + + def test_bytes_to_bits_3(self): + b = bytes(0) + result = to_bits(b) + self.assertEqual([], result) + + def test_bit_byte_conversion(self): + li = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0] + b = to_bytes(li) + result = to_bits(b) + self.assertEqual(li, result) + + def test_byte_bit_conversion(self): + b = bytes([68, 48]) + li = to_bits(b) + result = to_bytes(li) + self.assertEqual(b, result) + + def test_pad_0(self): + li = [0] + result = pad_list(li, 8, 0) + self.assertEqual(li * 8, result) + + def test_pad_1(self): + li = [0, 1] * 4 + result = pad_list(li, 8, 0) + self.assertEqual(li, result) diff --git a/pybase32k.py b/pybase32k.py new file mode 100644 index 000000000..3d560795e --- /dev/null +++ b/pybase32k.py @@ -0,0 +1,115 @@ +# Author: BlackoutRoulette +# License: MIT +# Source: https://github.com/blackoutroulette/PyBase32k +import re + +BITS_PER_CHAR = 15 # Base32768 is a 15-bit encoding +BITS_PER_BYTE = 8 + +PAIR_STR = [ + 'ҠҿԀԟڀڿݠޟ߀ߟကဟႠႿᄀᅟᆀᆟᇠሿበቿዠዿጠጿᎠᏟᐠᙟᚠᛟកសᠠᡟᣀᣟᦀᦟ᧠᧿ᨠᨿᯀᯟᰀᰟᴀᴟ⇠⇿⋀⋟⍀⏟␀␟─❟➀➿⠀⥿⦠⦿⨠⩟⪀⪿⫠⭟ⰀⰟⲀⳟⴀⴟⵀⵟ⺠⻟㇀㇟㐀䶟䷀龿ꀀꑿ꒠꒿ꔀꗿꙀꙟꚠꛟ꜀ꝟꞀꞟꡀꡟ', + 'ƀƟɀʟ' +] + +_LOOKUP_ENC = {} +_LOOKUP_DEC = {} + + +def _build_lookup(): + for i, s in enumerate(PAIR_STR): + match = list(re.findall("..", s)) + encode_repertoire = [chr(cp) for pair in match for cp in range(ord(pair[0]), ord(pair[1]) + 1)] + + num_z_bits = BITS_PER_CHAR - BITS_PER_BYTE * i # 0 -> 15, 1 -> 7 + _LOOKUP_ENC[num_z_bits] = encode_repertoire + for z, c in enumerate(encode_repertoire): + _LOOKUP_DEC[c] = (num_z_bits, z) + + +_build_lookup() + + +def encode(bytes_: bytes) -> str: + """ + Encodes a bytes object into a Base32768 
string. + :param bytes_: a bytes object + :return: the encoded Base32768 string + """ + if type(bytes_) is not bytes: + raise TypeError('Argument must be bytes') + + s: str = '' + z: int = 0 + num_z_bits: int = 0 + + for byte in bytes_: + # Take most significant bit first + for j in range(BITS_PER_BYTE - 1, -1, -1): + bit: int = (byte >> j) & 1 + + z = (z << 1) + bit + num_z_bits += 1 + + if num_z_bits == BITS_PER_CHAR: + s += _LOOKUP_ENC[num_z_bits][z] + z = 0 + num_z_bits = 0 + + if num_z_bits != 0: + while num_z_bits not in _LOOKUP_ENC: + z = (z << 1) + 1 + num_z_bits += 1 + + s += _LOOKUP_ENC[num_z_bits][z] + + return s + + +def decode(s: str) -> bytes: + """ + Decodes a Base32768 string into a bytes object. + :param s: a Base32768 string + :return: the decoded bytes object + """ + if type(s) is not str: + raise TypeError('Argument must be str') + + length: int = len(s) + + # This length is a guess. There's a chance we allocate one more byte here + # than we actually need. But we can count and slice it off later + byte_arr: bytearray = bytearray(length * BITS_PER_CHAR // BITS_PER_BYTE) + num_bytes: int = 0 + byte: int = 0 + num_byte_bits: int = 0 + + for i, c in enumerate(s): + + if c not in _LOOKUP_DEC: + raise ValueError(f'Unrecognised Base32768 character: {c}') + + num_z_bits, z = _LOOKUP_DEC[c] + + if num_z_bits != BITS_PER_CHAR and i != length - 1: + raise ValueError(f'Secondary character found before end of input at position {i}') + + # Take most significant bit first + for j in range(num_z_bits - 1, -1, -1): + bit: int = (z >> j) & 1 + + byte = (byte << 1) + bit + num_byte_bits += 1 + + if num_byte_bits == BITS_PER_BYTE: + byte_arr[num_bytes] = byte + num_bytes += 1 + byte = 0 + num_byte_bits = 0 + + # Final padding bits! Requires special consideration! + # Remember how we always pad with 1s? 
+ # Note: there could be 0 such bits, check still works though + if byte != ((1 << num_byte_bits) - 1): + raise ValueError('Padding mismatch') + + return bytes(byte_arr[:num_bytes])