From 418a458d4d90cbbe9bf4bf73c7e31a5cb7a1b6f8 Mon Sep 17 00:00:00 2001 From: Ryan Rolnicki Date: Thu, 5 May 2022 23:59:04 +0900 Subject: [PATCH 1/5] Add encoding/decoding from character set --- .gitignore | 2 ++ src/bitarray.ts | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ test/suite.ts | 35 ++++++++++++++++++++++++- 3 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3659f1a --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +node_modules/* +dist/* diff --git a/src/bitarray.ts b/src/bitarray.ts index 4b3155d..933982d 100644 --- a/src/bitarray.ts +++ b/src/bitarray.ts @@ -172,6 +172,74 @@ export default class BitArray extends BitTypedArray { return ret; } + /** + * + * @param charSet a set of n characters to use to encode the BitArray; charSet.length must be a power of 2 (2, 4, 8, etc) + * The more characters in the set, the more compact the resulting output will be + * @returns a string encoded using the provided character set (e.g., base64 encoding can be achieved with this) + */ + encodeWithCharacterSet( charSet: string | string[] ): string { + const charArray = ((typeof charSet === 'string') ? 
Array.from(charSet) : charSet); + const log2 = Math.log2(charArray.length); + + if (log2 < 1 || log2 % 1 !== 0) { + throw new RangeError('Provided charset\' length must non-0 positive power of 2'); + } + + const ret = []; + + let val = 0; + let valLen = 0; + for (const b of this) { + valLen++; + val <<= 1; + val += b; + + if (valLen === log2) { + ret.push(charArray[val]); + valLen = val = 0; + } + } + + if (valLen !== 0) { + val <<= (log2 - valLen); + ret.push(charArray[val]); + } + + return ret.join(''); + } + + /** + * + * @param charSet a set of n characters to use to encode the BitArray; charSet.length must be a power of 2 (2, 4, 8, etc), + * and should generally match the set used in the original encoding + * @param encodedString an encoded string built with encodeWithCharacterSet + * @returns a BitArray of the encodedString decoded using charSet + */ + static decodeWithCharacterSet( charSet: string | string[], encodedString: string ): BitArray { + const charArray = ((typeof charSet === 'string') ? 
Array.from(charSet) : charSet); + const log2 = Math.log2(charArray.length); + + if (log2 < 1 || log2 % 1 !== 0) { + throw new RangeError('Provided charset\' length must non-0 positive power of 2'); + } + + const pad = (s: string) => '0'.repeat(log2 - s.length) + s + + const charMap = {} // maps each character to its integral value + charArray.forEach((k, i) => { + charMap[k] = pad(i.toString(2)) + }); + const deserialized = Array.from(encodedString).flatMap(c => { + if (!(c in charMap)) { + throw new RangeError('Invalid character found in encoded string'); + } + return charMap[c]; + }).join('') + const ret = BitArray.from(deserialized); + return ret; + } + } // create aliases diff --git a/test/suite.ts b/test/suite.ts index 1155f1b..137d64a 100644 --- a/test/suite.ts +++ b/test/suite.ts @@ -9,6 +9,18 @@ const arr2 = new Array( len + 10 ).fill(false).map( x => Math.random() > 0.5 ) const sample1 = BitArray.from( arr1 ); const sample2 = BitArray.of( ...arr2 ); +const sample3 = BitArray.from( '0110'); + +// Returns true if the block throws +function expectThrow( fn: () => void) { + try { + fn(); + } catch(e) { + return true; + } + + return false; +} // matches the format of BitArray.toSting() function toString( arr ) { @@ -55,9 +67,30 @@ const binary_operations = (()=>{ })(); +/** suite 4 */ +const character_encoding_from_set = { + ".encodeWithCharacterSet_1bit": sample3.encodeWithCharacterSet('ab') === 'abba', + ".encodeWithCharacterSet_3bit": sample3.encodeWithCharacterSet('abcdefgh') === 'da', + ".encodeWithCharacterSet_": expectThrow(() => sample3.encodeWithCharacterSet('')), + ".encodeWithCharacterSet_a": expectThrow(() => sample3.encodeWithCharacterSet('a')), + ".encodeWithCharacterSet_abc": expectThrow(() => sample3.encodeWithCharacterSet('abc')) +}; + +/** suite 5 */ +const character_encode_decode = { + ".decodeWithCharacterSet_1bit": BitArray.decodeWithCharacterSet('ab', 'abba').toString() === sample3.toString(), + ".decodeWithCharacterSet_3bit": 
BitArray.decodeWithCharacterSet('abcdefgh', 'da').toString().substring(0, 4) === sample3.toString(), + ".decodeWithCharacterSet_empty": BitArray.decodeWithCharacterSet('ab', '').toString() === '', + ".decodeWithCharacterSet_invalid": expectThrow(() => BitArray.decodeWithCharacterSet('ab', 'abc')), + ".decodeWithCharacterSet_": expectThrow(() => BitArray.decodeWithCharacterSet('', 'abba')), +}; + + export default { instantiating, properties, - binary_operations + binary_operations, + character_encoding_from_set, + character_encode_decode }; From de8f61cc56eda5287291a21fbd08a3972fc7d43e Mon Sep 17 00:00:00 2001 From: Ryan Rolnicki Date: Fri, 6 May 2022 00:19:12 +0900 Subject: [PATCH 2/5] Adding base64 helpers --- src/bitarray.ts | 12 ++++++++++++ test/suite.ts | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/bitarray.ts b/src/bitarray.ts index 933982d..207537c 100644 --- a/src/bitarray.ts +++ b/src/bitarray.ts @@ -240,6 +240,18 @@ export default class BitArray extends BitTypedArray { return ret; } + // Convenience specializations for encoding base64MIME and base64Url + static base64MIMEChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' + encodeBase64MIME() { return this.encodeWithCharacterSet(BitArray.base64MIMEChars) } + static decodeBase64MIME(encodedString: string) { + return BitArray.decodeWithCharacterSet(BitArray.base64MIMEChars, encodedString); + } + + static base64UrlChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_' + encodeBase64Url() { return this.encodeWithCharacterSet(BitArray.base64UrlChars) } + static decodeBase64Url(encodedString: string) { + return BitArray.decodeWithCharacterSet(BitArray.base64UrlChars, encodedString); + } } // create aliases diff --git a/test/suite.ts b/test/suite.ts index 137d64a..945311d 100644 --- a/test/suite.ts +++ b/test/suite.ts @@ -79,10 +79,12 @@ const character_encoding_from_set = { /** suite 5 */ const character_encode_decode = { 
".decodeWithCharacterSet_1bit": BitArray.decodeWithCharacterSet('ab', 'abba').toString() === sample3.toString(), + // Note: the substring is needed because when deserializing, we have some number of padding 0s that we can't know were + // in the original string or not ".decodeWithCharacterSet_3bit": BitArray.decodeWithCharacterSet('abcdefgh', 'da').toString().substring(0, 4) === sample3.toString(), ".decodeWithCharacterSet_empty": BitArray.decodeWithCharacterSet('ab', '').toString() === '', ".decodeWithCharacterSet_invalid": expectThrow(() => BitArray.decodeWithCharacterSet('ab', 'abc')), - ".decodeWithCharacterSet_": expectThrow(() => BitArray.decodeWithCharacterSet('', 'abba')), + ".decodeWithCharacterSet_": expectThrow(() => BitArray.decodeWithCharacterSet('', 'abba')) }; From 6df5ed3c2d2097e468f38f8ed23a98504d7bf2f7 Mon Sep 17 00:00:00 2001 From: Ryan Rolnicki Date: Sun, 8 May 2022 08:07:09 +0900 Subject: [PATCH 3/5] Remove string[] from encode/decode params --- src/bitarray.ts | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/bitarray.ts b/src/bitarray.ts index 207537c..d13761b 100644 --- a/src/bitarray.ts +++ b/src/bitarray.ts @@ -174,12 +174,11 @@ export default class BitArray extends BitTypedArray { /** * - * @param charSet a set of n characters to use to encode the BitArray; charSet.length must be a power of 2 (2, 4, 8, etc) + * @param charArray a set of n characters to use to encode the BitArray; charArray.length must be a power of 2 (2, 4, 8, etc) * The more characters in the set, the more compact the resulting output will be * @returns a string encoded using the provided character set (e.g., base64 encoding can be achieved with this) */ - encodeWithCharacterSet( charSet: string | string[] ): string { - const charArray = ((typeof charSet === 'string') ? 
Array.from(charSet) : charSet); + encodeWithCharacterSet( charArray: string): string { const log2 = Math.log2(charArray.length); if (log2 < 1 || log2 % 1 !== 0) { @@ -211,13 +210,12 @@ export default class BitArray extends BitTypedArray { /** * - * @param charSet a set of n characters to use to encode the BitArray; charSet.length must be a power of 2 (2, 4, 8, etc), + * @param charArray a set of n characters to use to encode the BitArray; charArray.length must be a power of 2 (2, 4, 8, etc), * and should generally match the set used in the original encoding * @param encodedString an encoded string built with encodeWithCharacterSet - * @returns a BitArray of the encodedString decoded using charSet + * @returns a BitArray of the encodedString decoded using charArray */ - static decodeWithCharacterSet( charSet: string | string[], encodedString: string ): BitArray { - const charArray = ((typeof charSet === 'string') ? Array.from(charSet) : charSet); + static decodeWithCharacterSet( charArray: string, encodedString: string ): BitArray { const log2 = Math.log2(charArray.length); if (log2 < 1 || log2 % 1 !== 0) { @@ -227,9 +225,10 @@ export default class BitArray extends BitTypedArray { const pad = (s: string) => '0'.repeat(log2 - s.length) + s const charMap = {} // maps each character to its integral value - charArray.forEach((k, i) => { - charMap[k] = pad(i.toString(2)) - }); + for (var i = 0; i < charArray.length; i++) { + charMap[charArray[i]] = pad(i.toString(2)) + } + const deserialized = Array.from(encodedString).flatMap(c => { if (!(c in charMap)) { throw new RangeError('Invalid character found in encoded string'); From afbf65538d010f5f6f3397850c31bbb8b0e2186f Mon Sep 17 00:00:00 2001 From: Ryan Rolnicki Date: Sun, 8 May 2022 08:08:53 +0900 Subject: [PATCH 4/5] Use map instead of flatMap --- src/bitarray.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bitarray.ts b/src/bitarray.ts index d13761b..8d9130d 100644 --- a/src/bitarray.ts +++ 
b/src/bitarray.ts @@ -228,8 +228,8 @@ export default class BitArray extends BitTypedArray { for (var i = 0; i < charArray.length; i++) { charMap[charArray[i]] = pad(i.toString(2)) } - - const deserialized = Array.from(encodedString).flatMap(c => { + + const deserialized = Array.from(encodedString).map(c => { if (!(c in charMap)) { throw new RangeError('Invalid character found in encoded string'); } From 2944985d2bf8f24a30909ac5f8aad7574058c666 Mon Sep 17 00:00:00 2001 From: Ryan Rolnicki Date: Sun, 8 May 2022 08:26:54 +0900 Subject: [PATCH 5/5] Error text update --- src/bitarray.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bitarray.ts b/src/bitarray.ts index 8d9130d..231d3b1 100644 --- a/src/bitarray.ts +++ b/src/bitarray.ts @@ -182,7 +182,7 @@ export default class BitArray extends BitTypedArray { const log2 = Math.log2(charArray.length); if (log2 < 1 || log2 % 1 !== 0) { - throw new RangeError('Provided charset\' length must non-0 positive power of 2'); + throw new RangeError('Provided charArray\'s length must be a non-0 positive power of 2'); } const ret = []; @@ -219,7 +219,7 @@ export default class BitArray extends BitTypedArray { const log2 = Math.log2(charArray.length); if (log2 < 1 || log2 % 1 !== 0) { - throw new RangeError('Provided charset\' length must non-0 positive power of 2'); + throw new RangeError('Provided charArray\'s length must be a non-0 positive power of 2'); } const pad = (s: string) => '0'.repeat(log2 - s.length) + s