From 05063b59acdeadc0bd28ae04bf2c2f95d427c84a Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Tue, 10 Dec 2024 17:25:26 +0900
Subject: [PATCH 01/10] feat: use `TextEncoder` and `TextDecoder` for utf8
 strings

---
 packages/adblocker/src/data-view.ts | 30 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index 7d8ab640e2..36eea46899 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -20,6 +20,9 @@ export const EMPTY_UINT32_ARRAY = new Uint32Array(0);
 // Check if current architecture is little endian
 const LITTLE_ENDIAN: boolean = new Int8Array(new Int16Array([1]).buffer)[0] === 1;
 
+// One TextEncoder is shared by all calls; `getUTF8` still creates a TextDecoder per call
+const TEXT_ENCODER = new TextEncoder();
+
 // Store compression in a lazy, global singleton
 let getCompressionSingleton: () => Compression = () => {
   const COMPRESSION = new Compression();
@@ -87,8 +90,7 @@ export function sizeOfASCII(str: string): number {
  * Return number of bytes needed to serialize `str` UTF8 string.
  */
 export function sizeOfUTF8(str: string): number {
-  const encodedLength = encode(str).length;
-  return encodedLength + sizeOfLength(encodedLength);
+  return 4 + TEXT_ENCODER.encode(str).length;
 }
 
 /**
@@ -389,23 +391,19 @@ export class StaticDataView {
   }
 
   public pushUTF8(raw: string): void {
-    const str = encode(raw);
-    this.pushLength(str.length);
-
-    for (let i = 0; i < str.length; i += 1) {
-      this.buffer[this.pos++] = str.charCodeAt(i);
-    }
+    const pos = this.getPos();
+    const { written } = TEXT_ENCODER.encodeInto(raw, this.buffer.subarray(pos + 4));
+    this.setPos(pos);
+    this.pushUint32(written);
+    this.setPos(pos + 4 + written);
   }
 
   public getUTF8(): string {
-    const byteLength = this.getLength();
-    this.pos += byteLength;
-    return decode(
-      String.fromCharCode.apply(
-        null,
-        // @ts-ignore
-        this.buffer.subarray(this.pos - byteLength, this.pos),
-      ),
+    const byteLength = this.getUint32();
+    const pos = this.getPos();
+    this.setPos(pos + byteLength);
+    return new TextDecoder('utf8', { ignoreBOM: true }).decode(
+      this.buffer.subarray(pos, pos + byteLength),
     );
   }
 

From 1de005ea80539d1422725c2988bfd6d49faddd0e Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Tue, 10 Dec 2024 17:32:46 +0900
Subject: [PATCH 02/10] refactor: pos calculation in `pushUTF8`

---
 packages/adblocker/src/data-view.ts | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index 36eea46899..7a92fd7230 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -391,11 +391,9 @@ export class StaticDataView {
   }
 
   public pushUTF8(raw: string): void {
-    const pos = this.getPos();
-    const { written } = TEXT_ENCODER.encodeInto(raw, this.buffer.subarray(pos + 4));
-    this.setPos(pos);
+    const { written } = TEXT_ENCODER.encodeInto(raw, this.buffer.subarray(this.pos + 4));
     this.pushUint32(written);
-    this.setPos(pos + 4 + written);
+    this.setPos(this.pos + written);
   }
 
   public getUTF8(): string {

From d6032eb11f5d52fa105f9aae2283becf75de994a Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Wed, 11 Dec 2024 22:11:03 +0900
Subject: [PATCH 03/10] chore: save length of string in 16 bits unsigned
 integer

- A string is limited to 65535 encoded bytes (~65535 ASCII-only characters)
---
 packages/adblocker/src/data-view.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index 7a92fd7230..aef16b7347 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -392,12 +392,12 @@ export class StaticDataView {
 
   public pushUTF8(raw: string): void {
     const { written } = TEXT_ENCODER.encodeInto(raw, this.buffer.subarray(this.pos + 4));
-    this.pushUint32(written);
+    this.pushUint16(written);
     this.setPos(this.pos + written);
   }
 
   public getUTF8(): string {
-    const byteLength = this.getUint32();
+    const byteLength = this.getUint16();
     const pos = this.getPos();
     this.setPos(pos + byteLength);
     return new TextDecoder('utf8', { ignoreBOM: true }).decode(

From 65764e9ae39a53d5bc363bde8d718b47c8136535 Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Thu, 12 Dec 2024 14:22:12 +0900
Subject: [PATCH 04/10] fix: use getLength and pushLength for utf8

---
 packages/adblocker/src/data-view.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index aef16b7347..32136f86f3 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -392,12 +392,12 @@ export class StaticDataView {
 
   public pushUTF8(raw: string): void {
     const { written } = TEXT_ENCODER.encodeInto(raw, this.buffer.subarray(this.pos + 4));
-    this.pushUint16(written);
+    this.pushLength(written);
     this.setPos(this.pos + written);
   }
 
   public getUTF8(): string {
-    const byteLength = this.getUint16();
+    const byteLength = this.getLength();
     const pos = this.getPos();
     this.setPos(pos + byteLength);
     return new TextDecoder('utf8', { ignoreBOM: true }).decode(

From 0339bdca5b8e58405f00c9b051950297e6e2e8a2 Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Thu, 12 Dec 2024 17:53:12 +0900
Subject: [PATCH 05/10] fix: calculate length of utf8 encoded string

---
 packages/adblocker/src/data-view.ts | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index 32136f86f3..81be8ae289 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -90,7 +90,12 @@ export function sizeOfASCII(str: string): number {
  * Return number of bytes needed to serialize `str` UTF8 string.
  */
 export function sizeOfUTF8(str: string): number {
-  return 4 + TEXT_ENCODER.encode(str).length;
+  // Fast path for short strs considering the worst case (output ratio of 3)
+  if (str.length < 43 /* Math.ceil(127 / 3) */) {
+    return 1 + TEXT_ENCODER.encode(str).length;
+  }
+  const result = TEXT_ENCODER.encode(str);
+  return sizeOfLength(result.length) + result.length;
 }
 
 /**
@@ -391,8 +396,21 @@ export class StaticDataView {
   }
 
   public pushUTF8(raw: string): void {
-    const { written } = TEXT_ENCODER.encodeInto(raw, this.buffer.subarray(this.pos + 4));
+    const pos = this.getPos();
+    // Assume the size of output length is 1 (which means output is less than 128)
+    // based on the possible minimal length to avoid memory reallocation.
+    // The minimal length is always 1 byte per character.
+    const start = pos + (raw.length > 127 ? 5 : 1);
+    const { written } = TEXT_ENCODER.encodeInto(raw, this.buffer.subarray(start));
+    // If we failed to predict, that means the required bytes for length is 5.
+    if (pos + sizeOfLength(written) !== start) {
+      // Shift the encoded bytes 4 bytes forward, from `start` to `start + 4` (= `pos + 5`)
+      this.buffer.copyWithin(pos + 5, start, start + written);
+    }
+    // Restore pos to push length
+    this.setPos(pos);
     this.pushLength(written);
+    // Reflect written bytes to pos
     this.setPos(this.pos + written);
   }
 

From 3270e6aad5d58fd984c8ccea1d9665093fd2c902 Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Thu, 12 Dec 2024 18:08:23 +0900
Subject: [PATCH 06/10] chore: drop useless fast exit

---
 packages/adblocker/src/data-view.ts | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index 81be8ae289..8dc6b76fef 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -90,12 +90,8 @@ export function sizeOfASCII(str: string): number {
  * Return number of bytes needed to serialize `str` UTF8 string.
  */
 export function sizeOfUTF8(str: string): number {
-  // Fast path for short strs considering the worst case (output ratio of 3)
-  if (str.length < 43 /* Math.ceil(127 / 3) */) {
-    return 1 + TEXT_ENCODER.encode(str).length;
-  }
-  const result = TEXT_ENCODER.encode(str);
-  return sizeOfLength(result.length) + result.length;
+  const encoded = TEXT_ENCODER.encode(str);
+  return sizeOfLength(encoded.length) + encoded.length;
 }
 
 /**

From 47dc63ab36bbca10ed6c609b8e314ca4872c05da Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Thu, 12 Dec 2024 18:16:09 +0900
Subject: [PATCH 07/10] refactor: reuse `sizeOfLength`

---
 packages/adblocker/src/data-view.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index 8dc6b76fef..569437fb98 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -394,9 +394,9 @@ export class StaticDataView {
   public pushUTF8(raw: string): void {
     const pos = this.getPos();
     // Assume the size of output length is 1 (which means output is less than 128)
-    // based on the possible minimal length to avoid memory reallocation.
+    // based on the possible minimal length to avoid memory relocation.
     // The minimal length is always 1 byte per character.
-    const start = pos + (raw.length > 127 ? 5 : 1);
+    const start = pos + sizeOfLength(raw.length);
     const { written } = TEXT_ENCODER.encodeInto(raw, this.buffer.subarray(start));
     // If we failed to predict, that means the required bytes for length is 5.
     if (pos + sizeOfLength(written) !== start) {

From 72e5b0d4f189307a96c20138a683a562bafd31e4 Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Fri, 10 Jan 2025 22:59:34 +0900
Subject: [PATCH 08/10] Update packages/adblocker/src/data-view.ts

Co-authored-by: Krzysztof Modras <1228153+chrmod@users.noreply.github.com>
---
 packages/adblocker/src/data-view.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index 569437fb98..9ff48f1d2e 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -90,8 +90,8 @@ export function sizeOfASCII(str: string): number {
  * Return number of bytes needed to serialize `str` UTF8 string.
  */
 export function sizeOfUTF8(str: string): number {
-  const encoded = TEXT_ENCODER.encode(str);
-  return sizeOfLength(encoded.length) + encoded.length;
+  const encodedLength = TEXT_ENCODER.encode(str).length;
+  return encodedLength + sizeOfLength(encodedLength);
 }
 
 /**

From dd8332ac667553d6469b33c36b59e4cc6a488f47 Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Fri, 10 Jan 2025 23:04:25 +0900
Subject: [PATCH 09/10] Update packages/adblocker/src/data-view.ts

Co-authored-by: Krzysztof Modras <1228153+chrmod@users.noreply.github.com>
---
 packages/adblocker/src/data-view.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index 9ff48f1d2e..dbce94ff2d 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -412,10 +412,10 @@ export class StaticDataView {
 
   public getUTF8(): string {
     const byteLength = this.getLength();
-    const pos = this.getPos();
-    this.setPos(pos + byteLength);
+    this.pos += byteLength;
     return new TextDecoder('utf8', { ignoreBOM: true }).decode(
-      this.buffer.subarray(pos, pos + byteLength),
+      // @ts-ignore
+      this.buffer.subarray(this.pos - byteLength, this.pos),
     );
   }
 

From c7b3cbc2bb32b83df5e668d4190565ca40f07d34 Mon Sep 17 00:00:00 2001
From: HoJeong Go <seia@outlook.kr>
Date: Fri, 10 Jan 2025 23:07:16 +0900
Subject: [PATCH 10/10] chore: remove unused ts-ignore

---
 packages/adblocker/src/data-view.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/packages/adblocker/src/data-view.ts b/packages/adblocker/src/data-view.ts
index dbce94ff2d..ffc10e1838 100644
--- a/packages/adblocker/src/data-view.ts
+++ b/packages/adblocker/src/data-view.ts
@@ -414,7 +414,6 @@ export class StaticDataView {
     const byteLength = this.getLength();
     this.pos += byteLength;
     return new TextDecoder('utf8', { ignoreBOM: true }).decode(
-      // @ts-ignore
       this.buffer.subarray(this.pos - byteLength, this.pos),
     );
   }