Skip to content

Commit

Permalink
Runtime: rely on TextEncoder and TextDecoder
Browse files Browse the repository at this point in the history
  • Loading branch information
hhugo committed Oct 16, 2024
1 parent 1e54c94 commit 0e9dcba
Showing 1 changed file with 33 additions and 124 deletions.
157 changes: 33 additions & 124 deletions runtime/mlBytes.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,115 +82,6 @@ function caml_subarray_to_jsbytes(a, i, len) {
return s;
}

//Provides: caml_utf8_of_utf16
function caml_utf8_of_utf16(s) {
for (var b = "", t = b, c, d, i = 0, l = s.length; i < l; i++) {
c = s.charCodeAt(i);
if (c < 0x80) {
for (var j = i + 1; j < l && (c = s.charCodeAt(j)) < 0x80; j++);
if (j - i > 512) {
t.slice(0, 1);
b += t;
t = "";
b += s.slice(i, j);
} else t += s.slice(i, j);
if (j === l) break;
i = j;
}
if (c < 0x800) {
t += String.fromCharCode(0xc0 | (c >> 6));
t += String.fromCharCode(0x80 | (c & 0x3f));
} else if (c < 0xd800 || c >= 0xdfff) {
t += String.fromCharCode(
0xe0 | (c >> 12),
0x80 | ((c >> 6) & 0x3f),
0x80 | (c & 0x3f),
);
} else if (
c >= 0xdbff ||
i + 1 === l ||
(d = s.charCodeAt(i + 1)) < 0xdc00 ||
d > 0xdfff
) {
// Unmatched surrogate pair, replaced by \ufffd (replacement character)
t += "\xef\xbf\xbd";
} else {
i++;
c = (c << 10) + d - 0x35fdc00;
t += String.fromCharCode(
0xf0 | (c >> 18),
0x80 | ((c >> 12) & 0x3f),
0x80 | ((c >> 6) & 0x3f),
0x80 | (c & 0x3f),
);
}
if (t.length > 1024) {
t.slice(0, 1);
b += t;
t = "";
}
}
return b + t;
}

//Provides: caml_utf16_of_utf8
function caml_utf16_of_utf8(s) {
for (var b = "", t = "", c, c1, c2, v, i = 0, l = s.length; i < l; i++) {
c1 = s.charCodeAt(i);
if (c1 < 0x80) {
for (var j = i + 1; j < l && (c1 = s.charCodeAt(j)) < 0x80; j++);
if (j - i > 512) {
t.slice(0, 1);
b += t;
t = "";
b += s.slice(i, j);
} else t += s.slice(i, j);
if (j === l) break;
i = j;
}
v = 1;
if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) {
c = c2 + (c1 << 6);
if (c1 < 0xe0) {
v = c - 0x3080;
if (v < 0x80) v = 1;
} else {
v = 2;
if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) {
c = c2 + (c << 6);
if (c1 < 0xf0) {
v = c - 0xe2080;
if (v < 0x800 || (v >= 0xd7ff && v < 0xe000)) v = 2;
} else {
v = 3;
if (
++i < l &&
((c2 = s.charCodeAt(i)) & -64) === 128 &&
c1 < 0xf5
) {
v = c2 - 0x3c82080 + (c << 6);
if (v < 0x10000 || v > 0x10ffff) v = 3;
}
}
}
}
}
if (v < 4) {
// Invalid sequence
i -= v;
t += "\ufffd";
} else if (v > 0xffff)
t += String.fromCharCode(0xd7c0 + (v >> 10), 0xdc00 + (v & 0x3ff));
else t += String.fromCharCode(v);
if (t.length > 1024) {
t.slice(0, 1);
b += t;
t = "";
}
}
return b + t;
}

//Provides: jsoo_is_ascii
function jsoo_is_ascii(s) {
// The regular expression gets better at around this point for all browsers
Expand Down Expand Up @@ -429,17 +320,23 @@ function caml_bytes_set(s, i, c) {
return caml_bytes_unsafe_set(s, i, c);
}

//Provides: jsoo_text_encoder
var jsoo_text_encoder = new TextEncoder();

//Provides: jsoo_text_decoder
var jsoo_text_decoder = new TextDecoder();

//Provides: caml_bytes_of_utf16_jsstring
//Requires: jsoo_is_ascii, caml_utf8_of_utf16, MlBytes
//Requires: MlBytes, jsoo_text_encoder
function caml_bytes_of_utf16_jsstring(s) {
var tag = 9 /* BYTES | ASCII */;
if (!jsoo_is_ascii(s))
(tag = 8) /* BYTES | NOT_ASCII */, (s = caml_utf8_of_utf16(s));
return new MlBytes(tag, s, s.length);
var a = jsoo_text_encoder.encode(s);
return new MlBytes(4, a, a.length);
}

//Provides: MlBytes
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii, caml_utf16_of_utf8
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii
//Requires: caml_uint8_array_of_bytes
//Requires: jsoo_text_decoder
function MlBytes(tag, contents, length) {
this.t = tag;
this.c = contents;
Expand All @@ -462,9 +359,9 @@ MlBytes.prototype.toString = function () {
}
};
MlBytes.prototype.toUtf16 = function () {
var r = this.toString();
if (this.t === 9) return r;
return caml_utf16_of_utf8(r);
if (this.t === 9) return this.c;
var a = caml_uint8_array_of_bytes(this);
return jsoo_text_decoder.decode(a);
};
MlBytes.prototype.slice = function () {
var content = this.t === 4 ? this.c.slice() : this.c;
Expand Down Expand Up @@ -777,20 +674,33 @@ function caml_jsbytes_of_string(x) {
return x;
}

//Provides: jsoo_text_decoder_buff
var jsoo_text_decoder_buff = new ArrayBuffer(1024);

//Provides: caml_jsstring_of_string const
//Requires: jsoo_is_ascii, caml_utf16_of_utf8
//Requires: jsoo_is_ascii
//Requires: jsoo_text_decoder
//Requires: jsoo_text_decoder_buff
//If: js-string
function caml_jsstring_of_string(s) {
if (jsoo_is_ascii(s)) return s;
return caml_utf16_of_utf8(s);
var a =
s.length <= jsoo_text_decoder_buff.length
? new Uint8Array(jsoo_text_decoder_buff, 0, s.length)
: new Uint8Array(s.length);
for (var i = 0; i < s.length; i++) {
a[i] = s.charCodeAt(i);
}
return jsoo_text_decoder.decode(a);
}

//Provides: caml_string_of_jsstring const
//Requires: jsoo_is_ascii, caml_utf8_of_utf16, caml_string_of_jsbytes
//Requires: caml_string_of_array
//Requires: jsoo_text_encoder
//If: js-string
function caml_string_of_jsstring(s) {
if (jsoo_is_ascii(s)) return caml_string_of_jsbytes(s);
else return caml_string_of_jsbytes(caml_utf8_of_utf16(s));
var a = jsoo_text_encoder.encode(s);
return caml_string_of_array(a);
}

//Provides: caml_bytes_of_jsbytes const
Expand Down Expand Up @@ -910,7 +820,6 @@ function caml_ml_bytes_content(s) {
}

//Provides: caml_is_ml_string
//Requires: jsoo_is_ascii
//If: js-string
function caml_is_ml_string(s) {
// biome-ignore lint/suspicious/noControlCharactersInRegex: expected
Expand Down

0 comments on commit 0e9dcba

Please sign in to comment.