From 8802a113ba16c779c4adaa9dc7e544acb8f4f360 Mon Sep 17 00:00:00 2001 From: Jinser Kafka Date: Tue, 10 Dec 2024 15:06:35 +0800 Subject: [PATCH] fix: utf16be encoding --- encoding/decoding_test.mbt | 12 ++++++------ encoding/encoding.mbt | 2 +- encoding/encoding_test.mbt | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/encoding/decoding_test.mbt b/encoding/decoding_test.mbt index 00db3fa..1820c79 100644 --- a/encoding/decoding_test.mbt +++ b/encoding/decoding_test.mbt @@ -188,14 +188,14 @@ test "lossy decoding UTF16BE encoded data with UTF8" { inspect!( buf.to_bytes(), content= - #|b"\x00\xd1\x00\x65\xd8\x3c\xdf\xc3\x00\x38\x00\xf3\xd8\x3c\xdf\xca" + #|b"\x8d\xd1\x6b\x65\xd8\x3c\xdf\xc3\x6e\x38\x6c\xf3\xd8\x3c\xdf\xca" , ) let chars = @encoding.decode_lossy(UTF8, buf.to_bytes()) inspect!( chars.iter().collect(), content= - #|['\x00', '�', 'e', '�', '�', '\x00', '8', '\x00', '�', '�'] + #|['�', '�', 'e', '�', '�', 'n', '8', 'l', '�', '�'] , ) } @@ -247,13 +247,13 @@ test "lossy decoding UTF16BE encoded data with UTF16LE" { inspect!( buf.to_bytes(), content= - #|b"\x00\xd1\x00\x65\xd8\x3c\xdf\xc3\x00\x38\x00\xf3\xd8\x3c\xdf\xca" + #|b"\x8d\xd1\x6b\x65\xd8\x3c\xdf\xc3\x6e\x38\x6c\xf3\xd8\x3c\xdf\xca" , ) let chars = @encoding.decode_lossy(UTF16LE, buf.to_bytes()) inspect!( chars.iter().collect(), - content="['턀', '攀', '㳘', '쏟', '㠀', '', '㳘', '쫟']", + content="['톍', '敫', '㳘', '쏟', '㡮', '', '㳘', '쫟']", ) } @@ -331,14 +331,14 @@ test "strictly decoding UTF16BE encoded data with UTF8" { inspect!( buf.to_bytes(), content= - #|b"\x00\xd1\x00\x65\xd8\x3c\xdf\xc3\x00\x38\x00\xf3\xd8\x3c\xdf\xca" + #|b"\x8d\xd1\x6b\x65\xd8\x3c\xdf\xc3\x6e\x38\x6c\xf3\xd8\x3c\xdf\xca" , ) let chars = @encoding.decode_strict(UTF8, buf.to_bytes()) inspect!( chars.iter().collect(), content= - #|[Ok('\x00'), Err(b"\xd1\x00"), Ok('e'), Err(b"\xd8\x3c"), Err(b"\xdf\xc3"), Ok('\x00'), Ok('8'), Ok('\x00'), Err(b"\xf3\xd8\x3c\xdf"), Err(b"\x00")] + #|[Err(b"\x8d"), Err(b"\xd1\x6b"), Ok('e'), Err(b"\xd8\x3c"), Err(b"\xdf\xc3"), Ok('n'), Ok('8'), Ok('l'), Err(b"\xf3\xd8\x3c\xdf"), Err(b"\x00")] , ) } diff --git a/encoding/encoding.mbt b/encoding/encoding.mbt index d6d799d..cb6db92 100644 --- a/encoding/encoding.mbt +++ b/encoding/encoding.mbt @@ -168,7 +168,7 @@ pub fn write_utf16le_char(buf : @buffer.T, value : Char) -> Unit { pub fn write_utf16be_char(buf : @buffer.T, value : Char) -> Unit { let code = value.to_uint() if code < 0x10000 { - let b0 = (code >> 0xFF).to_byte() + let b0 = (code >> 8).to_byte() let b1 = (code & 0xFF).to_byte() buf.write_byte(b0) buf.write_byte(b1) diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt index 90ba1c5..47bdb3a 100644 --- a/encoding/encoding_test.mbt +++ b/encoding/encoding_test.mbt @@ -51,7 +51,7 @@ test "encoding String to UTF16BE" { inspect!( bytes, content= - #|b"\x00\xbb\x00\x66\x00\x2e\x00\x28\x00\xbb\x00\x78\x00\x2e\x00\x66\x00\x28\x00\x78\x00\x20\x00\x78\x00\x29\x00\x29\x00\x28\x00\xbb\x00\x78\x00\x2e\x00\x66\x00\x28\x00\x78\x00\x20\x00\x78\x00\x29\x00\x29" + #|b"\x03\xbb\x00\x66\x00\x2e\x00\x28\x03\xbb\x00\x78\x00\x2e\x00\x66\x00\x28\x00\x78\x00\x20\x00\x78\x00\x29\x00\x29\x00\x28\x03\xbb\x00\x78\x00\x2e\x00\x66\x00\x28\x00\x78\x00\x20\x00\x78\x00\x29\x00\x29" , ) } @@ -147,13 +147,13 @@ test "to_utf16be_bytes" { inspect!( @encoding.to_utf16be_bytes('α'), content= - #|b"\x00\xb1" + #|b"\x03\xb1" , ) inspect!( @encoding.to_utf16be_bytes('啊'), content= - #|b"\x00\x4a" + #|b"\x55\x4a" , ) inspect!( @@ -215,7 +215,7 @@ test "write_utf16be_char" { inspect!( buf.to_bytes(), content= - #|b"\x00\x41\x00\xb1\x00\x4a\xd8\x3d\xde\x26" + #|b"\x00\x41\x03\xb1\x55\x4a\xd8\x3d\xde\x26" , ) }