Skip to content

Commit

Permalink
Fix UTF-8 decoding on 4-byte characters (#712)
Browse files Browse the repository at this point in the history
  • Loading branch information
edusporto authored Sep 10, 2024
1 parent a5054e4 commit d91742e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/fun/builtins.bend
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ Utf8/decode_character (List/Cons a (List/Cons b (List/Cons c (List/Cons d rest))
} else {
if (== (& a 0xF8) 0xF0) {
let r = (| (<< (& a Utf8/mask4) 18) (| (<< (& b Utf8/maskx) 12) (| (<< (& c Utf8/maskx) 6) (& d Utf8/maskx))))
(r, [])
(r, rest)
} else {
(Utf8/REPLACEMENT_CHARACTER, rest)
}
Expand Down
9 changes: 9 additions & 0 deletions tests/golden_tests/io/utf8.bend
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Round-trips a string through UTF-8: encodes `s` with String/encode_utf8 and
# then decodes the resulting bytes with String/decode_utf8.
to-and-back s = (String/decode_utf8 (String/encode_utf8 s))

# ASCII-only input.
v1 = (to-and-back "hi")
# Mixes ASCII with the non-ASCII character λ.
v2 = (to-and-back "(λf ((λx (f (x x))) (λx (f (x x)))))")
# A single emoji (a 4-byte UTF-8 sequence) with nothing after it.
v3 = (to-and-back "🌟")
# A 4-byte character followed by further text, so whatever comes after the
# character must survive decoding.
v4 = (to-and-back "Hello 🌎!")
# Several 4-byte characters separated by spaces.
v5 = (to-and-back "𓆈 𓆉 𓆊 𓆋 𓅯")

# Collects every round-tripped string into one list so that a single result
# covers all of the cases above.
main = [v1, v2, v3, v4, v5]
6 changes: 6 additions & 0 deletions tests/snapshots/io__utf8.bend.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
source: tests/golden_tests.rs
input_file: tests/golden_tests/io/utf8.bend
---
Strict mode:
["hi", "(λf ((λx (f (x x))) (λx (f (x x)))))", "🌟", "Hello 🌎!", "𓆈 𓆉 𓆊 𓆋 𓅯"]

0 comments on commit d91742e

Please sign in to comment.