Skip to content

Commit

Permalink
better documented 'str/normalize-utf'
Browse files Browse the repository at this point in the history
  • Loading branch information
jlangch committed Mar 26, 2024
1 parent 2cfec8b commit 9b834a9
Showing 1 changed file with 9 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2894,7 +2894,7 @@ public VncVal apply(final VncList args) {
.doc(
"Normalizes a UTF string.\n\n" +
"On MacOS umlauts like ä are just encoded as 'a' plus the combining diaeresis\n" +
"character. Therefore an 'ä' in Java and an 'ä' from a MacOS filename are\n" +
"character ̈. Therefore an 'ä' in Java and an 'ä' from a MacOS filename are\n" +
"different!\n\n" +
"This function normalizes UTF strings to simplify processing.\n\n" +
"The _form_ argument is one of:\n"+
Expand All @@ -2909,19 +2909,17 @@ public VncVal apply(final VncList args) {
" ;; 1: \"ü\" prints to \"ü\" \n" +
" ;; 2: \"u\\u0308\" prints to \"ü\" \n" +
" \n" +
"The statement: \n" +
"«If it looks like a duck and quacks like a duck, then it probably is a \n" +
"duck» is WRONG here! \n" +
"«If it looks like a duck and quacks like a duck, then it probably is a duck» is definitely WRONG here! \n" +
" \n" +
";; u with combining diaresis char: \u0308 \n" +
";; u with combining diaeresis char: \\u0308 ( ̈) \n" +
"(println \"u\\u0308\") \n" +
";; => ü (actually prints as ü on a terminal) \n" +
" \n" +
";; ü: \\u00FC \n" +
"(println \"\\u00FC\") \n" +
";; => ü \n" +
" \n" +
";; combined u with diaresis character \n" +
";; u with combining diaeresis character ̈ \n" +
"(h/dump (bytebuf-from-string \"u\\u0308\")) \n" +
";; 00000000: 75cc 88 u.. \n" +
" \n" +
Expand All @@ -2933,9 +2931,13 @@ public VncVal apply(final VncList args) {
"(h/dump (bytebuf-from-string \"\\u00FC\")) \n" +
";; 00000000: c3bc .. \n" +
" \n" +
";; u with combined diaresis character normalized \n" +
";; u with combining diaeresis character normalized to get a standard ü \n" +
"(h/dump (bytebuf-from-string (str/normalize-utf \"u\\u0308\" :NFC))) \n" +
";; 00000000: c3bc .. \n" +
" \n" +
";; the reverse (decomposition) \n" +
"(h/dump (bytebuf-from-string (str/normalize-utf \"\\u00FC\" :NFD))) \n" +
";; 00000000: 75cc 88 u.. \n" +
"``` ")
.build()
) {
Expand Down

0 comments on commit 9b834a9

Please sign in to comment.