-
Notifications
You must be signed in to change notification settings - Fork 1
/
utf16.c
104 lines (96 loc) · 1.5 KB
/
utf16.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/* UTF-16 conversion, see /lib/rfc/rfc2781; encoding also translates \n to \r\n, decoding drops \r */
#include <u.h>
#include <libc.h>
#include "dat.h"
#include "fns.h"
/* bit masks and surrogate ("half zone") bounds used for UTF-16 conversion */
enum
{
	/* masks covering the low 10, 16 and 20 bits */
	Bits10		= (1<<10) - 1,
	Bits16		= (1<<16) - 1,
	Bits20		= (1<<20) - 1,

	/* first and last code unit of the high (leading) surrogate range */
	HHalfZoneS	= 0xD800,
	HHalfZoneE	= 0xDBFF,

	/* first and last code unit of the low (trailing) surrogate range */
	LHalfZoneS	= 0xDC00,
	LHalfZoneE	= 0xDFFF,
};
/*
 * Convert the ns bytes of UTF-8 at s into UTF-16 code units, stored
 * into buf with iputs at two bytes per unit, writing at most nb bytes.
 *
 * Every '\n' in the input is preceded by an inserted '\r'.
 * Runes above 0xFFFF are written as a high/low surrogate pair;
 * runes too large to fit in a pair are replaced by Runeerror.
 *
 * Conversion stops early when the next rune (or the "\r\n" pair)
 * does not fit in the space left in buf.
 *
 * Returns the number of bytes written to buf.
 */
int
toutf16(uchar* buf, int nb, char* s, int ns)
{
	uchar *b, *eb;
	char *es;
	Rune r;

	b = buf;
	eb = b+nb;
	es = s+ns;
	while(s < es){
		if(*s == '\n'){
			/*
			 * require room for both the '\r' and the '\n' that
			 * follows, so truncated output never ends in a
			 * '\r' that lost its '\n'.
			 */
			if(b+4 > eb)
				break;
			iputs(b, '\r');
			b+=2;
		}
		/*
		 * NOTE(review): chartorune is not told about es; presumably
		 * callers pass only complete UTF-8 sequences -- confirm.
		 */
		s += chartorune(&r, s);
		if(b+2 > eb)
			break;
		if(r <= Bits16){
			/* fits in a single code unit */
			iputs(b, r);
			b+=2;
			continue;
		}
		/* remove the 0x10000 offset; what is left must fit in 20 bits */
		r -= Bits16+1;
		if(r > Bits20){
			iputs(b, Runeerror);
			b+=2;
			continue;
		}
		if(b+4 > eb)
			break;
		/* top 10 bits go in the high surrogate, bottom 10 in the low */
		iputs(b+0, HHalfZoneS | (r >> 10));
		iputs(b+2, LHalfZoneS | (r & Bits10));
		b+=4;
	}
	return b-buf;
}
/*
 * Convert the nw bytes of UTF-16 at ws (read two bytes at a time
 * with igets) into UTF-8 stored at str, writing at most ns bytes.
 *
 * A high surrogate followed by a low surrogate is combined into one
 * rune.  A lone low surrogate, a high surrogate at the end of the
 * input, or a high surrogate not followed by a low surrogate is
 * converted to Runeerror.  Any '\r' is dropped from the output.
 *
 * Conversion stops early when the next rune does not fit in the
 * space left in str; a trailing odd byte in ws is ignored.
 * The output is not NUL-terminated.
 *
 * Returns the number of bytes written to str.
 */
int
fromutf16(char* str, int ns, uchar* ws, int nw)
{
	char *s, *es, buf[UTFmax];
	uchar *q, *eq;
	ushort w1, w2;
	Rune r;
	int n;

	s = str;
	es = str + ns;
	q = ws;
	eq = ws + nw;
	while(q+2 <= eq){
		w1 = igets(q);
		q += 2;
		if(w1<HHalfZoneS || w1>LHalfZoneE){
			/* not a surrogate: the code unit is the rune */
			r = w1;
			goto Convert;
		}
		if(w1>HHalfZoneE){
			/* low surrogate with no preceding high surrogate */
			r = Runeerror;
			goto Convert;
		}
		if(q+2 > eq){
			/* high surrogate at the very end of the input */
			r = Runeerror;
			goto Convert;
		}
		w2 = igets(q);
		if(w2<LHalfZoneS || w2>LHalfZoneE){
			/*
			 * unpaired high surrogate: leave w2 unread so the
			 * next iteration decodes it instead of losing it.
			 */
			r = Runeerror;
			goto Convert;
		}
		q += 2;
		/*
		 * join the two 10-bit halves first, then add the 0x10000
		 * offset.  + binds tighter than << and |, so without the
		 * parentheses the offset would be or'ed into the value
		 * rather than added to it.
		 */
		r = (((w1 & Bits10)<<10) | (w2 & Bits10)) + Bits16 + 1;
	Convert:
		n = runetochar(buf, &r);
		if(buf[0] == '\r')
			continue;
		if(s+n > es)
			break;
		memmove(s, buf, n);
		s += n;
	}
	return s-str;
}