forked from jaiminpan/pg_jieba
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjieba_token.h
88 lines (82 loc) · 2.79 KB
/
jieba_token.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/*-------------------------------------------------------------------------
*
* jieba_token.h
* token descriptions
*
* Author: Jaimin Pan <[email protected]>
*
* IDENTIFICATION
* jieba_token.h
*
*-------------------------------------------------------------------------
*/
#ifndef JIEBA_TOKEN_H_
#define JIEBA_TOKEN_H_
/*
 * TokenDescData
 *    One row of a token description table: a short tag string paired
 *    with a human-readable description of that tag.
 */
struct TokenDescData
{
    const char *token;      /* tag string, e.g. "n" */
    const char *descr;      /* description of the tag, e.g. "noun" */
};
typedef struct TokenDescData TokenDescData;

/* Pointer to a (mutable) TokenDescData row. */
typedef TokenDescData *TokenDesc;
/*
 * lex_descr
 *    Table pairing each tag string with an English description.
 *
 *    Entry 0 is an empty placeholder (both strings are ""); the named
 *    entries occupy positions 1 through 56.  The number in the comment
 *    at the start of each row is that row's array index.
 *
 *    The table is `static const` in a header, so every translation unit
 *    that includes this file gets its own private copy.
 *
 *    NOTE(review): the array position appears to double as the numeric
 *    token id (see the LASTNUM declaration in this header) -- confirm
 *    against the code that consumes this table before reordering rows.
 */
static const TokenDescData lex_descr[] = {
    /*  0 */ { .token = "",     .descr = "" },
    /*  1 */ { .token = "eng",  .descr = "letter" },
    /*  2 */ { .token = "nz",   .descr = "other proper noun" },
    /*  3 */ { .token = "n",    .descr = "noun" },
    /*  4 */ { .token = "m",    .descr = "numeral" },
    /*  5 */ { .token = "i",    .descr = "idiom" },
    /*  6 */ { .token = "l",    .descr = "temporary idiom" },
    /*  7 */ { .token = "d",    .descr = "adverb" },
    /*  8 */ { .token = "s",    .descr = "space" },
    /*  9 */ { .token = "t",    .descr = "time" },
    /* 10 */ { .token = "mq",   .descr = "numeral-classifier compound" },
    /* 11 */ { .token = "nr",   .descr = "person's name" },
    /* 12 */ { .token = "j",    .descr = "abbreviate" },
    /* 13 */ { .token = "a",    .descr = "adjective" },
    /* 14 */ { .token = "r",    .descr = "pronoun" },
    /* 15 */ { .token = "b",    .descr = "difference" },
    /* 16 */ { .token = "f",    .descr = "direction noun" },
    /* 17 */ { .token = "nrt",  .descr = "nrt" },
    /* 18 */ { .token = "v",    .descr = "verb" },
    /* 19 */ { .token = "z",    .descr = "z" },
    /* 20 */ { .token = "ns",   .descr = "location" },
    /* 21 */ { .token = "q",    .descr = "quantity" },
    /* 22 */ { .token = "vn",   .descr = "vn" },
    /* 23 */ { .token = "c",    .descr = "conjunction" },
    /* 24 */ { .token = "nt",   .descr = "organization" },
    /* 25 */ { .token = "u",    .descr = "auxiliary" },
    /* 26 */ { .token = "o",    .descr = "onomatopoeia" },
    /* 27 */ { .token = "zg",   .descr = "zg" },
    /* 28 */ { .token = "nrfg", .descr = "nrfg" },
    /* 29 */ { .token = "df",   .descr = "df" },
    /* 30 */ { .token = "p",    .descr = "prepositional" },
    /* 31 */ { .token = "g",    .descr = "morpheme" },
    /* 32 */ { .token = "y",    .descr = "modal verbs" },
    /* 33 */ { .token = "ad",   .descr = "ad" },
    /* 34 */ { .token = "vg",   .descr = "vg" },
    /* 35 */ { .token = "ng",   .descr = "ng" },
    /* 36 */ { .token = "x",    .descr = "unknown" },
    /* 37 */ { .token = "ul",   .descr = "ul" },
    /* 38 */ { .token = "k",    .descr = "k" },
    /* 39 */ { .token = "ag",   .descr = "ag" },
    /* 40 */ { .token = "dg",   .descr = "dg" },
    /* 41 */ { .token = "rr",   .descr = "rr" },
    /* 42 */ { .token = "rg",   .descr = "rg" },
    /* 43 */ { .token = "an",   .descr = "an" },
    /* 44 */ { .token = "vq",   .descr = "vq" },
    /* 45 */ { .token = "e",    .descr = "exclamation" },
    /* 46 */ { .token = "uv",   .descr = "uv" },
    /* 47 */ { .token = "tg",   .descr = "tg" },
    /* 48 */ { .token = "mg",   .descr = "mg" },
    /* 49 */ { .token = "ud",   .descr = "ud" },
    /* 50 */ { .token = "vi",   .descr = "vi" },
    /* 51 */ { .token = "vd",   .descr = "vd" },
    /* 52 */ { .token = "uj",   .descr = "uj" },
    /* 53 */ { .token = "uz",   .descr = "uz" },
    /* 54 */ { .token = "h",    .descr = "h" },
    /* 55 */ { .token = "ug",   .descr = "ug" },
    /* 56 */ { .token = "rz",   .descr = "rz" },
};
/*
 * Numbering starts from 1 and LASTNUM is the last number.
 *
 * LASTNUM is only declared here; its definition lives in another
 * translation unit that is not visible from this header.
 *
 * NOTE(review): presumably the numbers in question are indices into
 * lex_descr (whose entry 0 is an empty placeholder) -- confirm against
 * the file that defines LASTNUM.
 */
extern int LASTNUM;
#endif /* JIEBA_TOKEN_H_ */