-
Notifications
You must be signed in to change notification settings - Fork 193
/
demo.py
80 lines (68 loc) · 2.7 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import sys
# Optional: uncomment the lines below to suppress as many TensorFlow warnings as possible
# os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# from tensorflow.python.util import deprecation
# deprecation._PRINT_DEPRECATION_WARNINGS = False
# import tensorflow as tf
# tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from ckiptagger import data_utils, construct_dictionary, WS, POS, NER
def main():
    """Run the demo pipeline: word segmentation, POS tagging, entity recognition.

    Steps, in order: download the model data into the current directory,
    load the WS / POS / NER models from ``./data``, build and print a custom
    dictionary, run the three models over a fixed list of sample sentences,
    release the models, and print each sentence with its ``word(tag)`` pairs
    followed by its entities in sorted order.
    """

    def show_tagged_words(words, tags):
        """Print all ``word(tag)`` pairs on one line, each followed by U+3000."""
        assert len(words) == len(tags)
        # One print call; the emitted text is the same as printing pair by
        # pair with end="\u3000" and then a final newline.
        print("".join(f"{token}({tag})\u3000" for token, tag in zip(words, tags)))

    # Sample input for the WS-POS-NER pipeline.
    sentence_list = [
        "傅達仁今將執行安樂死,卻突然爆出自己20年前遭緯來體育台封殺,他不懂自己哪裡得罪到電視台。",
        "美國參議院針對今天總統布什所提名的勞工部長趙小蘭展開認可聽證會,預料她將會很順利通過參議院支持,成為該國有史以來第一位的華裔女性內閣成員。",
        "",
        "土地公有政策??還是土地婆有政策。.",
        "… 你確定嗎… 不要再騙了……",
        "最多容納59,000個人,或5.9萬人,再多就不行了.這是環評的結論.",
        "科長說:1,坪數對人數為1:3。2,可以再增加。",
    ]

    # Fetch the model data into the current directory.
    data_utils.download_data("./")

    # Load the models without GPU.
    segmenter = WS("./data")
    tagger = POS("./data")
    recognizer = NER("./data")

    # Load the models with GPU instead:
    # segmenter = WS("./data", disable_cuda=False)
    # tagger = POS("./data", disable_cuda=False)
    # recognizer = NER("./data", disable_cuda=False)

    # Custom dictionary mapping words to weights.
    # NOTE(review): the empty key and the non-numeric weight look like
    # deliberately malformed entries, presumably to show how
    # construct_dictionary treats them - confirm before "cleaning up".
    word_to_weight = {
        "土地公": 1,
        "土地婆": 1,
        "公有": 2,
        "": 1,
        "來亂的": "啦",
        "緯來體育台": 1,
    }
    dictionary = construct_dictionary(word_to_weight)
    print(dictionary)

    # Segment, then tag, then recognise entities.
    word_sentence_list = segmenter(sentence_list)
    # Alternative segmentation calls:
    # word_sentence_list = segmenter(sentence_list, sentence_segmentation=True)
    # word_sentence_list = segmenter(sentence_list, recommend_dictionary=dictionary)
    # word_sentence_list = segmenter(sentence_list, coerce_dictionary=dictionary)
    pos_sentence_list = tagger(word_sentence_list)
    entity_sentence_list = recognizer(word_sentence_list, pos_sentence_list)

    # Release the models before printing the results.
    del segmenter, tagger, recognizer

    # Report: blank line, quoted sentence, word(tag) line, sorted entities.
    for index, text in enumerate(sentence_list):
        print()
        print(f"'{text}'")
        show_tagged_words(word_sentence_list[index], pos_sentence_list[index])
        for entity in sorted(entity_sentence_list[index]):
            print(entity)
# Script entry point: run the demo, then exit the interpreter explicitly.
# main() returns None, so the process exits with a success status.
if __name__ == "__main__":
    sys.exit(main())