# go-tag-predict.toml
# Working area: directories for cached data and temporary files.
cache_dir = "/tmp/go-tag-predict_dev/cache"
tmp_dir = "/tmp/go-tag-predict_dev/tmp"

# Morphological analysis engine to use: "mecab" or "jumanpp".
# Engine-specific settings live in the [mecab] and [jumanpp] tables.
tokenizer = "mecab"
# Parameters for the training (supervised learning) step.
[supervised]
# Path to the bookmark data used as the training source.
# Must be an XML file in the del.icio.us / pinboard.in export format, e.g.:
#
#   <posts>
#     <post href="https://..." description="title" tag="tag1 tag2 ..." />
#     <post ... />
#   </posts>
learning_source_file = "data/bookmarks-demo.xml"

# Number of items processed concurrently.
parallels_count = 30

# Output buffer size (524288 = 512 * 1024; presumably bytes - confirm).
writer_buffer_size = 524288

# Size of the queue of items waiting to be written.
writer_queue_count = 64
# Parameters for the classification (prediction) step.
[predict]
# RSS feeds whose entries are to be categorized.
feed_urls = [
    "https://feeds.pinboard.in/rss/popular/",
    "https://feeds.pinboard.in/rss/popular/japanese",
    "https://feeds.pinboard.in/rss/recent",
]

# Number of items processed concurrently.
parallels_count = 5

# Filter applied to the results of fastText predict.
# NOTE(review): presumably labels with a probability below this value are
# dropped - confirm against the consuming code.
min_probability = 0.001
# fastText executable and the argument lists passed to it.
# NOTE(review): {DATA_PATH} and {MODEL_PATH} look like placeholders that the
# application substitutes before running the command - confirm.
[fasttext]
command = "/usr/local/bin/fasttext"

# Arguments for training; each option and its value share a line.
supervised_args = [
    "supervised",
    "-input", "{DATA_PATH}",
    "-output", "{MODEL_PATH}",
    "-thread", "4",
    "-dim", "200",
    "-neg", "25",
    "-ws", "8",
    "-epoch", "100",
]

# Arguments for prediction.
predict_args = [
    "predict-prob",
    "{MODEL_PATH}",
    "-",
    "1",
]
# MeCab settings.
[mecab]
# Path to the dictionary directory.
# Recommended dictionary: mecab-ipadic-neologd
# (https://github.com/neologd/mecab-ipadic-neologd)
dict_dir = "/usr/local/lib/mecab/dic/mecab-ipadic-neologd/build/mecab-ipadic-2.7.0-20070801-neologd-20161027/"
# Juman++ settings. Note: Juman++ is very slow.
[jumanpp]
command = "/usr/local/bin/jumanpp"

# Command-line arguments passed to the jumanpp command.
args = [
    "--force-single-path",
    "--dict", "/usr/local/share/jumanpp-resource/dic",
]

# Separator string for tokens (a single space).
token_separator = " "