-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfigurationfile.cof
29 lines (27 loc) · 994 Bytes
/
configurationfile.cof
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# MAIN function: clustering settings.
[CLUSTER]
# Name of the log file to process.
filename = "train_set.txt"
# Number of clusters to produce.
clusters = 14
# Upper document-frequency threshold: terms strictly above it are ignored.
# A float is a proportion of documents; an int is an absolute count.
max = 0.9
# Lower document-frequency threshold: terms strictly below it are ignored.
# A float is a proportion of documents; an int is an absolute count.
min = 3
# Characters that are not used for splitting the log.
# NOTE(review): "%%" looks like an escaped literal "%" for an interpolating
# INI reader (e.g. Python configparser), and "\." is not a valid escape in a
# TOML basic string - presumably this file is INI-style, not TOML; confirm
# against the loader before changing the quoting.
chars = ["-","\.","/","_","%%"]
# filename: string
# the name of the log file to process
#
# clusters: int
# the number of clusters to produce
#
# max: float in range [0.0, 1.0] or int, default=1.0
# When building the vocabulary, ignore terms that have a
# document frequency strictly higher than the given threshold
# (corpus-specific stop words). If float, the parameter
# represents a proportion of documents; if int, an absolute count.
#
# min: float in range [0.0, 1.0] or int, default=1
# When building the vocabulary, ignore terms that have a document
# frequency strictly lower than the given threshold. This value
# is also called the cut-off in the literature. If float, the parameter
# represents a proportion of documents; if int, an absolute count.
#
# chars: list of strings; the characters that are not used for splitting the log
#