-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexperiments.conf
90 lines (73 loc) · 1.84 KB
/
experiments.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Word embeddings.
# Embedding source referenced by leemd.context_embeddings.
glove_300d {
# File name only, no directory component.
path = glove.840B.300d.txt
# Presumably the vector dimensionality of each entry in `path` - TODO confirm.
size = 300
}
# Second embedding source, referenced by leemd.head_embeddings.
glove_300d_2w {
# NOTE(review): the `_50_300_2` suffix presumably encodes how this file was
# built (e.g. filtering/window settings) - confirm with whoever generated it.
path = glove_50_300_2.txt
# Same declared size as glove_300d.
size = 300
}
# Shared defaults. biaffinemd, leemd and bertmd each start from this object
# via `${base}` and add or override keys in their own block.
base {
# Model-size settings shared by every experiment.
# NOTE(review): units are not visible here; presumably a per-document sentence
# cap and the width/depth of the feed-forward layers - confirm against the model code.
max_training_sentences = 50
ffnn_size = 150
ffnn_depth = 2
# Learning hyperparameters.
max_gradient_norm = 5.0
lstm_dropout_rate = 0.4
lexical_dropout_rate = 0.5
dropout_rate = 0.2
optimizer = adam
# Default learning rate; bertmd overrides this with 0.00002.
learning_rate = 0.001
# NOTE(review): presumably the learning rate is scaled by decay_rate every
# decay_frequency steps - confirm against the training loop.
decay_rate = 0.999
decay_frequency = 100
# Data paths, evaluation/reporting cadence, logging and training length.
# All paths are bare file names with no directory component.
train_path = train.english.jsonlines
eval_path = dev.english.jsonlines
lm_path = elmo_cache.hdf5
test_path = test.english.jsonlines
# NOTE(review): test_lm_path names the same file as lm_path; confirm that the
# cache covers the test split as well as train/dev.
test_lm_path = elmo_cache.hdf5
eval_frequency = 500
report_frequency = 100
log_root = logs
max_step = 40000
}
# Experiment configurations: each one below extends base and sets its own model_type.
# Experiment with model_type = biaffine: every key from base, plus the keys below.
# NOTE(review): unlike leemd and bertmd, this block sets no max_span_width -
# confirm the biaffine model does not read that key.
biaffinemd = ${base}{
model_type = biaffine # one of: lee, biaffine, bert
top_span_ratio = 0.4
mention_selection_method= high_recall # alternative: high_f1
mention_selection_threshold = 0.5 # applies to the high_f1 selection method
# Contextual encoder and language-model input settings (same values as leemd).
contextualization_size = 200
contextualization_layers = 3
lm_layers = 3
lm_size = 1024
}
# Experiment with model_type = lee: every key from base, plus the keys below.
leemd = ${base}{
model_type = lee # one of: lee, biaffine, bert
top_span_ratio = 0.4
mention_selection_method= high_recall # alternative: high_f1
mention_selection_threshold = 0.5 # applies to the high_f1 selection method
# Character-level settings.
# NOTE(review): filter_widths/filter_size look like character-CNN kernel
# widths and filter count - confirm against the model code.
filter_widths = [3, 4, 5]
filter_size = 50
char_embedding_size = 8
char_vocab_path = "char_vocab.english.txt"
# Embedding sources; both substitute objects declared at the top of this file.
context_embeddings = ${glove_300d}
head_embeddings = ${glove_300d_2w}
# Contextual encoder and language-model input settings (same values as biaffinemd).
contextualization_size = 200
contextualization_layers = 3
lm_layers = 3
lm_size = 1024
feature_size = 20
# Same value as bertmd.max_span_width.
max_span_width = 30
}
# Experiment with model_type = bert: every key from base, plus the keys below.
bertmd = ${base}{
model_type = bert # one of: lee, biaffine, bert
top_span_ratio = 0.4
mention_selection_method= high_recall # alternative: high_f1
mention_selection_threshold = 0.5 # applies to the high_f1 selection method
# bert_size matches the H-768 component of the module name in bert_url.
bert_size = 768
max_bert_sent_len=256
bert_url="https://tfhub.dev/google/bert_cased_L-12_H-768_A-12/1"
# Same value as leemd.max_span_width.
max_span_width = 30
# Replaces the 0.001 inherited from base.
learning_rate = 0.00002
}