config_model_transformer.py
import copy

import texar as tx

random_seed = 1234   # global random seed for reproducibility
beam_width = 5       # beam size used for beam-search decoding
hidden_dim = 384     # hidden / embedding dimension of the Transformer
coverity_dim = 128   # dimension of the coverage state
alpha = 0            # length-penalty weight used during beam search
def get_embedder_hparams(dim, name):
    """Returns WordEmbedder hyperparameters with embedding dimension `dim`."""
    return {
        'name': name,
        'dim': dim,
        'initializer': {
            'type': 'random_normal_initializer',
            'kwargs': {
                'mean': 0.0,
                'stddev': dim ** -0.5,
            },
        }
    }
# One embedder per input: the text sequence (y_aux) and the three record
# fields (x_value, x_type, x_associated). The x_* dimensions sum to
# hidden_dim (192 + 48 + 144 = 384).
embedders = {
    name: get_embedder_hparams(dim, '{}_embedder'.format(name))
    for name, dim in (
        ('y_aux', hidden_dim),
        ('x_value', hidden_dim // 2),
        ('x_type', hidden_dim // 8),
        ('x_associated', hidden_dim // 8 * 3))}
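
# Usage sketch (illustrative only, not executed in this config): the training
# script is expected to build one tx.modules.WordEmbedder per entry, e.g.
#   y_embedder = tx.modules.WordEmbedder(
#       vocab_size=vocab_size,          # `vocab_size` assumed to come from
#       hparams=embedders['y_aux'])     # the data configuration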
# Transformer encoder hyperparameters for the text sequence (y side).
y_encoder = {
    'dim': hidden_dim,
    'num_blocks': 3,
    'residual_dropout': 0.1,
    'multihead_attention': {
        'num_heads': 8,
        'output_dim': hidden_dim,
        # 'num_units': 384,
        # 'dropout_rate': 0.1
        # See the Texar documentation for more optional hyperparameters.
    },
    'position_embedder_hparams': {
        'dim': hidden_dim
    },
    'initializer': {
        'type': 'variance_scaling_initializer',
        'kwargs': {
            'scale': 1.0,
            'mode': 'fan_avg',
            'distribution': 'uniform',
        },
    },
    'poswise_feedforward': tx.modules.default_transformer_poswise_net_hparams(
        output_dim=hidden_dim)
}
# Transformer encoder hyperparameters for the x side
# (value / type / associated record fields).
x_encoder = {
    'dim': hidden_dim,
    'num_blocks': 3,
    'residual_dropout': 0.1,
    'multihead_attention': {
        'num_heads': 8,
        'output_dim': hidden_dim,
        # 'num_units': 384,
        # 'dropout_rate': 0.1
        # See the Texar documentation for more optional hyperparameters.
    },
    'position_embedder_hparams': {
        'dim': hidden_dim
    },
    'initializer': {
        'type': 'variance_scaling_initializer',
        'kwargs': {
            'scale': 1.0,
            'mode': 'fan_avg',
            'distribution': 'uniform',
        },
    },
    'poswise_feedforward': tx.modules.default_transformer_poswise_net_hparams(
        output_dim=hidden_dim)
}
# The decoder shares the same Transformer hyperparameters as the x encoder.
decoder = copy.deepcopy(x_encoder)
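
# Usage sketch (illustrative only): these hparams are intended for Texar's
# Transformer modules; the exact constructor/call signatures depend on the
# Texar version in use.
#   y_enc = tx.modules.TransformerEncoder(hparams=y_encoder)
#   x_enc = tx.modules.TransformerEncoder(hparams=x_encoder)
#   dec = tx.modules.TransformerDecoder(hparams=decoder)
# `beam_width` and `alpha` above are typically passed to the decoder's
# beam-search call at inference time.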
# Alternative RNN / attention decoder hyperparameters (commented out):
# rnn_cell = {
#     'type': 'LSTMBlockCell',
#     'kwargs': {
#         'num_units': hidden_dim,
#         'forget_bias': 0.
#     },
#     'dropout': {
#         'input_keep_prob': 0.8,
#         'state_keep_prob': 0.5,
#     },
#     'num_layers': 1
# }
# attention_decoder = {
#     'name': 'attention_decoder',
#     'attention': {
#         'type': 'LuongAttention',
#         'kwargs': {
#             'num_units': hidden_dim,
#         }
#     }
# }