-
Notifications
You must be signed in to change notification settings - Fork 0
/
model_architecture.txt
63 lines (63 loc) · 2.68 KB
/
model_architecture.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
Wav2Vec2ForCTC(
(wav2vec2): Wav2Vec2Model(
(feature_extractor): Wav2Vec2FeatureEncoder(
(conv_layers): ModuleList(
(0): Wav2Vec2GroupNormConvLayer(
(conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,), bias=False)
(activation): GELUActivation()
(layer_norm): GroupNorm(512, 512, eps=1e-05, affine=True)
)
(1-4): 4 x Wav2Vec2NoLayerNormConvLayer(
(conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,), bias=False)
(activation): GELUActivation()
)
(5-6): 2 x Wav2Vec2NoLayerNormConvLayer(
(conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,), bias=False)
(activation): GELUActivation()
)
)
)
(feature_projection): Wav2Vec2FeatureProjection(
(layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(projection): Linear(in_features=512, out_features=768, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(encoder): Wav2Vec2Encoder(
(pos_conv_embed): Wav2Vec2PositionalConvEmbedding(
(conv): ParametrizedConv1d(
768, 768, kernel_size=(128,), stride=(1,), padding=(64,), groups=16
(parametrizations): ModuleDict(
(weight): ParametrizationList(
(0): _WeightNorm()
)
)
)
(padding): Wav2Vec2SamePadLayer()
(activation): GELUActivation()
)
(layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
(layers): ModuleList(
(0-11): 12 x Wav2Vec2EncoderLayer(
(attention): Wav2Vec2SdpaAttention(
(k_proj): Linear(in_features=768, out_features=768, bias=True)
(v_proj): Linear(in_features=768, out_features=768, bias=True)
(q_proj): Linear(in_features=768, out_features=768, bias=True)
(out_proj): Linear(in_features=768, out_features=768, bias=True)
)
(dropout): Dropout(p=0.1, inplace=False)
(layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(feed_forward): Wav2Vec2FeedForward(
(intermediate_dropout): Dropout(p=0.1, inplace=False)
(intermediate_dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
(output_dense): Linear(in_features=3072, out_features=768, bias=True)
(output_dropout): Dropout(p=0.1, inplace=False)
)
(final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)
)
)
)
(dropout): Dropout(p=0.1, inplace=False)
(lm_head): Linear(in_features=768, out_features=30, bias=True)