-
Notifications
You must be signed in to change notification settings - Fork 17
/
hparameter.yaml
76 lines (68 loc) · 1.73 KB
/
hparameter.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Training hyperparameters.
train:
  batch_size: 12
  # Optimizer settings (the key name suggests Adam — confirm against the
  # training code).
  adam:
    # Floats are written with an explicit mantissa dot: YAML 1.1 loaders such
    # as PyYAML resolve dot-less forms like `3e-4` as strings, not floats.
    lr: 3.0e-4
    weight_decay: 1.0e-6
  # Decay settings; `start` and `end` are presumably training-step counts —
  # TODO confirm against the scheduler code.
  decay:
    rate: 0.05
    start: 25000
    end: 100000
  num_workers: 16
  gpus: 2  # ddp
  # Presumably per-term loss weights; only `dur` is configured here.
  loss_rate:
    dur: 1.0
# Dataset locations and text-processing settings.
data:
  lang: 'eng'
  # Cleaner names listed by the original author:
  # korean_cleaners, english_cleaners, chinese_cleaners
  text_cleaners:
    - 'english_cleaners'
  speakers:
    - 'LJSpeech'
  train_dir: 'preprocessed_data/LJSpeech'
  # Relative path of the metadata file from train_dir.
  train_meta: 'train.txt'
  val_dir: 'preprocessed_data/LJSpeech'
  # Relative path of the metadata file from val_dir.
  val_meta: 'val.txt'
  lexicon_path: 'lexicon/librispeech-lexicon.txt'
# Audio feature parameters. Key names suggest an STFT / mel-spectrogram front
# end; units are not stated in this file — confirm against the audio pipeline.
audio:
  n_mel_channels: 80
  filter_length: 1024
  hop_length: 256
  win_length: 1024
  sampling_rate: 22050
  mel_fmin: 0.0
  mel_fmax: 8000.0
# Encoder settings. Note: channel (512) + speaker_emb (64) equals the value of
# wavegrad.encode_channel (576) in this file.
encoder:
  channel: 512
  kernel: 5
  depth: 3
  dropout_rate: 0.5
  speaker_emb: 64
# Duration predictor settings: channel sizes for its two LSTM-named parts
# (roles inferred from the key names — confirm against the model code).
dur_predictor:
  dur_lstm_channel: 512
  range_lstm_channel: 512
# Window settings. Note: scale (300) equals the product of
# wavegrad.scale_factors (5 * 5 * 3 * 2 * 2) in this file; presumably the two
# must stay in sync — confirm.
window:
  scale: 300
  length: 256
# WaveGrad network settings.
wavegrad:
  # If false, the Base variant is used (per the original author's note).
  is_large: false
  # 512 + 64; matches encoder.channel + encoder.speaker_emb in this file.
  encode_channel: 576
  # Product is 300, the same value as window.scale in this file
  # (presumably these must stay in sync — confirm).
  scale_factors: [5, 5, 3, 2, 2]
  upsample:
    preconv_channel: 768
    out_channels: [512, 512, 256, 128, 128]
    # Disabled alternative, kept for reference:
    # dilations: [[1,2,4,8],[1,2,4,8],[1,2,4,8],[1,2,4,8],[1,2,4,8]]
    dilations:
      - [1, 2, 4, 8]
      - [1, 2, 4, 8]
      - [1, 2, 4, 8]
      - [1, 2, 1, 2]
      - [1, 2, 1, 2]
  downsample:
    preconv_channel: 32
    out_channels: [128, 128, 256, 512]
    dilations:
      - [1, 2, 4]
      - [1, 2, 4]
      - [1, 2, 4]
      - [1, 2, 4]
  # NOTE(review): placed at the wavegrad level rather than under downsample;
  # confirm against the code that reads this key.
  pos_emb_dim: 512
# Diffusion (DDPM) settings.
ddpm:
  max_step: 1000
  # NOTE(review): both *_schedule values are strings holding Python/torch
  # expressions; presumably the consumer evaluates them — confirm, and if so
  # never load this file from an untrusted source.
  noise_schedule: 'torch.linspace(1e-6, 0.01, hparams.ddpm.max_step)'
  pos_emb_scale: 5000
  pos_emb_channels: 128
  # TBD
  infer_step: 8
  # TBD; currently lists 8 values, the same count as infer_step.
  infer_schedule: 'torch.tensor([1e-6,2e-6,1e-5,1e-4,1e-3,1e-2,1e-1,9e-1])'
# Run name and output directories (checkpoints, TensorBoard, test results).
log:
  name: 'wavegrad2'
  checkpoint_dir: 'checkpoint'
  tensorboard_dir: 'tensorboard'
  test_result_dir: 'test_sample/result'