forked from NVIDIA/tacotron2
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathinference_waveglow.py
110 lines (87 loc) · 3.6 KB
/
inference_waveglow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# coding: utf-8
"""
Synthesis waveform with Waveglow.

usage: inference_waveglow.py [options] <dst_dir>

options:
    --hparams=<params>             Hyper parameters [default: ].
    -i, --input-file=<p>           Input txt file path.
    -t, --tacotron-checkpoint=<p>  Tacotron Checkpoint Path
    -w, --waveglow-checkpoint=<p>  Waveglow Checkpoint Path
    -d, --denoiser                 Use Waveglow Denoiser
    -h, --help                     Show help message.
"""
from docopt import docopt
from os import makedirs
from os.path import dirname, join, basename, splitext
import torch
import numpy as np
import soundfile as sf
# Tacotron2 modules
from model import Tacotron2
from hparams import create_hparams
from train import load_model
from text import text_to_sequence
import sys
sys.path.append('waveglow/')
from denoiser import Denoiser

# Full-scale value for 16-bit PCM audio (kept for reference).
MAX_WAV_VALUE = 32767.5

# Audio/STFT settings; these must match the values the checkpoints
# were trained with, otherwise synthesis quality degrades.
hparams = create_hparams()
hparams.sampling_rate = 22050
hparams.filter_length = 1024
hparams.hop_length = 256
hparams.win_length = 1024
def load_waveglow(waveglow_checkpoint):
    """Load a trained WaveGlow model and move it to the GPU.

    Args:
        waveglow_checkpoint: Path to a checkpoint that stores the full
            model object under the ``'model'`` key.

    Returns:
        The WaveGlow model on CUDA, switched to eval mode.
    """
    waveglow_model = torch.load(waveglow_checkpoint)['model']
    # Consistency/bug fix: load_tacotron2 puts its model in eval mode but
    # this loader did not — dropout/batch-norm layers must not run in
    # training mode during inference.
    waveglow_model.eval()
    return waveglow_model.cuda()
def load_tacotron2(tacotron_checkpoint_path):
    """Build a Tacotron2 model and load trained weights into it.

    Uses the module-level ``hparams`` (read-only access, so no ``global``
    declaration is needed).

    Args:
        tacotron_checkpoint_path: Path to a checkpoint containing a
            ``'state_dict'`` entry.

    Returns:
        The Tacotron2 model in eval mode.
    """
    model = load_model(hparams)
    model.load_state_dict(torch.load(tacotron_checkpoint_path)['state_dict'])
    model.eval()
    return model
def waveform_generation(text, tacotron_model, waveglow_model, use_denoiser=False):
    """Synthesize an audio waveform for *text* with Tacotron2 + WaveGlow.

    Args:
        text: Input sentence to synthesize.
        tacotron_model: Tacotron2 model (provides mel spectrograms).
        waveglow_model: WaveGlow vocoder model.
        use_denoiser: When True, run the WaveGlow Denoiser over the output.

    Returns:
        1-D numpy array with the generated waveform (first batch item).
    """
    # Encode text to symbol ids and add a batch dimension.
    sequence = np.array(text_to_sequence(text, ['basic_cleaners']))[None, :]
    # torch.autograd.Variable is deprecated (no-op since PyTorch 0.4);
    # a plain tensor behaves identically here.
    sequence = torch.from_numpy(sequence).cuda().long()
    # No gradients are needed anywhere during inference; wrapping the
    # Tacotron pass as well avoids building an unused autograd graph
    # (the original only wrapped the WaveGlow pass).
    with torch.no_grad():
        # Tacotron Inference
        mel_outputs, mel_outputs_postnet, _, alignments = tacotron_model.inference(sequence)
        # Waveglow Inference
        waveform = waveglow_model.infer(mel_outputs_postnet, sigma=0.666)
        if use_denoiser:
            print("Using denoiser")
            denoiser = Denoiser(waveglow_model)
            waveform = denoiser(waveform, strength=0.01)[:, 0]
    return waveform[0].data.cpu().numpy()
if __name__ == "__main__":
    args = docopt(__doc__)
    print("Command line args:\n", args)
    input_file_path = args["--input-file"]
    tacotron_checkpoint = args["--tacotron-checkpoint"]
    waveglow_checkpoint = args["--waveglow-checkpoint"]
    use_denoiser = args["--denoiser"]
    dst_dir = args["<dst_dir>"]

    # Create output directory
    makedirs(dst_dir, exist_ok=True)

    # Derive a readable sub-directory name from both checkpoint file names.
    checkpoint_wav_name = splitext(basename(waveglow_checkpoint))[0].replace('waveglow_', '')
    checkpoint_taco_name = splitext(basename(tacotron_checkpoint))[0].replace('checkpoint_', '')

    tacotron_model = load_tacotron2(tacotron_checkpoint)
    waveglow_model = load_waveglow(waveglow_checkpoint)

    try:
        with open(input_file_path) as f:
            content = f.read().splitlines()
    except FileNotFoundError:
        print("File {} not found.".format(input_file_path))
        # Bug fix: previously execution fell through after the print and
        # crashed with a NameError on the undefined `content` below.
        sys.exit(1)

    # Create output directory for this checkpoint pair
    subdir = 'samples_waveglow_' + checkpoint_wav_name + '_taco_' + checkpoint_taco_name
    makedirs(join(dst_dir, subdir), exist_ok=True)

    for i, text in enumerate(content):
        print("Generating Waveform " + str(i))
        waveform = waveform_generation(text, tacotron_model, waveglow_model, use_denoiser)
        # save one wav per input line
        output_filepath = join(dst_dir, subdir, "{}.wav".format(i))
        sf.write(output_filepath, waveform, samplerate=hparams.sampling_rate)
        print("Waveform {} OK".format(i))

    print("Finished! Check out {} for generated audio samples.".format(dst_dir))
    sys.exit(0)