forked from NVIDIA/tacotron2
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathinference_mel.py
101 lines (78 loc) · 3.25 KB
/
inference_mel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# coding: utf-8
"""
Synthesis mel with tacotron.
usage: inference_mel.py [options] <dst_dir>
options:
--hparams=<parmas> Hyper parameters [default: ].
-i, --input-file=<p> Input txt file path.
-f, --file-name-suffix=<s> File name suffix [default: ].
-t, --tacotron-checkpoint=<p> Tacotron Checkpoint Path
-h, --help Show help message.
"""
from docopt import docopt
import sys
import os
from os.path import dirname, join, basename, splitext
import torch
import numpy as np
#################################################################
# Tacotron Methods
################################################################
# from Tacotron Modules
from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT
from audio_processing import griffin_lim
from train import load_model
from text import text_to_sequence
import audio_processing
def mel_spectrogram_generation(checkpoint_path, text, hparams):
    """Synthesize a post-net mel spectrogram for ``text`` with Tacotron 2.

    Builds a model with ``load_model(hparams)``, restores the weights stored
    under the ``'state_dict'`` key of the checkpoint, switches the model to
    eval mode and runs ``model.inference`` on the encoded text.

    NOTE(review): the model is rebuilt and the checkpoint is re-read on every
    call, so a caller looping over many sentences pays that cost each time.

    Args:
        checkpoint_path: Path of a checkpoint readable by ``torch.load`` that
            contains a ``'state_dict'`` entry.
        text: Sentence to synthesize; it is encoded with ``text_to_sequence``
            using the ``basic_cleaners`` cleaner.
        hparams: Hyper-parameter object forwarded to ``load_model``.

    Returns:
        The second value returned by ``model.inference`` (the post-net mel
        output), detached and moved to the CPU.
    """
    # Load the model and restore its weights from the checkpoint.
    model = load_model(hparams)
    # Deserialize onto the CPU so loading does not depend on the device the
    # checkpoint was saved from; load_state_dict copies the values into the
    # model's own parameters.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # Encode the text as a batch holding a single integer sequence.
    sequence = np.array(text_to_sequence(text, ['basic_cleaners']))[None, :]
    sequence = torch.from_numpy(sequence).cuda().long()

    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        _, mel_outputs_postnet, _, _ = model.inference(sequence)
    return mel_outputs_postnet.detach().cpu()
#################################################################
# Main
################################################################
if __name__ == "__main__":
    # Command-line entry point: read sentences (one per line) from the input
    # file, synthesize one mel spectrogram per sentence and save each as
    # "<index><suffix>.npy" in a per-checkpoint sub-directory of <dst_dir>.
    args = docopt(__doc__)
    print("Command line args:\n", args)
    tacotron_checkpoint = args["--tacotron-checkpoint"]
    dst_dir = args["<dst_dir>"]
    input_file_path = args["--input-file"]
    file_name_suffix = args["--file-name-suffix"]

    # Neither option declares a default in the usage text, so docopt yields
    # None when it is omitted; fail with a clear message instead of a
    # TypeError from basename()/open() further down.
    if tacotron_checkpoint is None or input_file_path is None:
        sys.exit("Both --tacotron-checkpoint and --input-file are required.")

    # Create output directory
    os.makedirs(dst_dir, exist_ok=True)
    checkpoint_taco_name = splitext(basename(tacotron_checkpoint))[0].replace('checkpoint_', '')

    # NOTE(review): the --hparams option is parsed but never applied here;
    # the hyper-parameters below are fixed.
    hparams = create_hparams("distributed_run=False,mask_padding=False")
    hparams.sampling_rate = 22050
    hparams.filter_length = 1024
    hparams.hop_length = 256
    hparams.win_length = 1024

    # All spectrograms of one run share a sub-directory named after the checkpoint.
    output_name = 'output_audio_wav_griffin_lim_taco_' + checkpoint_taco_name
    output_dir = join(dst_dir, output_name)

    # Mel-spectrogram generation with tacotron
    try:
        # The input sentences may contain non-ASCII characters, so do not
        # rely on the platform's default encoding.
        with open(input_file_path, encoding="utf-8") as f:
            content = f.read().splitlines()
        os.makedirs(output_dir, exist_ok=True)
        for i, text in enumerate(content):
            print("Generating waveform " + str(i))
            mel = mel_spectrogram_generation(tacotron_checkpoint, text, hparams)
            dst_mel_path = join(output_dir, "{}{}.npy".format(i, file_name_suffix))
            # save
            np.save(dst_mel_path, mel)
            print("Mel {} OK".format(i))
    except FileNotFoundError as err:
        # Report which path was missing and signal failure to the caller
        # instead of falling through to the success message with exit code 0.
        print("File not found: {}".format(err))
        sys.exit(1)

    print("Finished! Check out {} for generated audio samples.".format(dst_dir))
    sys.exit(0)