-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrigrams.py
101 lines (78 loc) · 3.46 KB
/
trigrams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
import os
import pathlib
import argparse
import warnings
from typing import Tuple, List
from sklearn import preprocessing
from tqdm import tqdm
from src.utils.mp_util import round_robin_map
from src.utils.tsv import read_tsv, TSVEntry, write_tsv
# Ignore any warning whose message starts with the text below (the second
# argument is treated as a regular expression matched, case-insensitively,
# against the beginning of the warning message).
# NOTE(review): presumably raised by an audio-loading dependency pulled in by
# the project helpers imported above - nothing in this file emits it; confirm.
warnings.filterwarnings('ignore', 'PySoundFile failed. Trying audioread instead.')
def collect_phonemes_and_filenames(v: Tuple[str, pathlib.Path, pathlib.Path, preprocessing.LabelEncoder]) -> \
        List[Tuple[str, int, int, pathlib.Path]]:
    """List every phoneme of one aligned clip with its encoded label and audio path.

    The arguments arrive packed in a single tuple so the function can be handed
    to a map-style helper that passes one item per call.

    Args:
        v: ``(fname, clip_name, phoneme_dir, enc)`` where ``fname`` is the clip's
            base name, ``clip_name`` the alignment JSON file to read,
            ``phoneme_dir`` the root directory under which the phoneme audio
            paths are built, and ``enc`` an object whose ``transform`` maps a
            list of phoneme names to their labels.

    Returns:
        One ``(fname, index, label, wav_path)`` tuple per phoneme, in the order
        the alignment file lists them. ``index`` is the phoneme's position
        inside its word (the counter restarts for every entry of ``'words'``),
        and ``wav_path`` is ``phoneme_dir / <name> / '<fname>_<index>.wav'``.
        An empty list is returned when the alignment file cannot be opened or
        does not contain valid JSON.
    """
    stem, alignment_path, phoneme_root, encoder = v
    try:
        with open(alignment_path, 'r') as handle:
            words = json.load(handle)['words']
            # Each phoneme entry is unpacked as a 3-item sequence; only the
            # first item (the phoneme name) is used here.
            return [
                (
                    stem,
                    position,
                    encoder.transform([phoneme_name])[0],
                    phoneme_root / phoneme_name / (stem + '_{}.wav'.format(position)),
                )
                for word in words
                for position, (phoneme_name, _, _) in enumerate(word['phonemes'])
            ]
    except (json.JSONDecodeError, OSError):
        # Best effort: an unreadable or malformed alignment yields no phonemes.
        return []
def _phoneme_trigrams(labels, boundary):
    """Return one ``(previous, current, next)`` triple per label, padding both ends with ``boundary``."""
    padded = [boundary, *labels, boundary]
    return list(zip(padded, padded[1:], padded[2:]))


def generate_tsv_file(tsv_file_in: pathlib.Path, tsv_file_out: pathlib.Path,
                      clip_dir: pathlib.Path, phoneme_dir: pathlib.Path):
    """Write a TSV of phoneme trigrams for every aligned clip listed in ``tsv_file_in``.

    For each row of the input TSV whose alignment file ``clip_dir/<name>.json``
    exists, the clip's phonemes are collected with
    ``collect_phonemes_and_filenames``, ordered by their reported index, and
    turned into one output row per phoneme holding the encoded labels of the
    previous, current and next phoneme plus the path of the current phoneme's
    audio file. The first phoneme's "previous" and the last phoneme's "next"
    are the label of ``'sp'``. Rows whose audio file does not exist are skipped.

    Args:
        tsv_file_in: TSV file read with ``read_tsv``; each entry's ``'path'``
            value (up to its first ``'.'``) names a clip.
        tsv_file_out: Destination passed to ``write_tsv``.
        clip_dir: Directory containing the ``<name>.json`` alignment files.
        phoneme_dir: Directory whose immediate sub-directory names form the
            phoneme label set and under which the phoneme audio paths are built.

    Raises:
        ValueError: from the label encoder when ``phoneme_dir`` has no ``sp``
            sub-directory (scikit-learn rejects labels it was not fitted on).
    """
    tsv = read_tsv(tsv_file_in)

    # Only the immediate sub-directories of phoneme_dir act as labels; a
    # missing directory yields an empty label set, as os.walk yields nothing.
    _, top_level_dirs, _ = next(os.walk(phoneme_dir), (None, [], None))
    phones = list(top_level_dirs)
    enc = preprocessing.LabelEncoder().fit(phones)
    space = enc.transform(['sp'])[0]

    clips = []
    for e in tqdm(tsv, desc='Generating indices'):
        fname = e['path'].split('.')[0]
        clip_name = clip_dir / (fname + '.json')
        if clip_name.exists():
            clips.append((fname, clip_name, phoneme_dir, enc))

    # Processed serially; the round_robin_map-based variant of this step is
    # currently disabled.
    indices = [collect_phonemes_and_filenames(c)
               for c in tqdm(clips, desc='Generating phoneme indices')]

    # Group the (index, label, path) records by clip name.
    fmap = {}
    for index in tqdm(indices, desc='Parsing mp result'):
        for fname, it, pv, pf in index:
            fmap.setdefault(fname, []).append((it, pv, pf))

    entries = []
    for fname in tqdm(fmap, desc='Generating tsv'):
        # Stable sort: records sharing an index keep their collection order.
        fmap[fname].sort(key=lambda x: x[0])
        phonemes = fmap[fname]
        labels = [label for _, label, _ in phonemes]
        # Pair every trigram with the audio file of its *current* phoneme so
        # that the labels and the 'directory' column always describe the same
        # phoneme, including the first and last one of the clip.
        for trigram, (_, _, wav_path) in zip(_phoneme_trigrams(labels, space), phonemes):
            if wav_path.exists():
                entries.append(
                    TSVEntry(['previous', 'current', 'next', 'directory'], [*trigram, str(wav_path)]))

    write_tsv(tsv_file_out, entries)
if __name__ == '__main__':
    # Command-line entry point: four positional paths, forwarded in order to
    # generate_tsv_file.
    cli = argparse.ArgumentParser(description='Processes the phonemes for the audio files')
    positional_paths = (
        ('tsv_file_in', 'The file containing the list of sample file locations'),
        ('tsv_file_out', 'The file to store the new TSV file in'),
        ('clips_dir', 'The directory containing the sample files'),
        ('phonemes_dir', 'The directory containing the sample phoneme files'),
    )
    for arg_name, arg_help in positional_paths:
        cli.add_argument(arg_name, type=pathlib.Path, help=arg_help)
    options = cli.parse_args()
    generate_tsv_file(
        options.tsv_file_in,
        options.tsv_file_out,
        options.clips_dir,
        options.phonemes_dir,
    )