-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
126 lines (110 loc) · 4.51 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import os
import wave
import pickle
import pyaudio
import warnings
import numpy as np
from sklearn import preprocessing
from scipy.io.wavfile import read
import python_speech_features as mfcc
from sklearn.mixture import GaussianMixture
from listen import *
from time import time
import speech_recognition as sr
import playsound
from gtts import gTTS
def extract_features(audio, rate):
    """Build the per-frame feature matrix used for speaker scoring.

    The signal is turned into 20 MFCCs per frame (window length 0.025,
    window step 0.01, FFT size 1200, energy appended), each coefficient
    column is standardised with ``preprocessing.scale``, and the delta
    features from ``calculate_delta`` are appended on the right.

    Args:
        audio: Sample array of the recording, as accepted by
            ``python_speech_features.mfcc``.
        rate: Sampling rate of ``audio``.

    Returns:
        A 2-D array holding the scaled MFCCs followed column-wise by their
        deltas.
    """
    cepstra = mfcc.mfcc(
        audio,
        rate,
        winlen=0.025,
        winstep=0.01,
        numcep=20,
        nfft=1200,
        appendEnergy=True,
    )
    scaled = preprocessing.scale(cepstra)
    # Deltas are computed on the scaled coefficients, not the raw ones.
    return np.hstack((scaled, calculate_delta(scaled)))
def calculate_delta(array):
    """Compute two-frame delta features for a 2-D feature matrix.

    For every row ``i`` the result is::

        ((a[i+1] - a[i-1]) + 2 * (a[i+2] - a[i-2])) / 10

    where row indices that fall outside the matrix are clamped to the first
    or last row. The divisor 10 equals ``2 * (1**2 + 2**2)``.

    The output has the same number of columns as the input; the width is no
    longer fixed at 20, so feature matrices of any width are supported.

    Args:
        array: 2-D NumPy array of shape ``(rows, cols)``, one frame per row.

    Returns:
        A float array of shape ``(rows, cols)`` holding the deltas.
    """
    rows, cols = array.shape
    frame = np.arange(rows)
    last = rows - 1

    # Neighbour indices at distance 1 and 2, clamped at both edges.
    ahead_1 = np.minimum(frame + 1, last)
    behind_1 = np.maximum(frame - 1, 0)
    ahead_2 = np.minimum(frame + 2, last)
    behind_2 = np.maximum(frame - 2, 0)

    # Assign into a preallocated float64 array so the result dtype does not
    # depend on the dtype of the input.
    deltas = np.zeros((rows, cols))
    deltas[:] = (
        (array[ahead_1] - array[behind_1])
        + 2 * (array[ahead_2] - array[behind_2])
    ) / 10
    return deltas
def record_audio_test():
    """Record a ten-second mono clip and save it as a test sample.

    Audio is captured from input device 0 at 44100 Hz in 16-bit format,
    written to ``testing_set/sample.wav`` (relative to the current working
    directory), and the file name ``sample.wav`` is appended to
    ``testing_set_addition.txt``.

    The stream, the PyAudio instance and both output files are released even
    if recording or writing fails. The list file is only updated after the
    WAV file has been written, so it never names a recording that does not
    exist.

    Returns:
        None.
    """
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100
    CHUNK = 512
    RECORD_SECONDS = 10
    # Input device to capture from. To choose another one, enumerate devices
    # with audio.get_device_info_by_host_api_device_index(0, i) and pick an
    # entry whose 'maxInputChannels' is greater than zero.
    index = 0

    audio = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(FORMAT)
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True, input_device_index=index,
                            frames_per_buffer=CHUNK)
        try:
            print("recording started")
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            Recordframes = [stream.read(CHUNK) for _ in range(chunk_count)]
            print("recording stopped")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    OUTPUT_FILENAME = "sample.wav"
    WAVE_OUTPUT_FILENAME = os.path.join("testing_set", OUTPUT_FILENAME)
    # Avoid losing a finished recording just because the folder is missing.
    os.makedirs("testing_set", exist_ok=True)

    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as waveFile:
        waveFile.setnchannels(CHANNELS)
        waveFile.setsampwidth(sample_width)
        waveFile.setframerate(RATE)
        waveFile.writeframes(b''.join(Recordframes))

    with open("testing_set_addition.txt", 'a') as trainedfilelist:
        trainedfilelist.write(OUTPUT_FILENAME + "\n")
def test_model(source=None, modelpath=None, test_file=None):
    """Identify the speaker of a listed test recording.

    Every ``*.gmm`` file in ``modelpath`` is unpickled as one speaker model
    (the file name without ``.gmm`` is the speaker name). The first
    non-blank entry of ``test_file`` is read as a WAV file from ``source``,
    converted with ``extract_features`` and scored by each model; the
    speaker whose model gives the highest score is printed and returned.

    NOTE(review): only the first listed recording is scored before
    returning - confirm this is the intended behaviour for lists that name
    more than one file.

    Args:
        source: Directory containing the test WAV files. Defaults to the
            original hard-coded ``testing_set`` folder.
        modelpath: Directory containing the ``.gmm`` model files. Defaults
            to the original hard-coded project folder.
        test_file: Text file with one WAV file name per line. Defaults to
            the original hard-coded ``testing_set_addition.txt``.

    Returns:
        The winning speaker name, or ``None`` when ``test_file`` lists no
        recordings.

    Raises:
        ValueError: If a recording is listed but ``modelpath`` contains no
            ``.gmm`` files.
    """
    # replace paths with your local directory (or pass them as arguments)
    base = "C:\\Users\\aswinik\\Documents\\Assignments\\code_ed-main\\AudioAssessment\\"
    if source is None:
        source = base + "testing_set\\"
    if modelpath is None:
        modelpath = base
    if test_file is None:
        test_file = base + "testing_set_addition.txt"

    # Read the list of test audio files, ignoring blank lines.
    with open(test_file, 'r') as listing:
        test_names = [line.strip() for line in listing if line.strip()]

    # Sorted so that model order (and therefore tie-breaking) is stable.
    model_names = sorted(
        fname for fname in os.listdir(modelpath) if fname.endswith('.gmm')
    )
    # Take the speaker name from the bare file name rather than splitting
    # the joined path on a backslash, which only works for Windows paths.
    speakers = [fname[:-len('.gmm')] for fname in model_names]

    # Load the speaker models.
    # NOTE: pickle can execute arbitrary code while loading; only point
    # modelpath at model files you created yourself.
    models = []
    for fname in model_names:
        with open(os.path.join(modelpath, fname), 'rb') as handle:
            models.append(pickle.load(handle))

    if not test_names:
        return None
    if not models:
        raise ValueError("no .gmm model files found in " + modelpath)

    # 'rate' rather than 'sr' so the speech_recognition alias is not shadowed.
    rate, audio = read(os.path.join(source, test_names[0]))
    vector = extract_features(audio, rate)

    # Check the recording against each model one by one; presumably the
    # models are sklearn GaussianMixture objects whose score() returns a
    # log-likelihood - verify against the training script.
    log_likelihood = np.array([np.sum(gmm.score(vector)) for gmm in models])
    winner = int(np.argmax(log_likelihood))
    print("\tdetected as - ", speakers[winner])
    return speakers[winner]
# time.sleep(1.0)
#choice=int(input("\n1.Record audio for training \n 2.Train Model \n 3.Record audio for testing \n 4.Test Model\n"))
# record_audio_test()
# test_model()