-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlea_yin_estimator.m
194 lines (168 loc) · 5.46 KB
/
lea_yin_estimator.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
% function [time, f0, Start, End ] = lea_yin_estimator(x,fs,delta_t,delta_f,signal_mini_duration_s,varargin)
%
% This pitch tracking function is based on the function https://github.com/orchidas/Pitch-Tracking/blob/master/yin_estimator.m
% that implements YIN algorithm for fundamental pitch tracking.
%
% INPUTS:
% - x: observation signal,
% - fs: sampling frequency (Hz),
% - delta_t: minimum time interval between two tonals (s),
% - delta_f: minimum frequency interval between two tonals (Hz),
% - signal_mini_duration: minimum tonal signal duration (samples).
%
% OUTPUTS:
% - f0: frequency estimate,
% - time: time vector of the frequency tracks,
% - Start: Starting sample of each detected tonal signal,
% - End: Ending sample of each tonal signal.
function [time, f0, Start, End ] = lea_yin_estimator(x,fs,delta_t,delta_f,signal_mini_duration_s,varargin)
% window size - we assume the minimum f0_yin to be 1/0.25 = 4Hz
win = round(0.25*fs);
N = length(x);
nframes = ceil(N/win);
% zero pad signal to have enough frames
[x] = vector_orientation(x,'column');
x = [x, zeros(1,win*nframes - N)];
x_frame = zeros(nframes, win);
start = 1;
%break into windows
for i = 1:nframes
x_frame(i,:) = x(start:start + win - 1);
start = start + win;
end
%step 1 - calculate difference function
d = zeros(nframes,win);
x_temp = [x_frame, zeros(nframes,win)];
for tau = 0:win-1
for j = 1:win
d(:,tau+1) = d(:,tau+1) + (x_temp(:,j) - x_temp(:,j+tau)).^2;
end
end
%step 2 - cumulative mean normalised difference function
d_norm = zeros(nframes,win);
d_norm(:,1) = 1;
for i = 1:nframes
for tau = 1:win-1
d_norm(i,tau+1) = d(i,tau+1)/((1/tau) * sum(d(i,1:tau+1)));
end
end
% figure(1);
% subplot(211);
% plot(0:length(x)-1, reshape(d',1,length(x)));grid on;
% xlabel('Lags');
% ylabel('Difference function');
% subplot(212);
% plot(0:length(x)-1, reshape(d_norm',1,length(x)));grid on;
% xlabel('Lags');
% ylabel('Cumulative mean difference function');
%step 3 - absolute thresholding
lag = zeros(1,nframes);
th = 0.1;
for i = 1:nframes
l = find(d_norm(i,:) < th,1);
if(isempty(l) == 1)
[v,l] = min(d_norm(i,:));
end
lag(i) = l;
end
%step 4 - parabolic interpolation
period = zeros(1,nframes);
start = 1;
for i = 1:nframes
if(lag(i) > 1 && lag(i) < win)
alpha = d_norm(i,lag(i)-1);
beta = d_norm(i,lag(i));
gamma = d_norm(i,lag(i)+1);
peak = 0.5*(alpha - gamma)/(alpha - 2*beta + gamma);
%ordinate needs to be calculated from d and not d_norm - see paper
%ordinate = d(i,lag(i)) - 0.25*(d(i,lag(i)-1) - d(i,lag(i)+1))*peak;
else
peak = 0;
end
%1 needs to be subtracted from 1 due to matlab's indexing nature
period(i) = (lag(i)-1) + peak;
% f0_yin(i,:) = fs/period(i)*ones(1,win);
% time_yin(i,:) = ((i-1)*win:i*win-1)/fs;
end
f0_yin = fs./period;
time_yin_interm =linspace(0,length(x)/fs,length(f0_yin));
% time_yin_interm = time_yin_interm(f0_yin>10);
% f0_yin = f0_yin(f0_yin>10);
time_yin_interm(f0_yin<010) = NaN;
f0_yin(f0_yin<10) = NaN;
%for silent frames estimated frequency should be 0Hz
if ~isempty(varargin)
[f0_yin] = silent_frame_classification(x_frame, f0_yin);
end
%step 5 - "Smooth" the instantaneous frequency using a median filter
f0_yin = medfilt1(f0_yin,11);
if isempty(f0_yin(isnan(f0_yin)==0))
f0_yin = NaN;
time_yin = NaN;
Start = NaN;
End = NaN;
else
%step 6 - separate the tracked tonals
Start = 1;
End = [];
% Detection of the pitch track contours (start and end point)
index = isnan(f0_yin)==0; % index = 0 <=> signal detecte
f0_short = f0_yin(index);
t_short = time_yin_interm(index);
for i = 2:length(f0_short)-1
if (abs(f0_short(i)-f0_short(i-1))>delta_f)||(abs(t_short(i)-t_short(i-1))>delta_t),
Start = [Start i];
End = [End i-1];
end
end
End = [End length(index)];
% Clear the detection of signal shorter than "signal_mini_duration"
for i = 1: length(Start)
if (End(i)-Start(i) < round( signal_mini_duration_s/(time_yin_interm(2)-time_yin_interm(1)) ) )%|| (abs((f0_short(Start(i))-f0_short(End(i)))/(t_short(End(i))-t_short(Start(i))))<0.5),
f0_short(Start(i):End(i))=NaN(1,End(i)-Start(i)+1);
t_short(Start(i):End(i))=NaN(1,End(i)-Start(i)+1);
End(i) = NaN;
Start(i) = NaN;
end
end
End = End(isnan(End)==0);
Start = Start(isnan(Start)==0);
Start_new = Start;
End_new = End;
% Attach segments that belong together
for i = 2:length(Start)
if t_short(Start(i))-t_short(End(i-1)) < delta_t
Start_new(i) = NaN;
End_new(i-1) = NaN;
end
end
End = End_new(isnan(End_new)==0);
Start = Start_new(isnan(Start_new)==0);
End(end) = length(t_short);
ind = find(End-Start<=0);
if isnan(ind)==0
End = [End(1:ind-1) End(ind+1:end)];
Start = [Start(1:ind-1) Start(ind+1:end)];
end
f0 = f0_short;
% Good size
time = (0:N-1)/fs;
if length(Start) > 1
f0_yin_final = NaN(size(time_yin));
for i = 1:length(Start)
Start_old = Start(i);
End_old = End(i);
Start(i) = find(time_yin >= t_short(Start_old),1);
End(i) = find(time_yin >= t_short(End_old),1)-1;
f0_yin_final(Start(i):End(i)) = interp1(t_short(Start_old:End_old),f0_yin(Start_old:End_old),time_yin(Start(i):End(i)));
end
f0 = f0_yin_final;
end
end
ind = find(End-Start<=0);
if isnan(ind)==0
End(ind) = NaN;
Start(ind)= NaN;
End = End(isnan(End)==0);
Start = Start(isnan(Start)==0);
end