-
Notifications
You must be signed in to change notification settings - Fork 22
/
comp_snr.m
134 lines (107 loc) · 5.06 KB
/
comp_snr.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
function [snr_mean, segsnr_mean]= comp_SNR(cleanFile, enhdFile)
%
%   Segmental Signal-to-Noise Ratio Objective Speech Quality Measure
%
%   This function implements the segmental signal-to-noise ratio
%   as defined in [1, p. 45] (see Equation 2.12).
%
%   Usage:  [SNRovl, SNRseg]=comp_snr(cleanFile.wav, enhancedFile.wav)
%
%         cleanFile.wav - clean input file in .wav format
%         enhancedFile  - enhanced output file in .wav format
%         SNRovl        - overall SNR (dB)
%         SNRseg        - mean segmental SNR (dB)
%
%   This function returns 2 parameters.  The first item is the
%   overall SNR for the two speech signals.  The second value
%   is the segmental signal-to-noise ratio averaged over all
%   analysis frames.  Each per-frame value is clamped to the range
%   -10dB to 35dB before averaging (see suggestions in [2]).
%
%   Both files must have the same sampling rate, the same number of
%   bits per sample and the same length; otherwise an error is raised.
%   When called with other than two input arguments, a usage line is
%   printed and both outputs are returned empty.
%
%   Example call:  [SNRovl,SNRseg]=comp_SNR('sp04.wav','enhanced.wav')
%
%   References:
%
%     [1] S. R. Quackenbush, T. P. Barnwell, and M. A. Clements,
%         Objective Measures of Speech Quality.  Prentice Hall
%         Advanced Reference Series, Englewood Cliffs, NJ, 1988,
%         ISBN: 0-13-629056-6.
%
%     [2] P. E. Papamichalis, Practical Approaches to Speech
%         Coding, Prentice-Hall, Englewood Cliffs, NJ, 1987.
%         ISBN: 0-13-689019-9. (see pages 179-181).
%
%   Authors: Bryan L. Pellom and John H. L. Hansen (July 1998)
%   Modified by: Philipos C. Loizou  (Oct 2006)
%
% Copyright (c) 2006 by Philipos C. Loizou
% $Revision: 0.0 $  $Date: 10/09/2006 $
%-------------------------------------------------------------------------

if nargin ~= 2
    fprintf('USAGE: [snr_mean, segsnr_mean]= comp_SNR(cleanFile, enhdFile) \n');
    % Assign the outputs so that a caller who requested them does not get
    % a secondary "output argument not assigned" error after the usage text.
    snr_mean = [];
    segsnr_mean = [];
    return;
end

% wavread was removed from recent MATLAB releases.  Prefer audioread /
% audioinfo when they exist and fall back to wavread on old installations.
% exist() codes: 2 = file, 3 = MEX/oct-file, 5 = built-in, 6 = P-code.
have_audioread = any(exist('audioread') == [2 3 5 6]);

if have_audioread
    [data1, Srate1] = audioread(cleanFile);
    [data2, Srate2] = audioread(enhdFile);
    info1 = audioinfo(cleanFile);
    info2 = audioinfo(enhdFile);
    Nbits1 = info1.BitsPerSample;
    Nbits2 = info2.BitsPerSample;
else
    [data1, Srate1, Nbits1] = wavread(cleanFile);
    [data2, Srate2, Nbits2] = wavread(enhdFile);
end

if (Srate1 ~= Srate2) || (Nbits1 ~= Nbits2) || (length(data1) ~= length(data2))
    % Identifier + message form; the original single-argument call printed
    % a literal "\n" because escapes are not processed in that form.
    error('comp_snr:fileMismatch', 'The two files do not match!');
end

% Optional preprocessing, intentionally disabled:
% len= min( length( data1), length( data2));
% data1= data1( 1: len);
% data2= data2( 1: len);
% data1= (data1 - mean(data1))/std(data1); % MVN
% data2= (data2 - mean(data2))/std(data2);

[snr_dist, segsnr_dist]= snr( data1, data2, Srate1);

snr_mean= snr_dist;
segsnr_mean= mean( segsnr_dist);   % average of the clamped per-frame values
% =========================================================================
function [overall_snr, segmental_snr] = snr(clean_speech, processed_speech,sample_rate)
% SNR  Overall and per-frame (segmental) SNR between two signals.
%
%   clean_speech      - reference signal samples
%   processed_speech  - signal under test; must have the same length
%   sample_rate       - sampling rate in Hz (sets the 30 ms frame length)
%
%   overall_snr    - 10*log10( sum(clean.^2) / sum((clean-processed).^2) )
%   segmental_snr  - 1-by-num_frames row vector of per-frame SNR values in
%                    dB, each limited to [MIN_SNR, MAX_SNR].  Empty when
%                    the input is too short to yield a single frame.
%
%   Raises an error when the two inputs differ in length.

% ----------------------------------------------------------------------
% Check the length of the clean and processed speech.  Must be the same.
% ----------------------------------------------------------------------

clean_length      = length(clean_speech);
processed_length  = length(processed_speech);

if (clean_length ~= processed_length)
    % Raise explicitly: a bare return here would leave both outputs
    % unassigned and surface as an unrelated error in the caller.
    error('comp_snr:lengthMismatch', 'Error: Both Speech Files must be same length.');
end

% The analysis window below is a column vector, so vector inputs are
% forced to columns to keep the frame-by-window product element-wise.
if isvector(clean_speech)
    clean_speech = clean_speech(:);
end
if isvector(processed_speech)
    processed_speech = processed_speech(:);
end

% ----------------------------------------------------------------------
% Scale both clean speech and processed speech to have same dynamic
% range.  Also remove DC component from each signal
% (intentionally disabled)
% ----------------------------------------------------------------------

%clean_speech     = clean_speech     - mean(clean_speech);
%processed_speech = processed_speech - mean(processed_speech);

%processed_speech = processed_speech.*(max(abs(clean_speech))/ max(abs(processed_speech)));

overall_snr = 10* log10( sum(clean_speech.^2)/sum((clean_speech-processed_speech).^2));

% ----------------------------------------------------------------------
% Analysis parameters
% ----------------------------------------------------------------------

winlength   = round(30*sample_rate/1000);   % window length in samples for 30-msecs
skiprate    = floor(winlength/4);           % window skip in samples (75% overlap)
MIN_SNR     = -10;                          % minimum SNR in dB
MAX_SNR     =  35;                          % maximum SNR in dB

% ----------------------------------------------------------------------
% For each frame of input speech, calculate the Segmental SNR
% ----------------------------------------------------------------------

% Same frame count the original obtained through implicit truncation of a
% fractional loop bound; never negative so the preallocation is valid.
num_frames = max(0, floor(clean_length/skiprate - winlength/skiprate));
window = 0.5*(1 - cos(2*pi*(1:winlength)'/(winlength+1)));

segmental_snr = zeros(1, num_frames);   % preallocate; stays empty for short input
start = 1;                              % starting sample

for frame_count = 1:num_frames

    % ----------------------------------------------------------
    % (1) Get the Frames for the test and reference speech.
    %     Multiply by Hanning Window.
    % ----------------------------------------------------------

    frame_idx = start:start+winlength-1;
    clean_frame     = clean_speech(frame_idx).*window;
    processed_frame = processed_speech(frame_idx).*window;

    % ----------------------------------------------------------
    % (2) Compute the Segmental SNR
    % ----------------------------------------------------------

    signal_energy = sum(clean_frame.^2);
    noise_energy  = sum((clean_frame-processed_frame).^2);
    % eps guards against division by zero and log10(0).
    frame_snr = 10*log10(signal_energy/(noise_energy+eps)+eps);
    % Clamp to [MIN_SNR, MAX_SNR].
    segmental_snr(frame_count) = min(max(frame_snr, MIN_SNR), MAX_SNR);

    start = start + skiprate;

end