-
Notifications
You must be signed in to change notification settings - Fork 1
/
metrics.py
296 lines (244 loc) · 8.61 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
"""
EDER from https://github.com/BenoitWang/Speech_Emotion_Diarization/blob/main/utils/EDER.py
by Yingzhi Wang 2023
"""
class IEMOCAP_Meter:
    """Keep the best value observed so far for each tracked metric.

    ``WA`` and ``UA`` retain the largest value passed to :meth:`update`;
    ``WER``, ``CER`` and ``eder`` retain the smallest.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Put every metric back to its worst-case starting value."""
        self.WA, self.UA = 0., 0.
        self.WER = self.CER = self.eder = 100.

    def update(self, WA, UA, WER, CER, eder=None):
        """Fold a new set of scores into the stored best values.

        Arguments
        ---------
        WA, UA : float
            Scores where higher is better; stored only if they exceed the
            current best.
        WER, CER : float
            Scores where lower is better; stored only if they are below the
            current best.
        eder : float or None
            Lower-is-better score; ignored entirely when ``None``.
        """
        # max/min keep the stored value on ties, exactly like a strict
        # ">" / "<" comparison followed by an assignment.
        self.UA = max(self.UA, UA)
        self.WA = max(self.WA, WA)
        self.WER = min(self.WER, WER)
        self.CER = min(self.CER, CER)
        if eder is not None:
            self.eder = min(self.eder, eder)
def EDER(prediction, id, duration, emotion, window_length=0.02, stride=0.02):
    """Calculate the EDER value of one utterance.

    Modified from
    https://github.com/BenoitWang/Speech_Emotion_Diarization/blob/main/utils/EDER.py

    The frame-wise predictions are turned into timed segments, adjacent
    segments with the same label are merged, overlapping segments are split
    at the midpoint of their overlap, and the result is compared with the
    reference segmentation. The returned value is
    ``1 - (time where predicted label == reference label) / duration``.

    Arguments
    ---------
    prediction : list
        Frame-wise predicted labels of the utterance.
    id : str
        Id of the utterance.
    duration : float
        Duration of the utterance.
    emotion : list of dict
        The ground truth emotion and its time span,
        e.g. [{'emo': 'angry', 'start': 1.016, 'end': 6.336}].
    window_length : float
        Length of each prediction frame.
    stride : float
        Hop between the start times of two consecutive frames.

    Returns
    -------
    float
        The calculated EDER for the utterance. ``1.0`` when ``prediction``
        is empty, because nothing can have been predicted correctly.

    Example
    -------
    >>> prediction=['n', 'n', 'n', 'a', 'a', 'a']
    >>> id="spk1_1"
    >>> duration=1.22
    >>> emotion=[{'emo': 'angry', 'start': 0.39, 'end': 1.10}]
    >>> window_length = 0.2
    >>> stride = 0.2
    >>> EDER(prediction, id, duration, emotion, window_length, stride)
    0.2704918032786885
    """
    # Without any frame there is no predicted segment to score; bail out
    # before the segment helpers index into an empty list.
    if len(prediction) == 0:
        return 1.0

    # One [utt_id, start, end, label] entry per prediction frame.
    lol = [
        [id, stride * i, stride * i + window_length, label]
        for i, label in enumerate(prediction)
    ]

    lol = merge_ssegs_same_emotion_adjacent(lol)
    # A single merged segment has no neighbour to share an overlap with.
    if len(lol) != 1:
        lol = distribute_overlap(lol)

    ref = reference_to_lol(id, duration, emotion)

    # Total time during which a predicted segment carries the same label as
    # the reference segment it intersects.
    good_preds = sum(
        getOverlap([ref_seg[1], ref_seg[2]], [pred_seg[1], pred_seg[2]])
        for ref_seg in ref
        for pred_seg in lol
        if pred_seg[3] == ref_seg[3]
    )
    return 1 - good_preds / duration
def getOverlap(a, b):
    """Return the length shared by two intervals.

    Arguments
    ---------
    a : list
        First interval as [start, end].
    b : list
        Second interval as [start, end].

    Returns
    -------
    float
        Length of the intersection, or 0 when the intervals do not
        intersect (or merely touch).

    Example
    -------
    >>> getOverlap([1.0, 3.0], [2.0, 4.5])
    1.0
    >>> getOverlap([0.0, 1.0], [2.0, 3.0])
    0
    """
    latest_start = max(a[0], b[0])
    earliest_end = min(a[1], b[1])
    shared = earliest_end - latest_start
    # A non-positive difference means the intervals are disjoint.
    return shared if shared > 0 else 0
def is_overlapped(end1, start2):
    """Tell whether a segment starts no later than the previous one ends.

    Arguments
    ---------
    end1 : float
        End time of the first segment.
    start2 : float
        Start time of the second segment.

    Returns
    -------
    overlapped : bool
        True if the segments overlap (touching counts), else False.

    Example
    -------
    >>> is_overlapped(5.5, 3.4)
    True
    >>> is_overlapped(5.5, 6.4)
    False
    """
    starts_after_first_ends = start2 > end1
    return not starts_after_first_ends
def merge_ssegs_same_emotion_adjacent(lol):
    """Merge adjacent sub-segs if they are the same emotion.

    Two consecutive segments are merged when the later one starts no later
    than the earlier one ends and both carry the same emotion label. The
    merged segment keeps the earlier start and takes the later segment's
    end time.

    Arguments
    ---------
    lol : list of list
        Each list contains [utt_id, sseg_start, sseg_end, emo_label].

    Returns
    -------
    new_lol : list of list
        Adjacent segments of the same emotion merged into one. The inner
        lists are fresh copies, so ``lol`` is left untouched. An empty
        input yields an empty list.

    Example
    -------
    >>> lol=[['u1', 0.0, 7.0, 'a'],
    ... ['u1', 7.0, 9.0, 'a'],
    ... ['u1', 9.0, 11.0, 'n'],
    ... ['u1', 11.0, 13.0, 'n'],
    ... ['u1', 13.0, 15.0, 'n'],
    ... ['u1', 15.0, 16.0, 'a']]
    >>> merge_ssegs_same_emotion_adjacent(lol)
    [['u1', 0.0, 9.0, 'a'], ['u1', 9.0, 15.0, 'n'], ['u1', 15.0, 16.0, 'a']]
    """
    new_lol = []
    for sseg in lol:
        if new_lol:
            last = new_lol[-1]
            # Same test as is_overlapped(last_end, next_start): the next
            # segment begins at or before the point where the last one ends.
            touches_last = sseg[1] <= last[2]
            if touches_last and last[3] == sseg[3]:
                last[2] = sseg[2]  # just update the end time
                continue
        # Copy so that extending the end time never alters the caller's data.
        new_lol.append(list(sseg))
    return new_lol
def reference_to_lol(id, duration, emotion):
    """Expand the reference annotation into a list of labelled segments.

    The annotated span is labelled with the first character of its ``'emo'``
    value; any part of the utterance before or after it is labelled ``'n'``.

    Arguments
    ---------
    id : str
        Id of the utterance.
    duration : float
        Duration of the utterance.
    emotion : list of dict
        The ground truth emotion and its time span,
        e.g. [{'emo': 'angry', 'start': 1.016, 'end': 6.336}].
        Exactly one entry is supported.

    Returns
    -------
    lol : list of list
        Each inner list is [utt_id, sseg_start, sseg_end, emo_label].

    Example
    -------
    >>> id="u1"
    >>> duration=8.0
    >>> emotion=[{'emo': 'angry', 'start': 1.016, 'end': 6.336}]
    >>> reference_to_lol(id, duration, emotion)
    [['u1', 0, 1.016, 'n'], ['u1', 1.016, 6.336, 'a'], ['u1', 6.336, 8.0, 'n']]
    """
    assert (
        len(emotion) == 1
    ), "NotImplementedError: The solution is only implemented for one-emotion utterance for now."

    annotated = emotion[0]
    onset, offset = annotated["start"], annotated["end"]

    # Span before the annotated emotion, if the emotion does not start at 0.
    leading = [[id, 0, onset, "n"]] if onset > 0 else []
    # The annotated span itself, labelled by the emotion's first letter.
    core = [[id, onset, offset, annotated["emo"][0]]]
    # Span after the annotated emotion, up to the end of the utterance.
    trailing = [[id, offset, duration, "n"]] if offset < duration else []

    return leading + core + trailing
def distribute_overlap(lol):
    """Distributes the overlapped speech equally among the adjacent segments
    with different emotions.

    For every pair of consecutive segments where the later one starts no
    later than the earlier one ends, the overlap is halved: the earlier
    segment's end and the later segment's start both move to the midpoint.

    Arguments
    ---------
    lol : list of list
        Each inner list is [utt_id, sseg_start, sseg_end, emo_label].
        Segments of the same emotion are expected to be merged already
        (see merge_ssegs_same_emotion_adjacent), so overlapping neighbours
        are treated as having different emotions without checking.

    Returns
    -------
    new_lol : list of list
        The segments with each overlap equally divided among the two
        adjacent segments. The inner lists are fresh copies, so ``lol`` is
        left untouched. Empty input yields an empty list and a single
        segment is returned as is.

    Example
    -------
    >>> lol = [['r1', 5.5, 9.0, 's1'],
    ... ['r1', 8.0, 11.0, 's2'],
    ... ['r1', 11.5, 13.0, 's2'],
    ... ['r1', 12.0, 15.0, 's1']]
    >>> distribute_overlap(lol)
    [['r1', 5.5, 8.5, 's1'], ['r1', 8.5, 11.0, 's2'], ['r1', 11.5, 12.5, 's2'], ['r1', 12.5, 15.0, 's1']]
    """
    if len(lol) == 0:
        return []

    # Work on copies so that the boundary shifts never alter the caller's data.
    ssegs = [list(sseg) for sseg in lol]

    new_lol = []
    for sseg, next_sseg in zip(ssegs, ssegs[1:]):
        # Same test as is_overlapped(sseg_end, next_start).
        if next_sseg[1] <= sseg[2]:
            # This overlap is divided equally between the adjacent segments.
            half_overlap = (sseg[2] - next_sseg[1]) / 2.0
            sseg[2] = sseg[2] - half_overlap
            next_sseg[1] = next_sseg[1] + half_overlap

        # To avoid duplicate entries, skip a segment equal to the last one kept.
        if not new_lol or new_lol[-1] != sseg:
            new_lol.append(sseg)

    # The last segment is never the left-hand side of a pair; add it here.
    new_lol.append(ssegs[-1])
    return new_lol