import numpy as np
# Tools for working with confusion matrices, which are useful for the evaluation
# of classification tasks.
#
# The centerpiece is the ConfusionMatrix class, which encapsulates a square
# numpy array. A number of error measures are implemented as properties. Some
# methods to construct this matrix from test data are also provided.
#
# The BinaryConfusionMatrix class extends the ConfusionMatrix class with some
# functionality that is specific to binary classification problems.
#
# The plot_conf function provides a quick way of visualising a confusion matrix.
# Balanced (row-normalised) matrices give the best results.
#
#
# Usage example:
#
# >>> c = ConfusionMatrix.from_samples(num_classes, predictions, targets, labels=labels)
# >>> print("BER: %f" % c.ber)
# >>> print("Error rates per class: %s" % repr(c.error_rates))
# >>> plot_conf(c.balance())
#
# Note: a ConfusionMatrix wraps a numpy ARRAY, not a numpy MATRIX.
class ConfusionMatrix(object):
"""
    This class represents a confusion matrix, and encapsulates a square 2D
numpy array. It can be constructed from such an array by using the constructor,
or from a set of predictions and target labels through the from_data and
from_samples class methods.
The class implements a number of commonly used accuracy and error measures
based on confusion matrices as properties.
"""
def __init__(self, input_array, labels=None):
arr = np.asarray(input_array).astype(float)
self._assert_dimensions(arr)
self._arr = arr
self._init_labels(len(arr), labels)
def _init_labels(self, num_classes, labels):
if labels is not None:
self._assert_labels(num_classes, labels)
self.labels = labels
else:
            self.labels = list(range(num_classes)) # default to labels 0, 1, 2, ..., N-1
def _check_ndim(self, arr):
# check if it is a 2D array
return arr.ndim == 2
    def _check_shape(self, arr):
        # check if it is a square array
        return arr.shape[0] == arr.shape[1]
def _check_dimensions(self, arr):
return self._check_ndim(arr) and self._check_shape(arr)
def _assert_dimensions(self, arr): # like _check_dimensions, but with exceptions.
if not self._check_ndim(arr):
raise RuntimeError("ConfusionMatrix should be 2-dimensional - ndim is %d" % arr.ndim)
elif not self._check_shape(arr):
raise RuntimeError("ConfusionMatrix should be rectangular - shape is %dx%d" % arr.shape)
def _check_labels_count(self, num_classes, labels):
return len(labels) == num_classes
def _check_labels_uniqueness(self, labels):
return len(np.unique(labels)) == len(labels)
def _check_labels(self, num_classes, labels):
return self._check_labels_count(num_classes, labels) and self._check_labels_uniqueness(labels)
def _assert_labels(self, num_classes, labels):
if not self._check_labels_count(num_classes, labels):
raise RuntimeError("Number of class labels does not equal number of classes - number of labels is %d, number of classes is %d" % (len(labels), num_classes)) # TODO: check happens very late, too late in case there are fewer labels than classes (then another, more obscure error will already have been raised). Resolving this issue isn't easy without code duplication.
elif not self._check_labels_uniqueness(labels):
raise RuntimeError("Class labels are not unique - labels are %s" % repr(labels))
def __add__(self, other):
return ConfusionMatrix(np.asarray(self) + np.asarray(other))
def __repr__(self):
return "ConfusionMatrix(%s)" % repr(self._arr)
def __array__(self):
return self._arr
asarray = __array__
array = __array__
@classmethod
def from_data(cls, num_classes, output_classes, target_classes, labels=None):
"""
Create a confusion matrix from a set of target class labels and corresponding predictions.
num_classes: the number of classes (also the size of the resulting matrix).
output_classes: predictions (MDP-style numpy array: 1 label per row)
target_classes: target class labels (MDP-style numpy array: 1 label per row)
labels: the class labels that were used. Defaults to 0, 1, 2, ..., num_classes-1.
"""
if output_classes.shape != target_classes.shape:
raise RuntimeError("Output and target data should have the same shape")
if labels is None:
            labels = list(range(num_classes))
conf = np.zeros((num_classes, num_classes))
for output_label, target_label in zip(output_classes, target_classes):
o = labels.index(output_label)
t = labels.index(target_label)
conf[t, o] += 1 # targets = rows, predictions = columns
return ConfusionMatrix(conf, labels)
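    # Illustrative note (not part of the original comments): with num_classes=2
    # and default labels, predictions [0, 1, 1] against targets [0, 0, 1] yield
    #   [[1., 1.],
    #    [0., 1.]]
    # i.e. the target-0 sample that was predicted as 1 lands in row 0, column 1
    # (targets index the rows, predictions index the columns).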
@classmethod
def from_samples(cls, num_classes, output_classes_list, target_classes_list, normalisation=True, labels=None):
"""
Create a confusion matrix from a set of samples, where each sample is equally weighted
irrespective of its length. ConfusionMatrix.from_data is applied on each sample. The
resulting matrices are then normalised, and subsequently averaged (which results in a
normalised matrix).
num_classes: see ConfusionMatrix.from_data
output_classes_list: iterable containing prediction data (see ConfusionMatrix.from_data for the correct format).
target_classes_list: iterable containing target class labels (see ConfusionMatrix.from_data for the correct format)
normalisation: defaults to True. If this parameter is set to False, no normalisation occurs and the confusion matrices corresponding to the samples are just added together.
labels: the class labels that were used. Defaults to 0, 1, 2, ..., num_classes-1.
"""
matrices = [] # one matrix per sample
for x, y in zip(output_classes_list, target_classes_list):
nc = ConfusionMatrix.from_data(num_classes, x, y, labels)
if normalisation:
nc = nc.normalise()
matrices.append(nc.asarray())
if normalisation:
matrix = np.array(matrices).mean(0)
else:
matrix = np.array(matrices).sum(0)
        return ConfusionMatrix(matrix, labels)
@classmethod
def error_measure(cls, name, num_classes, labels=None, from_samples=False, normalisation=True):
"""
Returns a function that constructs a confusion matrix and then computes the desired metric.
For example:
>>> ber = ConfusionMatrix.error_measure('ber', 2)
        >>> print(ber(input_signal, target_signal))
If from_samples is set to True, the returned function will use from_samples instead of
from_data to construct the confusion matrix.
"""
def f(input_signal, target_signal):
if from_samples:
cm = cls.from_samples(num_classes, input_signal, target_signal, labels=labels,
normalisation=normalisation)
else:
cm = cls.from_data(num_classes, input_signal, target_signal, labels=labels)
return getattr(cm, name)
return f
def normalise(self):
"""
Returns a confusion matrix in which the sum of all elements is 1.
This is useful for percentage information, and for adding matrices together with equal weights.
"""
        return ConfusionMatrix(self._arr / self.total, self.labels)
def normalise_per_class(self):
"""
Returns a balanced confusion matrix, in which all rows sum to 1.
This is particularly useful for visualisation.
"""
# in a CM normalised per class, classification accuracy and BCR are equivalent.
truth = np.atleast_2d(self.truth).T
normalised_arr = self._arr / truth # will have NaNs where truth == 0
normalised_arr[np.isnan(normalised_arr)] = 0
        return ConfusionMatrix(normalised_arr, self.labels)
balance = normalise_per_class # in a CM normalised per class, accuracy and BCR,
# error rate and BER, ... are equivalent; hence the name of this alias.
# in a balanced confusion matrix, each class has the same weight.
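    # For example (illustration only): the raw matrix [[8, 2], [1, 3]] balances
    # to [[0.8, 0.2], [0.25, 0.75]], since its rows sum to 10 and 4 respectively.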
@property
def num_classes(self):
"""
The number of different classes in this matrix, which also corresponds to its size.
"""
return len(self._arr)
def __len__(self):
return self.num_classes
@property
def total(self):
"""
The total amount of observations, corresponding to the sum of all elements of the matrix.
For normalised matrices, this is always 1.
"""
return self._arr.sum()
@property
def correct(self):
"""
The amount of correct detections for each class, corresponding to the diagonal elements
of the matrix.
"""
return self._arr.diagonal()
@property
def incorrect(self):
"""
The amount of incorrect detections for each class.
"""
return self.truth - self.correct
@property
def detections(self): # detections are column sums
"""
The number of detections for each class, corresponding to the sums of the columns.
"""
return self._arr.sum(0)
@property
def ground_truth(self): # number of values of each class (row sums)
"""
The number of entries for each class (ground truth), corresponding to the sums
of the rows.
"""
return self._arr.sum(1)
truth = ground_truth
def binary(self):
"""
        Returns a list of binary confusion matrices, one for each class,
        where that class is considered 'positive' and the others 'negative'.
The result is returned as a list of 2x2 BinaryConfusionMatrices.
"""
binary_confs = []
for i in range(self.num_classes):
# for each class, compute true positives, true negatives, false positives, false negatives
            not_i = [j for j in range(self.num_classes) if j != i] # indices of all other classes
tp = self._arr[i, i]
fn = self._arr[i, not_i].sum()
fp = self._arr[not_i, i].sum()
            # conf[not_i, not_i] would only select diagonal elements, so use
            # np.ix_ to select the full submatrix of 'negative' rows and columns.
            tn = self._arr[np.ix_(not_i, not_i)].sum()
b = BinaryConfusionMatrix([[tp, fn], [fp, tn]])
binary_confs.append(b)
return binary_confs
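    # Illustration (not from the original comments): for a 3-class matrix,
    # binary()[0] treats class 0 as positive, so tp = arr[0, 0],
    # fn = arr[0, 1:].sum(), fp = arr[1:, 0].sum() and tn = arr[1:, 1:].sum().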
@property
def classification_rates(self):
"""
The relative amount of correct classifications for each class.
"""
r = self.correct / self.truth
r[np.isnan(r)] = 0
return r
@property
def error_rates(self): # compute the error rates per class
"""
The relative amount of misclassifications for each class.
"""
r = self.incorrect / self.truth
r[np.isnan(r)] = 0
return r
@property
def balanced_error_rate(self):
"""
The average relative amount of misclassifications, where each class
is weighed equally. In a balanced matrix, this is the same as the
classification error rate.
"""
r = self.error_rates
n = len(r)
j = n
for i in xrange(n):
if r[i]==0.0:
n -= 1
return r.sum()/n # avg of error rates = BER = 1 - BCR
ber = balanced_error_rate
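    # Worked example (illustration only): per-class error rates [0.2, 0.4, 0.0]
    # give (0.2 + 0.4) / 2 = 0.3 here, rather than the conventional unweighted
    # mean of 0.2, because the zero rate is excluded.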
@property
def balanced_classification_rate(self):
"""
The average relative amount of correct classifications, where each
class is weighed equally. In a balanced matrix, this is the same as
the classification accuracy.
"""
        # average of all classification rates; note that this is not exactly
        # 1 - self.ber, since balanced_error_rate excludes zero-error classes.
        return self.classification_rates.mean()
bcr = balanced_classification_rate
@property
def classification_accuracy(self):
"""
The relative amount of correct classifications.
"""
return self.correct.sum() / self.total
accuracy = classification_accuracy
acc = accuracy
@property
def classification_error_rate(self):
"""
The relative amount of misclassifications.
"""
return self.incorrect.sum() / self.total
error_rate = classification_error_rate
err = error_rate
class BinaryConfusionMatrix(ConfusionMatrix):
"""
This class represents a confusion matrix for a binary classification problem,
in which one class is considered 'positive' and the other 'negative'. It extends
the ConfusionMatrix class, so it provides all the methods and properties of this
class. In addition, some error and accuracy measures specific to binary problems
are provided as properties.
"""
# additional error measures and functionality for binary confusion matrices,
# where the first class is considered 'positive' and the second 'negative'.
def _check_shape(self, arr):
# check if this is a 2x2 array
return (arr.shape[0] == 2) and (arr.shape[1] == 2)
def _assert_dimensions(self, arr):
if not self._check_ndim(arr):
raise RuntimeError("ConfusionMatrix should be 2-dimensional - ndim is %d" % arr.ndim)
elif not self._check_shape(arr):
raise RuntimeError("ConfusionMatrix should have shape 2x2 - shape is %dx%d" % arr.shape)
def __add__(self, other): # overridden to make sure that the resulting matrix is also binary
return BinaryConfusionMatrix(np.asarray(self) + np.asarray(other))
def __repr__(self):
return "BinaryConfusionMatrix(%s)" % repr(self._arr)
@classmethod
def from_data(cls, output_classes, target_classes, labels=None):
# By default '0' is the POSITIVE label, '1' is the NEGATIVE label!
cf = super(BinaryConfusionMatrix, cls).from_data(2, output_classes, target_classes, labels)
return BinaryConfusionMatrix(cf)
    @classmethod
    def from_samples(cls, output_classes_list, target_classes_list, normalisation=True, labels=None):
        # By default '0' is the POSITIVE label, '1' is the NEGATIVE label!
        cf = super(BinaryConfusionMatrix, cls).from_samples(2, output_classes_list, target_classes_list,
                                                            normalisation=normalisation, labels=labels)
        return BinaryConfusionMatrix(cf)
@classmethod
def error_measure(cls, name, labels=None, from_samples=False, normalisation=True):
"""
Returns a function that constructs a confusion matrix and then computes the desired metric.
For example:
        >>> recall = BinaryConfusionMatrix.error_measure('recall')
        >>> print(recall(input_signal, target_signal))
If from_samples is set to True, the returned function will use from_samples instead of
from_data to construct the confusion matrix.
"""
def f(input_signal, target_signal):
if from_samples:
cm = cls.from_samples(input_signal, target_signal, labels=labels,
normalisation=normalisation)
else:
cm = cls.from_data(input_signal, target_signal, labels=labels)
return getattr(cm, name)
return f
def normalise(self): # overridden to make sure that the resulting matrix is also binary
"""
Returns a confusion matrix in which the sum of all elements is 1.
This is useful for percentage information, and for adding matrices together with equal weights.
"""
        return BinaryConfusionMatrix(self._arr / self.total, self.labels)
def normalise_per_class(self): # overridden to make sure that the resulting matrix is also binary
"""
Returns a balanced confusion matrix, in which all rows sum to 1.
This is particularly useful for visualisation.
"""
# in a CM normalised per class, classification accuracy and BCR are equivalent.
        truth = np.atleast_2d(self.truth).T
        normalised_arr = self._arr / truth # will have NaNs where truth == 0
        normalised_arr[np.isnan(normalised_arr)] = 0
        return BinaryConfusionMatrix(normalised_arr, self.labels)
balance = normalise_per_class
@property
def true_positives(self):
"""
The amount of true positives (target label 0, predicted label 0).
"""
return self._arr[0, 0]
@property
def false_positives(self):
"""
The amount of false positives (target label 1, predicted label 0).
"""
return self._arr[1, 0]
@property
def true_negatives(self):
"""
The amount of true negatives (target label 1, predicted label 1).
"""
return self._arr[1, 1]
@property
def false_negatives(self):
"""
        The amount of false negatives (target label 0, predicted label 1).
"""
return self._arr[0, 1]
tp = true_positives
fp = false_positives
tn = true_negatives
fn = false_negatives
@property
def positives(self):
"""
The amount of positive data (label 0).
"""
return self.tp + self.fn
@property
def negatives(self):
"""
The amount of negative data (label 1).
"""
return self.tn + self.fp
pos = positives
neg = negatives
@property
def positive_detections(self):
"""
The amount of positive detections (label 0).
"""
return self.tp + self.fp
@property
def negative_detections(self):
"""
The amount of negative detections (label 1).
"""
return self.tn + self.fn
pd = positive_detections
nd = negative_detections
@property
def recall(self):
"""
Recall or sensitivity or true positive rate, defined as the amount of
correct positive detections, divided by the total amount of positive data.
"""
return self.tp / self.pos
@property
def precision(self):
"""
Precision, defined as the amount of correct positive detections,
divided by the total number of positive detections.
"""
return self.tp / self.pd
r = recall
p = precision
def f_measure(self, beta=1): # not a property, because the beta parameter can be tuned
"""
F measure, harmonic mean of recall and precision. The mean can optionally be
weighted with a parameter beta. beta > 1 increases the importance of recall,
beta < 1 increases the importance of precision. The default value is 1, resulting
in the so-called F1-measure.
"""
return (1 + beta**2) * ( (self.p*self.r) / ((beta**2) * self.p + self.r) )
@property
def f1(self): # for convenience
"""
F1 measure (see BinaryConfusionMatrix.f_measure() for more information).
"""
return self.f_measure(1)
sensitivity = recall
sens = sensitivity
@property
def specificity(self):
"""
Specificity or true negative rate, defined as the amount of correct
negative detections, divided by the total amount of negative data.
"""
return self.tn / self.neg
spec = specificity
true_positive_rate = sensitivity
true_negative_rate = specificity
tp_rate = true_positive_rate
tn_rate = true_negative_rate
@property
def positive_likelihood(self):
"""
Positive likelihood, defined as sensitivity / (1 - specificity)
"""
return self.sens / (1 - self.spec)
@property
def negative_likelihood(self):
"""
Negative likelihood, defined as specificity / (1 - sensitivity)
"""
return self.spec / (1 - self.sens)
@property
def youden_index(self):
"""
Youden's index or Youden's J statistic, defined as sensitivity + specificity - 1
"""
return self.sens + self.spec - 1
@property
def matthews_correlation_coefficient(self):
"""
Matthews Correlation Coefficient (MCC), a correlation coefficient between
the observed and predicted binary classifications.
"""
        # MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
        num = self.tp * self.tn - self.fp * self.fn
        denom = np.sqrt( self.pd * self.pos * self.neg * self.nd )
        return num / denom
mcc = matthews_correlation_coefficient
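    # Worked example (illustration only): tp = tn = 5 and fp = fn = 0 give
    # num = 25 and denom = sqrt(5 * 5 * 5 * 5) = 25, hence mcc = 1 (perfect
    # agreement); chance-level predictions yield values around 0.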
# plotting
try:
import matplotlib.pyplot as plt
def plot_conf(conf):
"""
Simple function to visualise a balanced confusion matrix.
"""
res = plt.imshow(np.asarray(conf), cmap=plt.cm.jet, interpolation='nearest')
        # display correct detection percentages (only meaningful for matrices
        # that are normalised per class, i.e. where each row sums to 1).
        for i, rate in enumerate(conf.correct):
            percent = "%d%%" % round(rate * 100)
            plt.text(i-.2, i+.1, percent, fontsize=14)
        plt.colorbar(res)
# plt.show()
plt.savefig('C_Matrix.png')
except ImportError:
def plot_conf(conf):
raise RuntimeError("Could not import matplotlib.pyplot, plotting not available.")