-
Notifications
You must be signed in to change notification settings - Fork 1
/
class_accuracy_box_plot.py
153 lines (128 loc) · 4.45 KB
/
class_accuracy_box_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Run configuration. model_num is the number of per-model result files read
# below; iteration_num is used further down as the column count of the
# combined misclassification matrix.
model_num = 10
iteration_num = 100

# Load one misclassification table per model. The files are named
# mis_classify_1_.csv ... mis_classify_<model_num>_.csv and have no header
# row. Only columns 100..199 of each table are kept.
misclassification_list = []
for model_id in range(1, model_num + 1):
    csv_name = 'mis_classify_' + str(model_id) + '_.csv'
    model_frame = pd.read_csv(csv_name, sep=',', header=None)
    misclassification_list.append(model_frame.values[:, 100:200])
# Tally, over all models, how often each distinct value occurs in the
# misclassification tables. Keys are the distinct values found by np.unique
# (the value 0 is tallied like any other); values are the summed counts.
misclassify_dict = dict()
for model_table in misclassification_list:
    found_values, found_counts = np.unique(model_table, return_counts=True)
    for value, occurrences in zip(found_values, found_counts):
        misclassify_dict[value] = misclassify_dict.get(value, 0) + occurrences
# Stack the per-model tables on top of each other: model k occupies rows
# 120*k .. 120*k + 119 of a (120 * model_num, iteration_num) matrix.
total_mis_classify = np.zeros((120 * model_num, iteration_num))
for model_idx in range(model_num):
    first_row = 120 * model_idx
    total_mis_classify[first_row:first_row + 120, :] = misclassification_list[model_idx]

# Number of non-zero entries in the stacked matrix.
num_mis_classify_images = np.count_nonzero(total_mis_classify)

# Guarantee that every id 1..120 has an entry in the tally; ids that never
# occurred get a count of zero.
for image_id in range(1, 121):
    misclassify_dict.setdefault(image_id, 0)

# Placeholder for the per-image percentages; it is only filled by the
# commented-out report below.
mis_list = np.zeros(120)
# print('Misclassified images percentage is calculated by for specific image, the misclassified'
# ' times divided total classification number')
# for i in range(120):
# mis_percent = misclassify_dict[i+1]/(model_num*iteration_num)*100
# mis_list[i] = mis_percent
# print('Misclassified images percentage for image', str(i+1), 'is: %.4f' % mis_percent, '%')
# Build the 120-entry label vector Y from the header-less label file.
# The file must hold exactly 120 / 4 = 30 values (the reshape enforces it).
# Each value is repeated twice in place (30 -> 60 entries), and the resulting
# 60-entry sequence is then appended to itself (60 -> 120 entries).
cluster_chemistry_result_dataframe = pd.read_csv(
    'test_cluster_chemistry_result.csv', header=None
)
cluster_chemistry_result = np.repeat(
    cluster_chemistry_result_dataframe.values.reshape(1, 120 // 4), 2
)
cluster_chemistry_result = np.concatenate([cluster_chemistry_result] * 2)
Y = cluster_chemistry_result
# Denominators for the per-class accuracy calculation.
#
# y_k = (number of entries of Y equal to k) * iteration_num, for k = 1..6.
# The per-model accuracy code divides a class's misclassification count by
# this value.
#
# The counts are taken in one vectorised comparison per class instead of six
# element-by-element passes over Y, and the scale factor is iteration_num
# rather than a literal 100 so that it follows the run configuration.
# int() keeps the results plain Python ints.
y_1, y_2, y_3, y_4, y_5, y_6 = (
    int(np.count_nonzero(Y == class_id)) * iteration_num
    for class_id in range(1, 7)
)
# Per-model, per-class accuracy in percent.
#
# Every non-zero entry of a model's misclassification table is used as a
# 1-based index into Y, which gives the class of that entry. For class k:
#     accuracy = (1 - misclassified entries of class k / y_k) * 100
# where y_k is the class denominator computed earlier in the script.
#
# Sizes follow model_num and the tables themselves instead of the literals
# 10 / 100 / 120; all entries of each table are considered.
class_totals = (y_1, y_2, y_3, y_4, y_5, y_6)

# Start from NaN so that a class without any samples stays "undefined".
class_acc = np.full((model_num, len(class_totals)), np.nan)

for model_idx in range(model_num):
    entries = np.asarray(misclassification_list[model_idx]).ravel()
    # Zero entries do not refer to an image and are skipped; the remaining
    # values are shifted to 0-based positions in Y.
    label_positions = (entries[entries != 0] - 1).astype(int)
    misclassified_classes = Y[label_positions]

    for class_pos, class_total in enumerate(class_totals):
        if class_total == 0:
            # No samples of this class: accuracy is undefined. Leave NaN
            # rather than failing with ZeroDivisionError.
            continue
        mis_count = np.count_nonzero(misclassified_classes == class_pos + 1)
        class_acc[model_idx, class_pos] = (1 - mis_count / class_total) * 100

# Expose one length-model_num array per class under the names used by the
# rest of the script (row k of the transposed copy is class k + 1).
acc_1_list, acc_2_list, acc_3_list, acc_4_list, acc_5_list, acc_6_list = (
    class_acc.T.copy()
)
# Accuracy matrix for plotting: one row per model, one column per class
# (column 0 is class 1, ..., column 5 is class 6). The shape is derived from
# the per-class arrays, so it cannot drift out of sync with model_num.
total_acc = np.column_stack(
    (acc_1_list, acc_2_list, acc_3_list, acc_4_list, acc_5_list, acc_6_list)
)
# Box plot of the accuracy matrix: boxplot() draws one box per column of a
# 2-D input, so each box summarises one class across all models.
fig_box_plot, ax = plt.subplots()
ax.set_title('Accuracy of each class')
ax.boxplot(total_acc)
# The pyplot calls below act on the current axes, i.e. the one created above.
plt.xlabel('Water sample class', fontsize=15)
plt.ylabel('Test dataset accuracy (percentage)', fontsize=15)
plt.yticks(fontsize=9)
# Boxes sit at x = 1..6 by default; label them with the class names.
plt.xticks([1, 2, 3, 4, 5, 6], ['class 1', 'class 2', 'class 3', 'class 4', 'class 5', 'class 6'], fontsize=9)
# NOTE(review): the image file is written only after plt.show() returns;
# with a blocking backend that presumably means after the window is closed.
plt.show()
fig_box_plot.savefig('Test accuracy of each class.jpg')