-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval.py
118 lines (91 loc) · 3.5 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import sys
import re
import numpy as np
# Maps every tactic label to its row/column index in the confusion matrix.
label_dict = {
    'audit': 0,
    'authenticate': 1,
    'heartbeat': 2,
    'pooling': 3,
    'scheduler': 4,
    'unrelated': 5,
}
# Square matrix of counts with one row and one column per label, filled in
# by process() as [gold label index][predicted label index].
confusion_matrix = np.zeros(shape=(len(label_dict),) * 2)
def calculate_precisions(confusion_matrix):
    """Return the per-class precision of a square confusion matrix.

    The precision of class ``i`` is the diagonal entry ``[i, i]`` divided by
    the sum of column ``i``. A class whose column sums to zero gets a
    precision of 0.

    Args:
        confusion_matrix: Square array-like of counts.

    Returns:
        A new 1-D float ``numpy.ndarray`` holding one precision per class.
        The input is not modified.
    """
    # Work in floating point: with an integer matrix the ratios would
    # otherwise be truncated when stored into an integer result array.
    counts = np.asarray(confusion_matrix, dtype=float)
    column_totals = counts.sum(axis=0)
    precisions = np.zeros_like(column_totals)
    # Only divide where the column is non-empty; other entries stay 0.
    np.divide(counts.diagonal(), column_totals, out=precisions,
              where=column_totals != 0)
    return precisions
def calculate_recalls(confusion_matrix):
    """Return the per-class recall of a square confusion matrix.

    The recall of class ``i`` is the diagonal entry ``[i, i]`` divided by the
    sum of row ``i``. A class whose row sums to zero gets a recall of 0.

    Args:
        confusion_matrix: Square array-like of counts.

    Returns:
        A new 1-D float ``numpy.ndarray`` holding one recall per class.
        The input is not modified.
    """
    # Work in floating point: with an integer matrix the ratios would
    # otherwise be truncated when stored into an integer result array.
    counts = np.asarray(confusion_matrix, dtype=float)
    row_totals = counts.sum(axis=1)
    recalls = np.zeros_like(row_totals)
    # Only divide where the row is non-empty; other entries stay 0.
    np.divide(counts.diagonal(), row_totals, out=recalls,
              where=row_totals != 0)
    return recalls
def calculate_accuracy(confusion_matrix):
    """Return the overall accuracy of a confusion matrix.

    Accuracy is the sum of the diagonal divided by the sum of all entries.

    Args:
        confusion_matrix: Square array-like of counts.

    Returns:
        The accuracy as a float, or 0.0 when the matrix holds no counts.
    """
    counts = np.asarray(confusion_matrix)
    total = counts.sum()
    # An empty matrix would otherwise evaluate 0/0 and yield nan together
    # with a NumPy RuntimeWarning.
    if total == 0:
        return 0.0
    return counts.diagonal().sum() / total
def process(line, answers):
    """Record one ``<file> -> <prediction>`` line in the confusion matrix.

    Lines that are blank, start with ``#``, or start with four digits and a
    dash (presumably timestamped log output) are ignored. For every other
    line the module-level ``confusion_matrix`` is updated in place: a
    prediction contained in the file's gold answers increments its diagonal
    cell; otherwise the cell ``[gold][prediction]`` is incremented once for
    each gold answer of the file.

    Args:
        line: One raw line of the predictions file.
        answers: Mapping from tactic name to the list of file names that
            belong to that tactic.

    Raises:
        ValueError: If a non-skipped line contains no ``' -> '`` separator.
        KeyError: If the prediction or a gold answer is not a key of
            ``label_dict``.
    """
    # Raw string so that ``\d`` reaches the regex engine instead of being an
    # invalid string escape.
    if not line.strip() or re.match(r'(^\d{4}-|^#).*', line):
        return
    # Split on the last separator so the prediction is always the final
    # field of the line.
    filename, separator, prediction = line.rpartition(' -> ')
    if not separator:
        raise ValueError(
            "expected '<file> -> <prediction>', got: {!r}".format(line))
    filename = filename.strip()
    prediction = prediction.strip()
    idx_prediction = label_dict[prediction]
    gold_answers = get_gold_answers(filename, answers)
    if prediction in gold_answers:
        confusion_matrix[idx_prediction][idx_prediction] += 1
    else:
        for gold_answer in gold_answers:
            idx_gold = label_dict[gold_answer]
            confusion_matrix[idx_gold][idx_prediction] += 1
def get_gold_answers(filename, answers):
    """Return the tactics whose file list contains *filename*.

    Args:
        filename: File name to look up.
        answers: Mapping from tactic name to a list of file names.

    Returns:
        A list of the matching tactic names in the mapping's iteration
        order, or ``['unrelated']`` when no tactic lists the file.
    """
    matching_tactics = [
        tactic for tactic in answers
        if check_list_for_name(filename, answers[tactic])
    ]
    return matching_tactics if matching_tactics else ['unrelated']
def check_list_for_name(filename, tactic_list):
    """Return True if *filename* is an element of *tactic_list*.

    Args:
        filename: File name to search for.
        tactic_list: Collection of file names belonging to one tactic.

    Returns:
        ``True`` when an equal element exists, ``False`` otherwise.
    """
    # The built-in membership test replaces the hand-written linear scan.
    return filename in tactic_list
def process_file(infile, answers):
    """Evaluate every line of a predictions file and print the results.

    Each line is passed to ``process``, which updates the module-level
    ``confusion_matrix``; afterwards the metrics are printed with
    ``printResults``.

    Args:
        infile: Path of the predictions file.
        answers: Mapping from tactic name to the list of file names that
            belong to that tactic.
    """
    with open(infile, 'r') as inp:
        # Iterate the file lazily instead of loading all lines up front.
        for line in inp:
            process(line, answers)
    printResults(confusion_matrix)
def printResults(confusion_matrix):
    """Print precision, recall, accuracy and the raw confusion matrix.

    The reported averages leave out the last class of the matrix (labelled
    "w/o U" in the output); the per-class arrays are printed in full.

    Args:
        confusion_matrix: Square matrix of counts to summarize.
    """
    per_class_precision = calculate_precisions(confusion_matrix)
    per_class_recall = calculate_recalls(confusion_matrix)
    template = 'Precision: Average (w/o U) {0:.2%} -> {1}\nRecall: Average (w/o U) {2:.2%} -> {3}\nAccuracy: {4:.2%}\n'
    report = template.format(
        np.average(per_class_precision[:-1]),
        per_class_precision,
        np.average(per_class_recall[:-1]),
        per_class_recall,
        calculate_accuracy(confusion_matrix),
    )
    report += '{}'.format(confusion_matrix)
    print("{}\n".format(report))
def readAnswerSet(answerSetFile):
    """Read an answer-set file into a mapping of tactic name to entries.

    A line starting with ``'# '`` opens a new tactic section; every
    following non-blank line is stripped and appended to that tactic's list.
    An entry that appears before the first section header is looked up under
    the empty tactic name, which raises ``KeyError`` unless such a key
    exists.

    Args:
        answerSetFile: Path of the answer-set file.

    Returns:
        A dict mapping each tactic name to the list of its entries.
    """
    entries_by_tactic = createTacticsDict(answerSetFile)
    active_tactic = ''
    with open(answerSetFile, 'r') as handle:
        for raw_line in handle:
            if raw_line.startswith('# '):
                # Section header: later entries belong to this tactic.
                active_tactic = raw_line.strip(' #\n')
            else:
                entry = raw_line.strip()
                if entry:
                    entries_by_tactic[active_tactic].append(entry)
    return entries_by_tactic
def createTacticsDict(answerSetFile):
    """Build an empty entry list for every tactic header in the file.

    A header is any line starting with ``'# '``; the tactic name is that
    line with surrounding spaces, ``#`` characters and newlines stripped.

    Args:
        answerSetFile: Path of the answer-set file.

    Returns:
        A dict mapping each tactic name, in order of first appearance, to
        its own new empty list.
    """
    with open(answerSetFile, 'r') as file:
        # Dict comprehension over the lazily iterated file; each key gets a
        # distinct list object.
        return {
            line.strip(' #\n'): []
            for line in file
            if line.startswith('# ')
        }
if __name__ == "__main__":
    # Both positional arguments are required; exit with a usage message
    # instead of an IndexError traceback when one is missing.
    if len(sys.argv) < 3:
        sys.exit('usage: {} <answer_set_file> <predictions_file>'.format(
            sys.argv[0]))
    answerSetFile = sys.argv[1]
    infile = sys.argv[2]
    answers = readAnswerSet(answerSetFile)
    process_file(infile, answers)