-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmcnemar.py
103 lines (84 loc) · 3.21 KB
/
mcnemar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import json
import math
import sys
# using ben's results file
def mcnemar(rep, svmdata, baselinedata, null_comparison, baseline_results, svm_results):
    """Run McNemar's test (with continuity correction) for one representative.

    The two discordant counts are the number of entries listed for the SVM
    but not for the baseline, and vice versa.  The statistic is
    ``(|n_svm - n_base| - 1) ** 2 / (n_svm + n_base)``.

    Args:
        rep: key identifying the representative in all four lookup tables.
        svmdata: maps rep -> collection of bills misclassified by the SVM.
        baselinedata: maps rep -> collection of bills misclassified by the
            baseline.
        null_comparison: critical value the statistic must exceed for the
            null hypothesis to be rejected.
        baseline_results: maps rep -> dict containing an "Accuracy" entry.
        svm_results: maps rep -> dict containing a "Test Accuracy" entry.

    Returns:
        dict with the keys "n_svm_misclassified", "n_baseline_misclassified",
        "value", "SVM accuracy", "baseline accuracy", "null_hypothesis" and
        "better_algorithm".  "better_algorithm" is 1 when the SVM is
        significantly better, 0 when the baseline is, and 2 when the null
        hypothesis is not rejected.

    Raises:
        KeyError: if rep is missing from any of the lookup tables.
    """
    # Fail with a message that names the rep instead of a bare KeyError.
    missing = []
    if rep not in svmdata:
        missing.append("svmdata")
    if rep not in baselinedata:
        missing.append("baseline data")
    if missing:
        raise KeyError("rep %s not in %s" % (rep, " or ".join(missing)))

    svm = svmdata[rep]
    baseline = baselinedata[rep]
    svm_acc = svm_results[rep]["Test Accuracy"]
    baseline_acc = baseline_results[rep]["Accuracy"]

    # Discordant counts: misclassified by one algorithm but not the other.
    n_svm = sum(1 for bill in svm if bill not in baseline)
    n_base = sum(1 for bill in baseline if bill not in svm)

    denom = n_svm + n_base
    if denom == 0:
        # No discordant pairs: the algorithms disagree on nothing, so there
        # is no evidence against the null hypothesis.  Report a statistic of
        # 0.0 rather than inventing a winner.
        print("n_svm_misclassified and n_baseline_misclassified are 0 for rep#: " + str(rep))
        value = 0.0
        better_algorithm = 2
    else:
        numer = (abs(n_svm - n_base) - 1) ** 2
        value = float(numer) / denom
        if value > null_comparison:
            # Significant difference: the higher accuracy decides the winner
            # (ties go to the baseline, as before).
            better_algorithm = 1 if float(svm_acc) > float(baseline_acc) else 0
        else:
            better_algorithm = 2

    results = {}
    results["n_svm_misclassified"] = n_svm
    results["n_baseline_misclassified"] = n_base
    results["value"] = value
    results["SVM accuracy"] = svm_acc
    results["baseline accuracy"] = baseline_acc
    results["null_hypothesis"] = null_comparison
    results["better_algorithm"] = better_algorithm
    return results
# 1 for svm better
# 0 for baseline better
# 2 null hypothesis (both algorithms perform the same) not rejected by McNemar's test
def _load_json(path):
    """Parse the JSON file at *path*, closing the handle afterwards."""
    with open(path, "r") as handle:
        return json.load(handle)


def mcnemarAll(null_comparison = 3.841459):
    """Run McNemar's test for every representative and write the results.

    Loads the SVM and baseline misclassification data, both accuracy result
    files and the list of representatives, calls ``mcnemar`` once per
    representative, and passes the collected results to ``writeAll``.

    Args:
        null_comparison: critical value forwarded to ``mcnemar``.  The
            default is the chi-square critical value for one degree of
            freedom at a significance level of 0.05.
    """
    name = "all_no_summary_validation"
    name2 = "all__baseline"

    # json generated by svm and baseline experiments
    svmdata = _load_json("mcnemar_data/" + name)
    baselinedata = _load_json("mcnemar_data/" + name2)
    svm_results = _load_json('experiment_results/' + name + '_ben.json')
    baseline_results = _load_json('experiment_results/' + name2 + '.json')
    personlist = _load_json('representatives')

    stats = {}
    for rep in personlist:
        stats[rep] = mcnemar(
            rep=rep,
            svmdata=svmdata,
            baselinedata=baselinedata,
            null_comparison=null_comparison,
            svm_results=svm_results,
            baseline_results=baseline_results,
        )
    writeAll(stats)
def writeAll(stats):
    """Write the per-representative results to experiment_results/all_mcnemar.csv.

    Waits for the user to press Enter first, so the target file can be
    closed in any other program that has it open.

    The header row starts with an empty cell followed by the statistic
    names taken from the first entry of *stats*.  Each later row holds the
    representative id followed by its statistics in header order.

    Args:
        stats: maps representative id -> dict of statistic name -> value.
            An empty mapping produces a file with only an empty header line.
    """
    print("Done with all reps")
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        prompt = raw_input
    except NameError:
        prompt = input
    prompt("Press Enter to continue... \nAbout to write .csv. Make sure to close the mcnemar results file if you have it open.")

    # Fix the column order once so header and rows cannot drift apart.
    header = list(next(iter(stats.values()))) if stats else []

    # Format stats for excel:
    with open('experiment_results/all_mcnemar.csv', 'w') as f:
        # Write headers:
        f.write(''.join(',' + stat_name for stat_name in header))
        f.write('\n')
        # Write stats
        for rep_id in stats:
            row = stats[rep_id]
            f.write(str(rep_id))
            for stat_name in header:
                f.write(',' + str(row.get(stat_name, '')))
            f.write('\n')
# Run the full comparison with the default critical value as soon as this
# module is loaded (there is no __main__ guard, so importing it also runs it).
mcnemarAll()