-
Notifications
You must be signed in to change notification settings - Fork 12
/
RuleBasedClassifier.py
125 lines (104 loc) · 4.33 KB
/
RuleBasedClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""
Created by Harry Pardo
Copyright (c) 2018 Reynaldo John Tristan Mahinay Jr., Franz Stewart Dizon, Stephen Kyle Farinas and Harry Pardo
"""
import pyodbc
import time
import pandas as pd
def _fetch_rules(cursor, query, timing_message):
    """Run *query* on *cursor* and return the result set as a DataFrame.

    The DataFrame columns are named after the columns reported by
    ``cursor.description``. The elapsed time (query + conversion) is printed
    using *timing_message*, a ``%``-format string with one ``%s`` placeholder.
    """
    start_time = time.time()
    cursor.execute(query)
    columns = [column[0] for column in cursor.description]
    # The fetched row objects are converted to plain tuples before being
    # handed to pandas, exactly as the original index loop did.
    rows = [tuple(row) for row in cursor.fetchall()]
    rules = pd.DataFrame(rows, columns=columns)
    print(timing_message % (time.time() - start_time))
    return rules


def get_rules():
    """Load the Apriori and FP rule sets from the local SQL Server database.

    Only rules whose consequent is 'DENGUE NEXT_HIGH' or 'DENGUE NEXT_LOW'
    are selected. Apriori rules are ordered by consequent, then confidence
    (descending), lift (descending) and number of antecedents (ascending);
    FP rules by consequent, confidence (descending) and number of
    antecedents (ascending).

    Returns:
        tuple: ``(apriori_rules, fp_rules)``, two pandas DataFrames.
    """
    # The backslash is escaped explicitly: "\M" is not a valid escape
    # sequence and triggers a warning on recent Python versions. The
    # resulting connection string is unchanged.
    cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};"
                          "Server=(localdb)\\MSSQLLocalDB;"
                          "Database=ThesisSampleDB;"
                          "Trusted_Connection=yes;")
    try:
        cursor = cnxn.cursor()
        apriori_rules = _fetch_rules(
            cursor,
            "SELECT * FROM [dbo].[Apriori_Rules2] WHERE (CONSEQUENT = 'DENGUE NEXT_HIGH' or CONSEQUENT = 'DENGUE NEXT_LOW') ORDER BY CONSEQUENT,CONFIDENCE DESC, LIFT DESC,NUM_ANTECEDENT ASC",
            "Apriori get rules: --- %s seconds ---",
        )
        fp_rules = _fetch_rules(
            cursor,
            "SELECT * FROM [dbo].[FP_Rules] WHERE (CONSEQUENT = 'DENGUE NEXT_HIGH' or CONSEQUENT = 'DENGUE NEXT_LOW') ORDER BY CONSEQUENT,CONFIDENCE DESC,NUM_ANTECEDENT ASC",
            "FP get rules: --- %s seconds ---",
        )
    finally:
        # Always release the database connection, even if a query fails.
        cnxn.close()
    return apriori_rules, fp_rules
# Rule-base Classifier
def find_match(test_data, rules):
    """Classify every row of *test_data* with the first rule that matches it.

    A rule matches a row when all of the rule's antecedent items appear among
    the row's values (every column except the last one is used). Rules are
    tried in the order they appear in *rules*; the consequent of the first
    match becomes the prediction. Rows that match no rule are predicted as
    'DENGUE NEXT_LOW'.

    The number of rows predicted as 'DENGUE NEXT_HIGH' is printed before
    returning.

    Args:
        test_data: pandas DataFrame; the last column is ignored when matching
            (presumably it holds the label - confirm against the caller).
        rules: DataFrame with 'Antecedent' (comma-separated items when
            'Num_Antecedent' > 1), 'Consequent' and 'Num_Antecedent' columns.

    Returns:
        list: one predicted consequent per row of *test_data*.
    """
    # Parse each rule once instead of once per (row, rule) pair.
    parsed_rules = []
    for ant, con, n in zip(rules['Antecedent'], rules['Consequent'], rules['Num_Antecedent']):
        # A single antecedent must be wrapped in a list: building a set
        # directly from the string would yield its individual characters and
        # the rule could never match.
        items = ant.split(',') if n > 1 else [ant]
        parsed_rules.append((frozenset(items), con))

    answers = []
    high_count = 0
    for row in test_data[test_data.columns[:-1]].values.tolist():
        row_items = set(row)
        prediction = 'DENGUE NEXT_LOW'  # default when no rule applies
        for antecedents, con in parsed_rules:
            # "<=" (subset) rather than "<" (proper subset): a rule whose
            # antecedent equals the row's full item set must match too.
            if antecedents <= row_items:
                prediction = con
                break
        if prediction == 'DENGUE NEXT_HIGH':
            high_count += 1
        answers.append(prediction)
    print(high_count)
    return answers
def _percentage(numerator, denominator):
    """Return ``numerator / denominator * 100`` as a float, or 0.0 when the
    denominator is zero (the metric is undefined in that case)."""
    if denominator == 0:
        return 0.0
    return float(numerator / denominator * 100)


def check_accuracy(test_data_dengue, rule_based_answers):
    """Print confusion-matrix metrics for predicted vs. actual dengue labels.

    'DENGUE NEXT_HIGH' is treated as the positive class and
    'DENGUE NEXT_LOW' as the negative class. Values are compared by their
    ``str()`` form; pairs containing any other label are not counted in the
    confusion matrix.

    Printed, in order: PPV, NPV, sensitivity, specificity, F1 (or "F: 0"
    when there are no true positives), overall accuracy (unlabelled line)
    and the raw TP/FP/TN/FN counts. All ratios are percentages. A metric
    whose denominator is zero is reported as 0 instead of raising
    ZeroDivisionError.

    Args:
        test_data_dengue: sized iterable of actual labels.
        rule_based_answers: iterable of predicted labels, aligned with
            *test_data_dengue*.
    """
    positive = 'DENGUE NEXT_HIGH'
    negative = 'DENGUE NEXT_LOW'

    # Accuracy is computed against the number of actual labels, even if
    # fewer predictions were supplied (zip stops at the shorter input).
    total = len(test_data_dengue)
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    for x, y in zip(test_data_dengue, rule_based_answers):
        actual, predicted = str(x), str(y)
        if actual == positive and predicted == positive:
            tp += 1
        elif actual == negative and predicted == positive:
            fp += 1
        elif actual == negative and predicted == negative:
            tn += 1
        elif actual == positive and predicted == negative:
            fn += 1

    # PPV keeps its original special case so existing output is unchanged.
    if tp == 0:
        PPV = 0
    else:
        PPV = float(tp / (tp + fp))
    print("PPV:" + str(PPV * 100))
    print("NPV:" + str(_percentage(tn, tn + fn)))
    print("Sensitivity:" + str(_percentage(tp, tp + fn)))
    print("Specificity:" + str(_percentage(tn, tn + fp)))
    if PPV == 0:
        print("F: 0")
    else:
        # tp > 0 here, so the denominator is always positive.
        print("F1:" + str(float((2 * tp) / (2 * tp + fp + fn) * 100)))
    print(str(_percentage(tp + tn, total)))
    print("TP:" + str(tp) + " FP: " + str(fp) + " TN: " + str(tn) + " FN: " + str(fn))
def classfiy(test_data):
    """Classify *test_data* with both rule sets and print their metrics.

    Fetches the Apriori and FP rules via ``get_rules``, runs ``find_match``
    with each rule set (printing how long each classification took), then
    reports the metrics of each prediction list against the 'dengue_next'
    column of *test_data* via ``check_accuracy``.
    """
    rules_apriori, rules_fp = get_rules()

    tick = time.time()
    labels_apriori = find_match(test_data, rules_apriori)
    print("Apriori classify: --- %s seconds ---" % (time.time() - tick))

    tick = time.time()
    labels_fp = find_match(test_data, rules_fp)
    print("FP classify: --- %s seconds ---" % (time.time() - tick))

    # Same report layout for both algorithms: divider, heading, metrics.
    reports = (
        ('Apriori', labels_apriori),
        ('Fp Growth', labels_fp),
    )
    for heading, predicted in reports:
        print('---------------------------------------------------')
        print(heading)
        check_accuracy(test_data['dengue_next'], predicted)