-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSVM.py
128 lines (106 loc) · 3.18 KB
/
SVM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/python3
# Author: Deepak Pandita
# Date created: 02 Feb 2018
import numpy as np
import argparse
#This function trains a Support Vector Machine on training examples with given epochs, capacity, learning rate, initial weights and bias
#The function returns the learned weights and bias
def _parse_examples(examples, n_features):
    """Turn LIBSVM-style text lines into dense ``(label, vector)`` pairs.

    Each non-blank line has the form ``<label> <index>:<value> ...`` where
    indices are 1-based. Blank lines are skipped; runs of whitespace between
    fields are tolerated.
    """
    parsed = []
    for line in examples:
        fields = line.split()
        if not fields:
            continue
        x = np.zeros(n_features)
        for field in fields[1:]:
            index, _, value = field.partition(":")
            x[int(index) - 1] = float(value)  # file indices are 1-based
        parsed.append((float(fields[0]), x))
    return parsed


def train_svm(examples, epochs, c, eta, w, b):
    """Train a linear SVM with per-example (stochastic) sub-gradient steps.

    For every example, in order, the weights are first shrunk by
    ``eta * w / N`` (``N`` = number of examples). If the example violates the
    margin, i.e. ``1 - y * (w . x + b) >= 0``, the weights additionally move
    by ``eta * c * y * x`` and the bias by ``eta * c * y``.

    Args:
        examples: Iterable of text lines, ``<label> <index>:<value> ...``
            with 1-based feature indices.
        epochs: Number of full passes over ``examples`` (an int).
        c: Capacity, the weight of the margin-violation term.
        eta: Learning rate.
        w: Initial weight vector; its length defines the number of features.
        b: Initial bias.

    Returns:
        Tuple ``(w, b)`` with the learned weights and bias. The ``w`` passed
        in is never modified in place.

    Raises:
        ValueError: If a label, index or value cannot be parsed as a number.
        IndexError: If a feature index is larger than ``len(w)``.
    """
    w = np.asarray(w, dtype=float)
    # Parse once up front: the text does not change between epochs.
    data = _parse_examples(examples, len(w))
    n = len(data)
    for _ in range(epochs):
        for y, x in data:
            if 1 - y * (np.dot(w, x) + b) >= 0:
                # Margin violated: regularisation shrink plus hinge-loss step.
                w = w - eta * ((w / n) - c * y * x)
                b = b + eta * y * c
            else:
                # Margin satisfied: only the regularisation shrink applies.
                w = w - eta * (w / n)
    return w, b
#This function predicts the label on given examples using given weights, bias and returns the accuracy
def getAccuracy(examples, w, b):
    """Return the fraction of examples classified correctly by ``(w, b)``.

    An example counts as correct when ``y * (w . x + b) > 0``; a score of
    exactly zero is counted as a miss.

    Args:
        examples: Iterable of text lines, ``<label> <index>:<value> ...``
            with 1-based feature indices. Blank lines are ignored.
        w: Weight vector; its length defines the number of features.
        b: Bias term.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when there are no
        examples to score.

    Raises:
        ValueError: If a label, index or value cannot be parsed as a number.
        IndexError: If a feature index is larger than ``len(w)``.
    """
    w = np.asarray(w, dtype=float)
    n_features = len(w)
    correct = 0
    total = 0
    for line in examples:
        fields = line.split()
        if not fields:
            continue
        y = float(fields[0])
        x = np.zeros(n_features)
        for field in fields[1:]:
            index, _, value = field.partition(":")
            x[int(index) - 1] = float(value)  # file indices are 1-based
        total += 1
        if y * (np.dot(w, x) + b) > 0:
            correct += 1
    if total == 0:
        # Nothing to score: avoid dividing by zero.
        return 0.0
    return correct / total
def _read_lines(path):
    """Return every line of the text file at *path*, closing it afterwards."""
    with open(path) as handle:
        return handle.readlines()


def main():
    """Train the SVM on the a7a training split and report accuracies.

    Command-line options:
        --epochs    number of passes over the training data (default 1)
        --capacity  capacity ``c`` of the SVM (default 0.868)

    Prints the settings, the accuracy on the training, test and dev splits
    (in that order), and finally the learned bias followed by the weights.
    """
    # Defaults live in argparse so that an explicit 0 / 0.0 is honoured
    # instead of being mistaken for "option not given".
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', action="store", help="No. of epochs to run",
                        type=int, default=1)
    parser.add_argument('--capacity', help="Capacity", action="store",
                        type=float, default=0.868)
    args = parser.parse_args()

    # file paths
    train_file = '/data/adult/a7a.train'
    dev_file = '/data/adult/a7a.dev'
    test_file = '/data/adult/a7a.test'

    # learning rate
    eta = 0.1
    epochs = args.epochs
    c = args.capacity

    # weights (There are 123 features in the data)
    w = np.zeros(123)
    # bias term
    b = 0

    print("EPOCHS: "+str(epochs))
    print("CAPACITY: "+str(c))

    # Train on the training split
    train_examples = _read_lines(train_file)
    learned_weights, learned_bias = train_svm(train_examples, epochs, c, eta, w, b)

    # Accuracy on training set
    training_accuracy = getAccuracy(train_examples, learned_weights, learned_bias)
    print("TRAINING_ACCURACY: " + str(training_accuracy))

    # Accuracy on test set
    test_examples = _read_lines(test_file)
    test_accuracy = getAccuracy(test_examples, learned_weights, learned_bias)
    print("TEST_ACCURACY: " + str(test_accuracy))

    # Accuracy on dev set
    dev_examples = _read_lines(dev_file)
    dev_accuracy = getAccuracy(dev_examples, learned_weights, learned_bias)
    print("DEV_ACCURACY: " + str(dev_accuracy))

    # Print final bias and weights (bias first)
    final_svm = [learned_bias] + list(learned_weights)
    print("FINAL_SVM: "+str(final_svm))
# Run the experiment only when executed as a script, not when imported.
if __name__ == "__main__":
    main()