forked from phosseini/Sherlock-sentiment
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBaselineClassifier.py
46 lines (33 loc) · 1.28 KB
/
BaselineClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from FeatureVectorBuilder import FeatureVector
from ModelBuilder import ModelBuilder
from sklearn import dummy
import numpy as np
# Baseline experiment: fit scikit-learn's DummyClassifier on the training
# feature vectors and print its accuracy on the test set.
fv = FeatureVector()

# Read the labeled training and test data from text files.
trainX, trainY = fv.readData('E:\\Documents\\CSCI_6907\\train.txt')
testX, testY = fv.readData('E:\\Documents\\CSCI_6907\\sample_test_set.txt')

# Concatenate train and test so that the feature vectors for both are built
# by one fv.build() call; the result is split back apart below.
X_ = trainX + testX
Y_ = trainY + testY
X_, Y_ = fv.build(X_, Y_)

# Split at the train/test boundary. Slicing the test part from the boundary
# (rather than with a negative length) matters for an empty test set: a
# slice starting at -0 would return the whole sequence, training rows included.
# NOTE(review): assumes fv.build() keeps the number and order of samples --
# confirm against FeatureVectorBuilder.
X_train = X_[:len(trainX)]
Y_train = Y_[:len(trainY)]
X_test = X_[len(trainX):]
Y_test = Y_[len(trainY):]

# Fail with a clear message instead of dividing by zero further down.
if len(Y_test) == 0:
    raise ValueError('test set is empty; cannot compute accuracy')

# strategy='uniform' predicts labels uniformly at random; with
# random_state=None the reported accuracy varies from run to run.
clf = dummy.DummyClassifier(strategy='uniform',
                            random_state=None, constant=None)
clf = clf.fit(X_train, Y_train)

# Predict the test set labels and convert the numpy array to a Python list.
predictedY = clf.predict(X_test).tolist()

# accuracy = number of correctly predicted labels / number of test samples
correct = sum(1 for predicted, actual in zip(predictedY, Y_test)
              if predicted == actual)
# float() keeps this a true division even if the script runs on Python 2.
accuracy = float(correct) / len(Y_test)
print(accuracy)