-
Notifications
You must be signed in to change notification settings - Fork 5
/
SentimentClassifier.py
64 lines (49 loc) · 2.23 KB
/
SentimentClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB
class SentimentClassifier:
    """Sentence-level sentiment classifier.

    Sentences are turned into bag-of-words features with a
    ``CountVectorizer`` and classified with a ``BernoulliNB`` model.

    Call order: ``aggregateTrainingData()`` -> ``trainClassifier()`` ->
    ``predictionOutput(sentence)``.
    """

    # Labelled corpus that is read when the caller supplies no data files.
    DEFAULT_DATAFILE = "classifierData/masterdata_labelled.txt"

    def __init__(self):
        self.classifier = BernoulliNB()
        # Holds the raw sentences right after loading; it is replaced by the
        # vectorized feature matrix once vectorizeTrainingData() has run.
        self.trainDocuments = []
        self.trainLabels = []
        # `binary` has to be a real boolean. The string "true" only worked by
        # being truthy and is rejected by scikit-learn releases that validate
        # constructor parameter types.
        self.count_vectorizer = CountVectorizer(binary=True)

    @staticmethod
    def _parseLabelledLines(lines):
        """Split ``sentence<TAB>label`` lines into documents and int labels.

        Lines that do not consist of exactly two tab-separated fields, or
        whose label field is empty, are skipped.

        Args:
            lines: iterable of strings, one record per item.

        Returns:
            A ``(documents, labels)`` tuple of two equally long lists.

        Raises:
            ValueError: if a non-empty label field is not an integer.
        """
        documents = []
        labels = []
        for line in lines:
            fields = line.split("\t")
            if len(fields) != 2 or fields[1] == "":
                continue
            documents.append(fields[0])
            labels.append(int(fields[1]))
        return documents, labels

    def aggregateTrainingData(self, datafiles=None):
        """
        Aggregates labeled training data from datafiles and vectorizes it.

        Args:
            datafiles: a single path string or an iterable of paths to
                tab-separated ``sentence<TAB>label`` files (for example
                "classifierData/imdb_labelled.txt"). When None, the
                master file ``DEFAULT_DATAFILE`` is read.

        Side effects:
            Replaces ``self.trainDocuments`` and ``self.trainLabels`` and
            then calls ``vectorizeTrainingData()``.
        """
        if datafiles is None:
            datafiles = [self.DEFAULT_DATAFILE]
        elif isinstance(datafiles, str):
            # A bare string is one path, not an iterable of characters.
            datafiles = [datafiles]

        documents = []
        labels = []
        for path in datafiles:
            with open(path, "r") as text_file:
                lines = text_file.read().split("\n")
            fileDocuments, fileLabels = self._parseLabelledLines(lines)
            documents.extend(fileDocuments)
            labels.extend(fileLabels)

        # Split data into Train Features & Train Labels:
        self.trainDocuments = documents
        self.trainLabels = labels
        self.vectorizeTrainingData()

    def vectorizeTrainingData(self):
        """
        Fit the vectorizer on the training sentences and replace
        ``self.trainDocuments`` with the resulting feature matrix.
        """
        self.trainDocuments = self.count_vectorizer.fit_transform(
            self.trainDocuments)

    def trainClassifier(self):
        """
        Fit the classifier to the training data
        """
        self.classifier = self.classifier.fit(
            self.trainDocuments, self.trainLabels)

    def predictionOutput(self, sentence):
        """
        This function outputs the sentiment analysis label (Positive or Negative) for a given sentence

        The sentence is vectorized with the already-fitted vectorizer, so
        aggregateTrainingData() and trainClassifier() must have run first.
        Nothing is printed for a predicted label other than 1 or 0.
        """
        prediction = self.classifier.predict(
            self.count_vectorizer.transform([sentence]))
        if prediction[0] == 1:
            print("This is a Positive Sentiment")
        elif prediction[0] == 0:
            print("This is a Negative Sentiment")