-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathclassifierTrain.py
61 lines (41 loc) · 1.76 KB
/
classifierTrain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# -*- coding: utf-8 -*-
"""Sentiment Analysis Demo VIP.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1bD3Zr3yNVoVYtd7vIhCzkQKTM39G-ZYf
"""
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB
#Upload files to this notebook
# from google.colab import files
# uploaded = files.upload()
# Combine the files: pool the raw lines of every labelled corpus into one
# list. Extending (rather than reassigning) keeps the lines of the files
# read earlier instead of discarding them.
lines = []
for data_path in (
    "classifierData/imdb_labelled.txt",
    "classifierData/amazon_cells_labelled.txt",
    "classifierData/yelp_labelled.txt",
):
    with open(data_path, "r") as text_file:
        lines.extend(text_file.read().split("\n"))

# Keep only well-formed records: exactly two tab-separated fields
# (text, label) with a non-empty label. Each line is split once, on the
# tab character.
newLines = [
    fields
    for fields in (line.split("\t") for line in lines)
    if len(fields) == 2 and fields[1] != ""
]
# Split data into train features (the text) and train labels (the integer
# parsed from the second field of each record).
train_documents = [line[0] for line in newLines]
train_labels = [int(line[1]) for line in newLines]

# Convert the training set to a matrix of token indicators. `binary` must be
# a real boolean: the string "true" only worked through truthiness and is
# rejected by scikit-learn's parameter validation in recent releases.
count_vectorizer = CountVectorizer(binary=True)
train_documents = count_vectorizer.fit_transform(train_documents)

# Fit the BernoulliNB classifier on the vectorized documents.
classifier = BernoulliNB().fit(train_documents, train_labels)
def predictionOutput(sentence):
    """Print the sentiment the trained classifier assigns to ``sentence``.

    The sentence is vectorized with the module-level ``count_vectorizer``
    and passed to the module-level ``classifier``. A predicted label of 1
    prints the positive message and 0 prints the negative message; any
    other label prints nothing. The function returns ``None``.
    """
    features = count_vectorizer.transform([sentence])
    label = classifier.predict(features)[0]

    if label == 0:
        print("This is a Negative Sentiment")
        return
    if label == 1:
        print("This is a Positive Sentiment")
# Testing: quick manual check with two example sentences, in order.
for sample_sentence in (
    "I am having a very good and great day",
    "I hate this terrible movie",
):
    predictionOutput(sample_sentence)