forked from sengupta/twss
-
Notifications
You must be signed in to change notification settings - Fork 0
/
twss.py
executable file
·66 lines (51 loc) · 1.85 KB
/
twss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import nltk
import SocketServer
import sys
import datetime
# TCP port the server listens on.  It is taken from the first command-line
# argument; when that argument is missing or is not an integer the default
# below is used instead of aborting at import time.
DEFAULT_PORT = 8083
try:
    PORT = int(sys.argv[1])
except IndexError:
    # No port argument was supplied.
    PORT = DEFAULT_PORT
except ValueError:
    # int() signals non-numeric text with ValueError; say so on stderr so the
    # fallback is never silent.
    sys.stderr.write("Invalid port %r; using default %d\n"
                     % (sys.argv[1], DEFAULT_PORT))
    PORT = DEFAULT_PORT
def extract_features(phrase):
    """
    Build the feature dictionary used to train and query the classifier.

    The phrase is split with nltk.word_tokenize and every resulting token
    becomes a presence feature: the key is 'contains(<token>)' and the value
    is True.  A token that occurs several times yields a single key, and an
    empty token list yields an empty dictionary.
    """
    # Every token is present by construction, so the value is simply True;
    # testing membership in the token list again would only add a quadratic
    # scan without changing the result.
    return {'contains(%s)' % word: True for word in nltk.word_tokenize(phrase)}
# Load the labelled corpora: each line of twss.txt is a positive (True)
# example and each line of non_twss.txt a negative (False) one.  The `with`
# blocks close each file as soon as it has been read; the names twss_data and
# non_twss_data remain bound (to the closed file objects) afterwards.
training_data = []
with open('twss.txt') as twss_data:
    training_data.extend((line, True) for line in twss_data)
with open('non_twss.txt') as non_twss_data:
    training_data.extend((line, False) for line in non_twss_data)

# Turn every (line, label) pair into (features, label) and train on the lot.
training_feature_set = [(extract_features(line), label)
                        for (line, label) in training_data]
classifier = nltk.NaiveBayesClassifier.train(training_feature_set)

# Start-up smoke test: print the label assigned to a sample phrase.
print(classifier.classify(extract_features("That was not so hard")))

# Request log, opened in append mode.  It is deliberately left open for the
# life of the process because ServeTWSS.handle writes to it on every request.
log = open('log.txt', 'a')
class ServeTWSS(SocketServer.BaseRequestHandler):
    """
    Request handler that classifies one short message per connection.

    It reads up to 140 bytes from the client, feeds them to the module-level
    classifier, answers with the text "True" or "False", echoes progress on
    stdout and appends one record to the module-level log file.
    """

    def handle(self):
        """Serve a single request: receive, log, classify, reply."""
        self.data = self.request.recv(140)
        peer_ip = self.client_address[0]
        statement = self.data

        # "%s %s" reproduces the single space the print statement puts
        # between two comma-separated items.
        print("%s %s" % ("Got connection from: ", peer_ip))
        print("%s %s" % (" Got data: ", statement))

        # First part of the log record: address, timestamp, quoted message.
        # The trailing ", " leaves room for the verdict written below.
        # NOTE(review): the message is written verbatim, so quotes or
        # newlines in client data end up unescaped in the log.
        record_fields = [
            peer_ip,
            str(datetime.datetime.now()),
            "\"" + statement + "\"",
        ]
        log.write(", ".join(record_fields) + ", ")

        # Choose the reply, console line and log suffix in one place.
        if classifier.classify(extract_features(statement)):
            reply, console_line, log_tail = "True", "Classified True\n", "True\n"
        else:
            reply, console_line, log_tail = "False", "Classified False\n", "False\n"

        self.request.sendall(reply)
        print(console_line)
        log.write(log_tail)
        # Flush so the record reaches disk before the next request arrives.
        log.flush()
def serve(PORT):
    """
    Run the classification server until it is interrupted.

    A SocketServer.TCPServer is bound to host "" on the given PORT with
    ServeTWSS as its request handler, and the call then blocks inside
    serve_forever().  Whatever exception ends that loop (including Ctrl-C)
    still propagates to the caller, but the listening socket is closed first.
    """
    server = SocketServer.TCPServer(("", PORT), ServeTWSS)
    try:
        server.serve_forever()
    finally:
        # Release the listening socket on the way out instead of leaking it.
        server.server_close()
if __name__ == "__main__":
    # Script entry point: announce start-up on stdout, then block while the
    # server handles requests on the configured port.
    print("Serving...")
    serve(PORT)