-
Notifications
You must be signed in to change notification settings - Fork 0
/
posneg.py
69 lines (55 loc) · 2.03 KB
/
posneg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from gensim import corpora, models, similarities
import logging
import sys
import readYelp
import numpy
from scipy.io import mmwrite
from scipy.sparse import lil_matrix
from nltk.corpus import stopwords
from nltk.stem import porter, lancaster
import string
import pickle
import re
def posneg(filename, n=0):
    """Interactively label Yelp reviews and save the labels as a sparse matrix.

    Each review is shown on stdout (its "stars" value followed by its "text")
    and the operator is re-prompted until the answer is exactly "y" or "n".
    Answers are stored in a users-by-businesses matrix as 1 for "y" and -1
    for "n"; the matrix is written with ``scipy.io.mmwrite`` only after every
    review has been answered.

    Args:
        filename: Target handed to ``mmwrite`` for the resulting matrix.
        n: When non-zero, forwarded to ``readYelp.readY`` as its second
            argument (presumably a cap on the number of reviews read --
            TODO confirm against readYelp). When 0, ``readY`` is called
            with the path only.
    """
    path = '../yelp/yelp_academic_dataset_review.json'
    # readY returns a pair; only its second element (the reviews) is used.
    if n == 0:
        _, reviews = readYelp.readY(path)
    else:
        _, reviews = readYelp.readY(path, n)

    # raw_input only exists on Python 2; Python 3's input() has the same
    # read-a-line-as-string behaviour.
    try:
        read_answer = raw_input
    except NameError:
        read_answer = input

    # Give every distinct business/user a dense index in first-seen order:
    # len(mapping) is the next free index at the moment a new id is added.
    bus2index = {}
    user2index = {}
    for review in reviews:
        bus2index.setdefault(review["business_id"], len(bus2index))
        user2index.setdefault(review["user_id"], len(user2index))

    matrix = lil_matrix((len(user2index), len(bus2index)))
    for review in reviews:
        answer = ""
        while answer not in ("y", "n"):
            # Same output as the Python 2 statement `print stars, text`,
            # but valid on both Python 2 and 3.
            sys.stdout.write("%s %s\n" % (review["stars"], review["text"]))
            answer = read_answer()
        row = user2index[review["user_id"]]
        col = bus2index[review["business_id"]]
        # The loop above only exits on "y" or "n".
        matrix[row, col] = 1 if answer == "y" else -1
    mmwrite(filename, matrix)
def posneg2(filename, startPos, endPos):
    """Interactively label a slice of the Yelp reviews and pickle the result.

    Reviews in ``[startPos:endPos]`` are shown on stdout one at a time (the
    "stars" value followed by the "text") and the operator is re-prompted
    until the answer is exactly "y" or "n". Each answered review becomes a
    ``[user_id, business_id, label]`` list with label 1 for "y" and -1 for
    "n"; the list of all such entries is pickled to ``filename`` after the
    last review has been answered.

    Args:
        filename: Path of the pickle file to write (opened in binary mode).
        startPos: Start index of the slice of reviews to label.
        endPos: End index (exclusive) of the slice; also forwarded to
            ``readYelp.readY`` as its second argument (presumably so that
            only the first ``endPos`` reviews are read -- TODO confirm
            against readYelp).
    """
    # readY returns a pair; only its second element (the reviews) is used.
    _, reviews = readYelp.readY(
        '../yelp/yelp_academic_dataset_review.json', endPos)

    # raw_input only exists on Python 2; Python 3's input() has the same
    # read-a-line-as-string behaviour.
    try:
        read_answer = raw_input
    except NameError:
        read_answer = input

    outputreviews = []
    for review in reviews[startPos:endPos]:
        answer = ""
        while answer not in ("y", "n"):
            # Same output as the Python 2 statement `print stars, text`,
            # but valid on both Python 2 and 3.
            sys.stdout.write("%s %s\n" % (review["stars"], review["text"]))
            answer = read_answer()
        # The loop above only exits on "y" or "n".
        label = 1 if answer == "y" else -1
        outputreviews.append([review["user_id"], review["business_id"], label])

    # `with` closes the file even if pickling raises part-way through.
    with open(filename, 'wb') as ofile:
        pickle.dump(outputreviews, ofile)