-
Notifications
You must be signed in to change notification settings - Fork 0
/
assignment.py
155 lines (146 loc) · 4.52 KB
/
assignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def extract_mentions(tweet):
''' (str) -> list of str
'''
lst = []
i = 0
while i < len(tweet) and tweet.find('@', i) != -1:
start = tweet.find('@', i)
end = tweet.find(' ', start)
if end != -1:
lst.append(tweet[start + 1: end])
else:
lst.append(tweet[start + 1:])
i = end
return lst
def extract_hashtags(tweet):
''' (str) -> list of str
'''
lst = []
i = 0
while i < len(tweet) and tweet.find('#', i) != -1:
start = tweet.find('#', i)
end = tweet.find(' ', start)
if end != -1:
if tweet[start + 1: end] not in lst:
lst.append(tweet[start + 1: end])
else:
if tweet[start + 1:] not in lst:
lst.append(tweet[start + 1:])
i = end
return lst
def count_words(tweet, word_dict):
''' (str, dict of {str: int}) -> NoneType
'''
lst = tweet.split()
aim = []
for i in range(len(lst)):
word = ''
if lst[i][0] != '@' and lst[i][0] != '#':
for s in lst[i]:
if s.isupper():
word += s.lower()
elif s.isdigit() or s.islower():
word += s
if word != '':
aim.append(word)
for w in aim:
if w not in word_dict:
word_dict[w] = 1
else:
word_dict[w] += 1
def common_words(word_dict, limit):
''' (dict of {str: int}, int) -> NoneType
'''
word = list(word_dict.keys())
count = list(word_dict.values())
while len(count) > limit + 1:
aim = min(count)
aim_index = count.index(aim)
del(word_dict[word[aim_index]])
word.pop(aim_index)
count.pop(aim_index)
if count.count(min(count)) != 1:
target = min(count)
while target in count:
del(word_dict[word[count.index(target)]])
word.pop(count.index(target))
count.pop(count.index(target))
else:
del(word_dict[word[count.index(min(count))]])
word.pop(count.index(min(count)))
count.pop(count.index(min(count)))
def read_tweets(file):
''' (file open for reading) -> dict of {str: list of tweet tuples}
'''
d = {}
line = file.readline()
while line != '':
c = line[:-2]
d[c] = []
line = file.readline()
while line != '' and line.strip()[-1] != ':':
lst = line.split(',')
date = int(lst[1])
source = lst[3]
favourate = int(lst[4])
retweet = int(lst[5].strip())
tweet = ''
line = file.readline()
while line != '<<<EOT\n' and line != '':
tweet += line
line = file.readline()
d[c].append((c, tweet, date, source, favourate, retweet))
line = file.readline()
return d
def most_popular(tweet_dict, d1, d2):
''' (dict of {str: list of tweet tuples}, int, int) -> str
'''
d = {}
for key in tweet_dict:
d[key] = 0
for t in tweet_dict[key]:
if t[2] >= d1 and t[2] <= d2:
d[key] = d[key] + t[4] + t[5]
l1 = list(d.keys())
l2 = list(d.values())
if l2.count(max(l2)) != 1:
return 'Tie'
else:
return l1[l2.index(max(l2))]
def detect_author(tweet_dict, tweet):
'''(dict of {str: list of tweet tuples}, str) -> str
'''
unique_hash = get_unique_hashtags(tweet_dict)
hashtags = extract_hashtags(tweet)
hashs = list(unique_hash.values())
authors = list(unique_hash.keys())
for i in range(hashtags):
if hashtags[i] not in hashs:
hashtags.pop(i)
if len(hashtags) == 1:
return authors[hashs.index(hashtags[0])]
else:
return 'Unknown.'
#### helper ####
def get_unique_hashtags(tweet_dict):
''' (dict of {str: list of tweet tuples}, int, int) ->
dict of {str: list of str}
'''
d = {}
for key in tweet_dict:
d[key] = []
for t in tweet_dict[key]:
d[key].extend(extract_hashtags(t[1]))
l2 = list(d.values())
l3 = []
l4 = []
for sub in l2:
l3.extend(sub)
for tag in l3:
if l3.count(tag) != 1 and (tag not in l4):
l4.append(tag)
for k in d:
for i in range(d[k]):
if d[k][i] in l4:
d[k].pop(i)
return d