-
Notifications
You must be signed in to change notification settings - Fork 3
/
info_graph.py
executable file
·123 lines (82 loc) · 2.8 KB
/
info_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import pandas as pd
import numpy as np
from sklearn import linear_model
import tweepy
import requests
import re
import time
from tweepy import OAuthHandler
from get_config import get_config
# Credentials are read from the mapping returned by get_config().
env = get_config()
consumer_key, consumer_secret = env.get('CONSUMER_KEY'), env.get('CONSUMER_SECRET')
access_token, access_secret = env.get('ACCESS_TOKEN'), env.get('ACCESS_TOKEN_SECRET')

# Authenticated tweepy client shared by every function in this module.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)

# Input table; the functions below read tweet ids from its `t_id` column.
t = pd.read_csv('fetcher/top100.csv')
def rt_usrs(tweet_id):
    """Return the ids of the users behind the retweets of `tweet_id`.

    The retweets are requested through the module-level `api` client, with
    100 passed as the second positional argument (presumably the maximum
    number of retweets to return -- confirm against the tweepy version in
    use).
    """
    return [retweet.user.id for retweet in api.retweets(tweet_id, 100)]
def t_all_tweets(user, n):
    """Collect the first `n` timeline pages of `user` into one list.

    Each page is requested with count=200 and include_rts=True. After every
    tenth page the function sleeps for 90 seconds, and a progress line is
    printed once per page.

    Returns the concatenation of all pages, in request order.
    """
    collected = []
    for page in range(1, n + 1):
        collected.extend(
            api.user_timeline(id=user, count=200, page=page, include_rts=True)
        )
        if page % 10 == 0:
            # Pause after every tenth request before asking for more pages.
            print('sleep for 90 seconds')
            time.sleep(90)
        # Emits the same text as the comma-separated print statement did.
        print('%s of  %s pages done' % (page, n))
    return collected
def t_mentions(user):
    """Count the @thisisfusion mentions in `user`'s recent timeline.

    The first two timeline pages are fetched through `t_all_tweets`, which
    requests retweets as well, so both direct mentions and retweets that
    carry the handle are counted.

    Returns the total number of occurrences as an int (0 when the timeline
    is empty).
    """
    tweets = t_all_tweets(user, 2)  # first 2 pages timeline, 16 pages max
    # Twitter handles are case-insensitive, so match every casing. The
    # lookahead stops a longer handle that merely starts with the same
    # characters (e.g. @thisisfusion_news) from being counted.
    handle = re.compile(r'@thisisfusion(?![A-Za-z0-9_])', re.IGNORECASE)
    # Count per tweet rather than on one concatenated string: no quadratic
    # string building and no accidental match spanning two tweets.
    return sum(len(handle.findall(tweet.text)) for tweet in tweets)
def t_user_rank(users):
    """Build follower and mention statistics for every user id in `users`.

    For each id the profile is looked up through the module-level `api`
    client and the mention count is obtained from `t_mentions`. A progress
    line is printed per user, and the function sleeps for 60 seconds after
    every fifth user.

    Returns a dict mapping screen name to
    [followers_count, mention_count, followers_count * mention_count].
    """
    udic = {}
    total = len(users)
    for count, user in enumerate(users, 1):
        # Fetch the profile once and read both fields from it; every
        # get_user call is a separate API request.
        profile = api.get_user(id=user)
        follower = profile.followers_count
        mention = t_mentions(user)
        udic[profile.screen_name] = [follower, mention, follower * mention]
        print('%s of %s users added into dictionary' % (count, total))
        if count % 5 == 0:
            print('sleep for one minute')
            time.sleep(60)
    return udic
def t_tweets_influencers(n):
    """Collect influencer statistics for the first `n` tweets in table `t`.

    For each of the first `n` entries of `t.t_id`, the retweeting users are
    looked up with `rt_usrs` and ranked with `t_user_rank`. Every ranked
    user contributes one entry to each output column.

    Returns a dict of five equally long lists keyed by 't_id',
    'influencer', 'score', 'mention' and 'follower'. When `n` is 0 the
    lists are empty (no exception is raised).
    """
    # Create the columns up front so the result exists even when the loop
    # body never runs.
    newdic = {'t_id': [], 'influencer': [], 'score': [], 'mention': [], 'follower': []}
    for i in range(n):
        tweet_id = t.t_id[i]
        # Rank every account returned for this tweet's retweets.
        udic = t_user_rank(rt_usrs(tweet_id))
        for screen_name, (follower, mention, score) in udic.items():
            newdic['t_id'].append(tweet_id)
            newdic['influencer'].append(screen_name)
            newdic['score'].append(score)
            newdic['mention'].append(mention)
            newdic['follower'].append(follower)
        print('------- %s of %s tweets analyzed -------' % (i + 1, n))
    return newdic
# Analyse the first 20 tweet ids of the input table and write the collected
# columns to a CSV file.
result = t_tweets_influencers(20)
df = pd.DataFrame(data=result)
df.to_csv(path_or_buf='influencers(20 posts).csv', encoding='utf-8')