-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtweetset_cli.py
92 lines (74 loc) · 3.67 KB
/
tweetset_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from elasticsearch_dsl.connections import connections
from utils import dataset_params_to_search
import json
import argparse
import sys
# Module-level side effect: this runs as soon as the file is imported or
# executed, configuring the elasticsearch_dsl connection against 'localhost'
# with timeout=90 (seconds, per the Elasticsearch client's convention).
# The search helpers below rely on this connection when they call scan().
connections.create_connection(hosts=['localhost'], timeout=90)
def fetch_by_screen_name(screen_name, source_datasets):
    """Yield the tweets posted by the user with the given screen name.

    All four tweet types (original, reply, retweet, quote) are included.

    Args:
        screen_name: Screen name of the poster; any leading ``@`` characters
            are stripped before it is used as the ``poster_any`` parameter.
        source_datasets: Dataset ids, passed through unchanged as the
            ``source_datasets`` search parameter.

    Yields:
        The ``tweet`` field of each hit, decoded from JSON.
    """
    # Assumes dataset_params_to_search returns an elasticsearch_dsl Search
    # (it is used via .source() and .scan() below).
    search = dataset_params_to_search({
        'source_datasets': source_datasets,
        'tweet_type_original': 'true',
        'tweet_type_reply': 'true',
        'tweet_type_retweet': 'true',
        'tweet_type_quote': 'true',
        'poster_any': screen_name.lstrip('@'),
    }, skip_aggs=True)
    # Search objects are immutable: source() returns a modified copy. The
    # result must be rebound, otherwise the field filter is silently dropped
    # and every hit carries the full document instead of just 'tweet'.
    search = search.source(['tweet'])
    for hit in search.scan():
        yield json.loads(hit.tweet)
def fetch_by_source_screen_name(screen_name, source_datasets):
    """Yield source tweets of a user, recovered from retweets and quotes.

    Searches retweets and quotes whose source poster matches the screen name
    and yields the embedded source tweet instead of the retweet/quote itself.

    Args:
        screen_name: Screen name of the source poster; any leading ``@``
            characters are stripped before it is used as the
            ``source_poster_any`` parameter.
        source_datasets: Dataset ids, passed through unchanged as the
            ``source_datasets`` search parameter.

    Yields:
        ``tweet['retweeted_status']`` when present, otherwise
        ``tweet['quoted_status']`` when present. Hits carrying neither key
        are skipped. No de-duplication is performed, so the same source
        tweet may be yielded once per matching retweet/quote.
    """
    # Assumes dataset_params_to_search returns an elasticsearch_dsl Search
    # (it is used via .source() and .scan() below).
    search = dataset_params_to_search({
        'source_datasets': source_datasets,
        'tweet_type_retweet': 'true',
        'tweet_type_quote': 'true',
        'source_poster_any': screen_name.lstrip('@'),
    }, skip_aggs=True)
    # Search objects are immutable: source() returns a modified copy. The
    # result must be rebound, otherwise the field filter is silently dropped
    # and every hit carries the full document instead of just 'tweet'.
    search = search.source(['tweet'])
    for hit in search.scan():
        tweet = json.loads(hit.tweet)
        # retweeted_status deliberately takes precedence when both keys exist.
        # NOTE(review): for a retweet of a quote tweet the match may have been
        # on the quoted author -- confirm this precedence is intended.
        if 'retweeted_status' in tweet:
            yield tweet['retweeted_status']
        elif 'quoted_status' in tweet:
            yield tweet['quoted_status']
def fetch_by_mention_screen_name(mention_screen_name, source_datasets):
    """Yield the tweets in which the given screen name is mentioned.

    All four tweet types (original, reply, retweet, quote) are included.

    Args:
        mention_screen_name: Screen name of the mentioned user; any leading
            ``@`` characters are stripped before it is used as the
            ``mention_any`` parameter.
        source_datasets: Dataset ids, passed through unchanged as the
            ``source_datasets`` search parameter.

    Yields:
        The ``tweet`` field of each hit, decoded from JSON.
    """
    # Assumes dataset_params_to_search returns an elasticsearch_dsl Search
    # (it is used via .source() and .scan() below).
    search = dataset_params_to_search({
        'source_datasets': source_datasets,
        'tweet_type_original': 'true',
        'tweet_type_reply': 'true',
        'tweet_type_retweet': 'true',
        'tweet_type_quote': 'true',
        'mention_any': mention_screen_name.lstrip('@'),
    }, skip_aggs=True)
    # Search objects are immutable: source() returns a modified copy. The
    # result must be rebound, otherwise the field filter is silently dropped
    # and every hit carries the full document instead of just 'tweet'.
    search = search.source(['tweet'])
    for hit in search.scan():
        yield json.loads(hit.tweet)
def _dataset_ids(value):
    """Convert the comma separated ``datasets`` argument into a list of ids.

    Whitespace around each id is stripped and empty entries (for example from
    a trailing comma) are dropped, so ``"a, b,"`` yields ``['a', 'b']``.

    Raises:
        argparse.ArgumentTypeError: if no dataset id remains.
    """
    ids = [part.strip() for part in value.split(',')]
    ids = [part for part in ids if part]
    if not ids:
        raise argparse.ArgumentTypeError('expected at least one dataset id')
    return ids


def _build_parser():
    """Build the argument parser: a dataset list plus one sub-command."""
    parser = argparse.ArgumentParser()
    parser.add_argument('datasets', type=_dataset_ids,
                        help='comma separated list of dataset ids')
    # NOTE: an --output-dir option was sketched here but never wired up
    # (it would also require importing os).
    subparsers = parser.add_subparsers(dest='command', help='command help')

    by_screen_name_parser = subparsers.add_parser(
        'by_screen_name',
        help='Fetch tweets posted by user with provided screen name.')
    by_screen_name_parser.add_argument('screen_name')

    by_source_screen_name_parser = subparsers.add_parser(
        'by_source_screen_name',
        help='Fetch tweets that are posted by user with provided '
             'screen name by looking in retweets or quotes of those '
             'tweets. The source tweet is extracted from the retweet '
             'or quote.')
    by_source_screen_name_parser.add_argument('screen_name')

    by_mention_screen_name_parser = subparsers.add_parser(
        'by_mention_screen_name',
        help='Fetch tweets in which user with provided screen name '
             'is mentioned.')
    by_mention_screen_name_parser.add_argument('screen_name')
    return parser


def main(argv=None):
    """Run the command line interface.

    Parses the arguments, runs the selected fetch function and prints each
    resulting tweet to stdout as one JSON document per line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: with code 1 (after printing help) when no sub-command is
            given, or with argparse's usual code 2 on invalid arguments.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    if args.command is None:
        # Sub-commands are optional to argparse, so enforce one here.
        parser.print_help()
        sys.exit(1)

    # Built after the None check so the help path does not touch the fetchers.
    fetchers = {
        'by_screen_name': fetch_by_screen_name,
        'by_source_screen_name': fetch_by_source_screen_name,
        'by_mention_screen_name': fetch_by_mention_screen_name,
    }
    tweets = fetchers[args.command](args.screen_name, args.datasets)
    for tweet in tweets:
        print(json.dumps(tweet))


if __name__ == '__main__':
    main()