-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathquery_tweets.py
executable file
·60 lines (51 loc) · 2.07 KB
/
query_tweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import argparse
import argcomplete
import json
import os
import time
from nltk.twitter import Query, credsfromfile
parser = argparse.ArgumentParser(description='collect tweets based on keywords')
parser.add_argument('-k', '--keywords-file', type=argparse.FileType(mode='r', encoding='utf-8'),
help='keywords or hashtags file. The file should contain one keyword/hashtag per line',
required=True)
parser.add_argument('-o', '--outfile', type=argparse.FileType(mode='a', encoding='utf-8'),
help='the output json file.', required=True)
parser.add_argument('-n', '--number', type=int,
help='the number of tweets that you want to collect', required=True)
parser.add_argument('-l', '--lang', type=str,
help='language', required=True)
# cwd = os.getcwd()
# set environmental variable
current_dir = os.path.dirname(os.path.realpath(__file__))
os.environ["TWITTER"] = current_dir + os.path.sep + "twitter-files"
def dump_tweets(tweets, json_writer):
count = 0
try:
for tweet in tweets:
json.dump(tweet, json_writer)
json_writer.write("\n")
count += 1
except IOError as err:
print("error: {0}".format(err))
except BaseException as err:
print("error: {0}".format(err))
if count > 1:
print("{} tweets found".format(count))
else:
print('no tweets found')
def collect_tweets(my_keyword, json_writer, stop_num, lang):
my_keyword = my_keyword.strip()
print('finding tweets with {} keyword'.format(my_keyword))
oauth = credsfromfile()
client = Query(**oauth)
tweets = client.search_tweets(keywords=my_keyword, limit=stop_num, lang=lang)
dump_tweets(tweets, json_writer)
if __name__ == '__main__':
args = parser.parse_args()
argcomplete.autocomplete(parser)
queries = args.keywords_file.read().splitlines()
outfile = args.outfile
number = args.number
lang = args.lang
for query in queries:
collect_tweets(my_keyword=query, json_writer=outfile, stop_num=number, lang=lang)