Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added new Options #272

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions Exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ def main(argv):

if len(argv) == 1 and argv[0] == '-h':
f = open('exporter_help_text.txt', 'r')
print f.read()
print(f.read())
f.close()

return

try:
opts, args = getopt.getopt(argv, "", ("username=", "near=", "within=", "since=", "until=", "querysearch=", "toptweets", "maxtweets=", "output="))
opts, args = getopt.getopt(argv, "", ("not_containing=","min_retweets=","min_replies=","min_likes=","username=", "near=", "within=", "since=", "until=", "querysearch=", "toptweets", "maxtweets=", "output=", "nolinks", "lang="))

tweetCriteria = got.manager.TweetCriteria()
outputFileName = "output_got.csv"
Expand All @@ -42,6 +42,24 @@ def main(argv):

elif opt == '--maxtweets':
tweetCriteria.maxTweets = int(arg)

elif opt == '--nolinks':
tweetCriteria.noLinks = True

elif opt == '--lang':
tweetCriteria.lang = arg

elif opt == '--min_likes':
tweetCriteria.min_likes = arg

elif opt == '--min_replies':
tweetCriteria.min_replies = arg

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi! Could you please explain where you parse the number of replies for a specific tweet?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, the number of replies is parsed from the command line like the other parameters and is then handled in "TweetManager.py".
For example:
python3 Exporter.py --querysearch "europe" --lang "en" --min_replies 10


elif opt == '--min_retweets':
tweetCriteria.min_retweets = arg

elif opt == '--not_containing':
tweetCriteria.not_containing = arg

elif opt == '--near':
tweetCriteria.near = '"' + arg + '"'
Expand Down
13 changes: 11 additions & 2 deletions exporter_help_text.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@ To use this script you can pass the following attributes:
near: A reference location area from where tweets were generated
within: A distance radius from "near" location (e.g. 15mi)
maxtweets: The maximum number of tweets to retrieve
toptweets: Only the tweets provided as top tweets by Twitter (no parameters required)
toptweets: Only the tweets provided as top tweets by Twitter (no parameters required; not working on Python 3)
output: A filename to export the results (default is "output_got.csv")
nolinks: Only tweets without links
lang: Language of the tweets (needs to be abbreviated, e.g. English: lang="en")
min_likes: Minimum number of likes
min_retweets: Minimum number of retweets
min_replies: Minimum number of replies
not_containing: Word that should not be in the tweet

Examples:
# Example 1 - Get tweets by username [barackobama]
Expand All @@ -20,4 +26,7 @@ python Exporter.py --querysearch "europe refugees" --maxtweets 1
python Exporter.py --username "barackobama" --since 2015-09-10 --until 2015-09-12 --maxtweets 1

# Example 4 - Get the last 10 top tweets by username
python Exporter.py --username "barackobama" --maxtweets 10 --toptweets
python Exporter.py --username "barackobama" --maxtweets 10 --toptweets

# Example 5 - Get 100 tweets with bound dates, in English, without links, with at least 5 retweets and not containing the word "giveaway"
python3 Exporter.py --querysearch "europe" --since 2015-09-11 --until 2015-09-12 --lang "en" --maxtweets 100 --nolinks --min_retweets 5 --not_containing "giveaway"
23 changes: 23 additions & 0 deletions got/manager/TweetManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,32 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):

if hasattr(tweetCriteria, 'since'):
urlGetData += ' since:' + tweetCriteria.since

if hasattr(tweetCriteria, 'noLinks'):
urlGetData += ' -filter:links'

if hasattr(tweetCriteria, 'until'):
urlGetData += ' until:' + tweetCriteria.until

if hasattr(tweetCriteria, 'min_retweets'):
urlGetData += ' min_retweets:' + tweetCriteria.min_retweets

if hasattr(tweetCriteria, 'min_likes'):
urlGetData += ' min_faves:' + tweetCriteria.min_likes

if hasattr(tweetCriteria, 'min_replies'):
urlGetData += ' min_replies:' + tweetCriteria.min_replies

if hasattr(tweetCriteria, 'not_containing'):
lsts = tweetCriteria.not_containing.split()

for word in lsts:
urlGetData += ' -' + word

if hasattr(tweetCriteria, 'lang'):
#args = 'lang=' + tweetCriteria.lang + '&'
urlGetData += ' lang:' + tweetCriteria.lang
#else:


if hasattr(tweetCriteria, 'topTweets'):
Expand Down
36 changes: 29 additions & 7 deletions got3/manager/TweetManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
tweetPQ = PyQuery(tweetHTML)
tweet = models.Tweet()

usernameTweet = tweetPQ("span.username.js-action-profile-name b").text()
usernameTweet = tweetPQ("span:first.username.u-dir b").text()
txt = re.sub(r"\s+", " ", tweetPQ("p.js-tweet-text").text().replace('# ', '#').replace('@ ', '@'))
retweets = int(tweetPQ("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
favorites = int(tweetPQ("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
Expand Down Expand Up @@ -89,7 +89,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
@staticmethod
def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
url = "https://twitter.com/i/search/timeline?f=tweets&q=%s&src=typd&%smax_position=%s"

args = ''
urlGetData = ''
if hasattr(tweetCriteria, 'username'):
urlGetData += ' from:' + tweetCriteria.username
Expand All @@ -102,12 +102,34 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):

if hasattr(tweetCriteria, 'querySearch'):
urlGetData += ' ' + tweetCriteria.querySearch

if hasattr(tweetCriteria, 'noLinks'):
urlGetData += ' -filter:links'

if hasattr(tweetCriteria, 'min_retweets'):
urlGetData += ' min_retweets:' + tweetCriteria.min_retweets

if hasattr(tweetCriteria, 'min_likes'):
urlGetData += ' min_faves:' + tweetCriteria.min_likes

if hasattr(tweetCriteria, 'min_replies'):
urlGetData += ' min_replies:' + tweetCriteria.min_replies

if hasattr(tweetCriteria, 'not_containing'):
lsts = tweetCriteria.not_containing.split()

for word in lsts:
urlGetData += ' -' + word

if hasattr(tweetCriteria, 'lang'):
urlLang = 'lang=' + tweetCriteria.lang + '&'
else:
urlLang = ''
url = url % (urllib.parse.quote(urlGetData), urlLang, refreshCursor)
#args = 'lang=' + tweetCriteria.lang + '&'
urlGetData += ' lang:' + tweetCriteria.lang
#else:

#if not hasattr(tweetCriteria, 'topTweets'):
#args = ' f=live'

url = url % (urllib.parse.quote(urlGetData), args, refreshCursor)
#print(url)

headers = [
Expand All @@ -119,7 +141,7 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
('Referer', url),
('Connection', "keep-alive")
]

print(url)
if proxy:
opener = urllib.request.build_opener(urllib.request.ProxyHandler({'http': proxy, 'https': proxy}), urllib.request.HTTPCookieProcessor(cookieJar))
else:
Expand Down