Commit 55d6a9c: Cleanup and final redgif support

barrycarey committed Feb 18, 2024
1 parent cd56958 commit 55d6a9c

Showing 5 changed files with 73 additions and 22 deletions.
13 changes: 11 additions & 2 deletions redditrepostsleuth/core/celery/task_logic/ingest_task_logic.py

@@ -51,7 +51,14 @@ def pre_process_post(


     post = reddit_submission_to_post(submission)

+    proxy = None
+    parsed_url = urlparse(post.url)
+    if parsed_url.netloc in domains_to_proxy:
+        proxy = proxy_manager.get_proxy().address
+
     if post.post_type_id == 2: # image
+        # Hacky RedGif support. Will need to be refactored if we have to do similar for other sites
+        redgif_url = None
         if 'redgif' in post.url:
             token = redgif_manager.get_redgifs_token()
@@ -62,7 +69,7 @@ def pre_process_post(
                 redgif_manager.remove_redgifs_token('localhost')
                 raise e

-        process_image_post(post, url=redgif_url)
+        process_image_post(post, url=redgif_url, proxy=proxy)
     elif post.post_type_id == 6: # gallery
         process_gallery(post, submission)
@@ -82,7 +89,9 @@ def process_image_post(post: Post, url: str = None, proxy: str = None, hash_size
     :param hash_size: Size of hash
     :return: Post object with hashes
     """
-    log.info('Hashing image with URL: %s', post.url)
+    log.debug('Hashing image with URL: %s', post.url)
+    if url:
+        log.info('Hashing %s', post.url)

     try:
         img = generate_img_by_url_requests(url or post.url, proxy=proxy)
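For context on the "Hacky RedGif support" branch: RedGifs page links do not point at a media file, so before hashing the worker has to trade an API token for a direct URL. The lines that actually resolve redgif_url are collapsed between the two hunks above, so the sketch below is a rough illustration only; the endpoint path, the response shape, and the resolve_redgif_url helper are assumptions based on the public RedGifs v2 API, not code from this repository.

    import requests

    REDGIFS_API = 'https://api.redgifs.com'  # assumed base URL for the v2 API

    def resolve_redgif_url(page_url: str, token: str, proxies: dict = None) -> str:
        # Hypothetical helper, not from the commit: turn a redgifs watch-page
        # URL (e.g. https://www.redgifs.com/watch/<id>) into a direct media URL.
        gif_id = page_url.rstrip('/').split('/')[-1]
        resp = requests.get(
            f'{REDGIFS_API}/v2/gifs/{gif_id}',
            headers={'Authorization': f'Bearer {token}'},
            proxies=proxies,
            timeout=10,
        )
        resp.raise_for_status()  # an auth failure here is why the caller evicts its cached token
        return resp.json()['gif']['urls']['hd']  # assumed response shape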
2 changes: 1 addition & 1 deletion redditrepostsleuth/core/celery/tasks/ingest_tasks.py

@@ -69,7 +69,7 @@ def save_new_post(self, submission: dict, repost_check: bool = True):

         monitored_sub = uow.monitored_sub.get_by_sub(post.subreddit)
         if monitored_sub and monitored_sub.active:
-            log.info('Sending ingested post to monitored sub queue')
+            log.info('Sending ingested post to monitored sub queue for %s', monitored_sub.name)
             celery.send_task('redditrepostsleuth.core.celery.tasks.monitored_sub_tasks.sub_monitor_check_post',
                              args=[post.post_id, monitored_sub],
                              queue='submonitor', countdown=20)
29 changes: 26 additions & 3 deletions redditrepostsleuth/core/services/redgifs_token_manager.py

@@ -10,6 +10,10 @@

 log = logging.getLogger(__name__)

+"""
+Class for managing and caching RedGifs API tokens. Currently overkill, but if we need to backfill the database or
+API rate limits get tight, this will support caching a token for each proxy to Redis
+"""
 class RedGifsTokenManager:
     def __init__(self):
         config = Config()
@@ -22,16 +26,30 @@ def __init__(self):
         )


-    def _cache_token(self, key: str, token: str):
+    def _cache_token(self, key: str, token: str) -> None:
+        """
+        Take a given token and cache it to Redis
+        :param key: key of the token
+        :param token: API token
+        """
         log.info('Caching token for %s', key)
         self.redis.set(f'redgifs-token:{key}', token, ex=82800)

-    def remove_redgifs_token(self, key: str):
+    def remove_redgifs_token(self, key: str) -> None:
+        """
+        Remove a cached token from Redis with a given key
+        :param key: key to remove
+        """
         log.info('Removing token for %s', key)
         self.redis.delete(f'redgifs-token:{key}')


     def get_redgifs_token(self, address: str = 'localhost') -> str:
+        """
+        Either return an existing cached token or create a new one
+        :param address: address of the proxy being used
+        :return: Token
+        """
         cached_token = self.redis.get(f'redgifs-token:{address}')
         if not cached_token:
             return self._request_and_cache_token(address)
@@ -40,7 +58,12 @@ def get_redgifs_token(self, address: str = 'localhost') -> str:
         return cached_token


-    def _request_and_cache_token(self, proxy_address):
+    def _request_and_cache_token(self, proxy_address: str = 'localhost') -> str:
+        """
+        Hit the RedGifs API and request a new auth token. Cache it to Redis
+        :param proxy_address: Proxy to use, if any
+        :return: Token
+        """
         proxies = None
         if proxy_address != 'localhost':
             proxies = {'http': f'http://{proxy_address}', 'https': f'https://{proxy_address}'}
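Taken together, RedGifsTokenManager is a small cache-aside layer: get_redgifs_token returns the token cached in Redis under redgifs-token:<address> if one exists, otherwise it requests and caches a fresh one with an 82800-second (23-hour) expiry, and remove_redgifs_token lets a caller evict a token the API has started rejecting, as the ingest hunk above does with remove_redgifs_token('localhost'). A minimal sketch of that calling pattern, assuming the RedGifs call raises requests HTTPError on a stale token; the fetch_with_token wrapper and its single-retry policy are illustrative, not part of the commit:

    from requests.exceptions import HTTPError

    manager = RedGifsTokenManager()

    def fetch_with_token(do_request, proxy_address: str = 'localhost'):
        # Illustrative wrapper: run a RedGifs API call with a cached token,
        # evicting and refreshing the token once if the API rejects it.
        token = manager.get_redgifs_token(proxy_address)
        try:
            return do_request(token)
        except HTTPError:
            manager.remove_redgifs_token(proxy_address)
            return do_request(manager.get_redgifs_token(proxy_address))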
49 changes: 34 additions & 15 deletions redditrepostsleuth/core/util/helpers.py

@@ -250,33 +250,52 @@ def get_default_image_search_settings(config: Config) -> ImageSearchSettings:
     )

 def get_image_search_settings_from_request(req, config: Config) -> ImageSearchSettings:
-    return ImageSearchSettings(
+    search_settings = ImageSearchSettings(
         req.get_param_as_int('target_match_percent', required=True, default=None) or config.default_image_target_match,
         config.default_image_target_annoy_distance,
         target_title_match=req.get_param_as_int('target_title_match', required=False,
                                                 default=None) or config.default_image_target_title_match,
-        filter_dead_matches=req.get_param_as_bool('filter_dead_matches', required=False,
-                                                  default=None) or config.default_image_dead_matches_filter,
-        filter_removed_matches=req.get_param_as_bool('filter_removed_matches', required=False,
-                                                     default=None) or config.default_image_removed_match_filter,
-        only_older_matches=req.get_param_as_bool('only_older_matches', required=False,
-                                                 default=None) or config.default_image_only_older_matches,
-        filter_same_author=req.get_param_as_bool('filter_same_author', required=False,
-                                                 default=None) or config.default_image_same_author_filter,
-        filter_crossposts=req.get_param_as_bool('filter_crossposts', required=False,
-                                                default=None) or config.default_image_crosspost_filter,
+        filter_dead_matches=req.get_param_as_bool('filter_dead_matches', required=False, default=None),
+        filter_removed_matches=req.get_param_as_bool('filter_removed_matches', required=False, default=None),
+        only_older_matches=req.get_param_as_bool('only_older_matches', required=False, default=None),
+        filter_same_author=req.get_param_as_bool('filter_same_author', required=False, default=None),
+        filter_crossposts=req.get_param_as_bool('include_crossposts', required=False, default=None),
         target_meme_match_percent=req.get_param_as_int('target_meme_match_percent', required=False,
                                                        default=None) or config.default_image_target_meme_match,
-        meme_filter=req.get_param_as_bool('meme_filter', required=False,
-                                          default=None) or config.default_image_meme_filter,
-        same_sub=req.get_param_as_bool('same_sub', required=False,
-                                       default=None) or config.default_image_same_sub_filter,
+        meme_filter=req.get_param_as_bool('meme_filter', required=False, default=None),
+        same_sub=req.get_param_as_bool('same_sub', required=False, default=None),
         max_days_old=req.get_param_as_int('max_days_old', required=False,
                                           default=None) or config.default_link_max_days_old_filter,
         max_depth=10000

     )

+    if search_settings.filter_dead_matches is None:
+        search_settings.filter_dead_matches = config.default_image_dead_matches_filter
+
+    if search_settings.filter_removed_matches is None:
+        search_settings.filter_removed_matches = config.default_image_removed_match_filter
+
+    if search_settings.only_older_matches is None:
+        search_settings.only_older_matches = config.default_image_only_older_matches
+
+    if search_settings.filter_same_author is None:
+        search_settings.filter_same_author = config.default_image_same_author_filter
+
+    if search_settings.meme_filter is None:
+        search_settings.meme_filter = config.default_image_meme_filter
+
+    if search_settings.filter_crossposts is None:
+        search_settings.filter_crossposts = config.default_image_crosspost_filter
+    else:
+        search_settings.filter_crossposts = not search_settings.filter_crossposts
+
+    if search_settings.same_sub is None:
+        search_settings.same_sub = config.default_image_same_sub_filter
+
+
+    return search_settings


 def get_default_link_search_settings(config: Config) -> SearchSettings:
     return SearchSettings(
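The motivation for restructuring this function: for boolean parameters, the old 'value or config_default' fallback cannot distinguish an explicit False from an omitted parameter, since (False or True) evaluates to True, so an API caller could never turn a filter off while the config default was on. Falling back only when the parameter is None keeps the three states distinct. A standalone illustration with generic names, not from the codebase:

    def resolve_flag(param, config_default: bool) -> bool:
        # Old pattern: 'param or config_default' silently discards an explicit False.
        # New pattern: fall back to the config only when the parameter was absent.
        return config_default if param is None else param

    assert resolve_flag(None, True) is True    # omitted -> config default applies
    assert resolve_flag(False, True) is False  # explicit False now survives
    assert resolve_flag(True, False) is True   # explicit True still wins

The one twist is filter_crossposts: the request parameter is now include_crossposts, so when it is supplied the function stores its negation, and it only falls back to config.default_image_crosspost_filter when the parameter is absent.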
2 changes: 1 addition & 1 deletion redditrepostsleuth/ingestsvc/ingestsvc.py

@@ -190,7 +190,7 @@ async def main() -> None:
         oldest_post = uow.posts.get_newest_post()
         oldest_id = oldest_post.post_id

-        #await ingest_range(newest_id, oldest_id)
+        await ingest_range(newest_id, oldest_id)

         delay = 0
         while True:
