diff --git a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py index cfa68b1..8a410e8 100644 --- a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py +++ b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py @@ -8,6 +8,9 @@ import jwt import redis import requests +from praw import Reddit +from praw.exceptions import PRAWException +from prawcore import NotFound, Forbidden from sqlalchemy import text, func from redditrepostsleuth.core.config import Config @@ -16,7 +19,12 @@ from redditrepostsleuth.core.db.uow.unitofwork import UnitOfWork from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager from redditrepostsleuth.core.logging import get_configured_logger +from redditrepostsleuth.core.notification.notification_service import NotificationService +from redditrepostsleuth.core.services.response_handler import ResponseHandler from redditrepostsleuth.core.util.constants import EXCLUDE_FROM_TOP_REPOSTERS +from redditrepostsleuth.core.util.reddithelpers import is_sub_mod_praw, get_bot_permissions +from redditrepostsleuth.core.util.replytemplates import MONITORED_SUB_MOD_REMOVED_CONTENT, \ + MONITORED_SUB_MOD_REMOVED_SUBJECT log = logging.getLogger(__name__) log = get_configured_logger(__name__) @@ -139,6 +147,85 @@ def token_checker() -> None: redis_client.set('prof_token', decoded_token['sub']) log.info('New token set in Redis') +def update_monitored_sub_data( + uow: UnitOfWork, + subreddit_name: str, + reddit: Reddit, + notification_svc: NotificationService, + response_handler: ResponseHandler +) -> None: + monitored_sub = uow.monitored_sub.get_by_sub(subreddit_name) + if not monitored_sub: + log.error('Failed to find subreddit %s', subreddit_name) + return + subreddit = reddit.subreddit(monitored_sub.name) + + monitored_sub.is_mod = is_sub_mod_praw(monitored_sub.name, 'repostsleuthbot', reddit) + + if not monitored_sub.failed_admin_check_count: + monitored_sub.failed_admin_check_count = 0 + + if monitored_sub.is_mod: + if monitored_sub.failed_admin_check_count > 0: + notification_svc.send_notification( + f'Failed admin check for r/{monitored_sub.name} reset', + subject='Failed Admin Check Reset' + ) + monitored_sub.failed_admin_check_count = 0 + uow.commit() + else: + monitored_sub.failed_admin_check_count += 1 + monitored_sub.active = False + uow.commit() + notification_svc.send_notification( + f'Failed admin check for https://reddit.com/r/{monitored_sub.name} increased to {monitored_sub.failed_admin_check_count}.', + subject='Failed Admin Check Increased' + ) + return + + if monitored_sub.failed_admin_check_count == 2: + subreddit = reddit.subreddit(monitored_sub.name) + message = MONITORED_SUB_MOD_REMOVED_CONTENT.format(hours='72', subreddit=monitored_sub.name) + try: + response_handler.send_mod_mail( + subreddit.display_name, + message, + MONITORED_SUB_MOD_REMOVED_SUBJECT, + source='mod_check' + ) + except PRAWException: + pass + return + elif monitored_sub.failed_admin_check_count >= 4 and monitored_sub.name.lower() != 'dankmemes': + notification_svc.send_notification( + f'Sub r/{monitored_sub.name} failed admin check 4 times. Removing', + subject='Removing Monitored Subreddit' + ) + uow.monitored_sub.remove(monitored_sub) + uow.commit() + return + + try: + monitored_sub.subscribers = subreddit.subscribers + except NotFound as e: + log.warning('Subreddit %s has been banned. Removing', monitored_sub.name) + uow.monitored_sub.remove(monitored_sub) + uow.commit() + return + + monitored_sub.is_private = True if subreddit.subreddit_type == 'private' else False + monitored_sub.nsfw = True if subreddit.over18 else False + log.info('[Subscriber Update] %s: %s subscribers', monitored_sub.name, monitored_sub.subscribers) + + perms = get_bot_permissions(subreddit) if monitored_sub.is_mod else [] + monitored_sub.post_permission = True if 'all' in perms or 'posts' in perms else None + monitored_sub.wiki_permission = True if 'all' in perms or 'wiki' in perms else None + log.info('[Mod Check] %s | Post Perm: %s | Wiki Perm: %s', monitored_sub.name, monitored_sub.post_permission, + monitored_sub.wiki_permission) + + + + uow.commit() if __name__ == '__main__': uowm = UnitOfWorkManager(get_db_engine(Config())) diff --git a/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py b/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py index 9e94ab0..cbb2994 100644 --- a/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py +++ b/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py @@ -1,5 +1,5 @@ from praw.exceptions import PRAWException -from prawcore import TooManyRequests +from prawcore import TooManyRequests, Redirect from redditrepostsleuth.adminsvc.bot_comment_monitor import BotCommentMonitor from redditrepostsleuth.adminsvc.inbox_monitor import InboxMonitor @@ -9,7 +9,7 @@ from redditrepostsleuth.core.celery import celery from redditrepostsleuth.core.celery.basetasks import RedditTask, SqlAlchemyTask, AdminTask from redditrepostsleuth.core.celery.task_logic.scheduled_task_logic import update_proxies, update_top_reposts, \ - token_checker, run_update_top_reposters, update_top_reposters + token_checker, run_update_top_reposters, update_top_reposters, update_monitored_sub_data from redditrepostsleuth.core.db.databasemodels import MonitoredSub, StatsDailyCount from redditrepostsleuth.core.logging import configure_logger from redditrepostsleuth.core.util.reddithelpers import is_sub_mod_praw, get_bot_permissions @@ -77,9 +77,9 @@ def update_monitored_sub_data_task(self) -> None: log.info('Starting Job: Update Subreddit Data') try: with self.uowm.start() as uow: - subs = uow.monitored_sub.get_all_active() + subs = uow.monitored_sub.get_all() for sub in subs: - update_monitored_sub_stats.apply_async((sub.name,)) + update_monitored_sub_stats_task.apply_async((sub.name,)) except Exception as e: log.exception('Problem with scheduled task') @@ -121,8 +121,21 @@ def check_meme_template_potential_votes_task(self): log.exception('Problem in scheduled task') @celery.task(bind=True, base=AdminTask, autoretry_for=(TooManyRequests,), retry_kwards={'max_retries': 3}) -def check_for_subreddit_config_update_task(self, monitored_sub: MonitoredSub) -> None: - self.config_updater.check_for_config_update(monitored_sub, notify_missing_keys=False) +def check_for_subreddit_config_update_task(self, subreddit_name: str) -> None: + with self.uowm.start() as uow: + + try: + monitored_sub = uow.monitored_sub.get_by_sub(subreddit_name) + self.config_updater.check_for_config_update(monitored_sub, notify_missing_keys=False) + except TooManyRequests: + raise + except Redirect as e: + if str(e) == 'Redirect to /subreddits/search': + log.warning('Subreddit %s no longer exists. Setting to inactive in database', monitored_sub.name) + monitored_sub.active = False + uow.commit() + except Exception as e: + log.exception('') @celery.task(bind=True, base=RedditTask) def queue_config_updates_task(self): @@ -131,9 +144,9 @@ def queue_config_updates_task(self): print('[Scheduled Job] Queue config update check') with self.uowm.start() as uow: - monitored_subs = uow.monitored_sub.get_all() + monitored_subs = uow.monitored_sub.get_all_active() for monitored_sub in monitored_subs: - check_for_subreddit_config_update_task.apply_async((monitored_sub,)) + check_for_subreddit_config_update_task.apply_async((monitored_sub.name,)) print('[Scheduled Job Complete] Queue config update check') except Exception as e: @@ -142,6 +155,7 @@ def queue_config_updates_task(self): @celery.task(bind=True, base=SqlAlchemyTask) def update_daily_stats(self): + log.info('[Daily Stat Update] Started') daily_stats = StatsDailyCount() try: with self.uowm.start() as uow: @@ -156,9 +170,9 @@ def update_daily_stats(self): daily_stats.monitored_subreddit_count = uow.monitored_sub.count() uow.stat_daily_count.add(daily_stats) uow.commit() - log.info('Updated daily stats') + log.info('[Daily Stat Update] Finished') except Exception as e: - log.exception('') + log.exception('Problem updating stats') @@ -189,62 +203,21 @@ def update_top_reposts_task(self): -@celery.task(bind=True, base=RedditTask) -def update_monitored_sub_stats(self, sub_name: str) -> None: - with self.uowm.start() as uow: - monitored_sub: MonitoredSub = uow.monitored_sub.get_by_sub(sub_name) - if not monitored_sub: - log.error('Failed to find subreddit %s', sub_name) - return - subreddit = self.reddit.subreddit(monitored_sub.name) - monitored_sub.subscribers = subreddit.subscribers - monitored_sub.is_private = True if subreddit.subreddit_type == 'private' else False - monitored_sub.nsfw = True if subreddit.over18 else False - log.info('[Subscriber Update] %s: %s subscribers', monitored_sub.name, monitored_sub.subscribers) - monitored_sub.is_mod = is_sub_mod_praw(monitored_sub.name, 'repostsleuthbot', self.reddit) - perms = get_bot_permissions(subreddit) if monitored_sub.is_mod else [] - monitored_sub.post_permission = True if 'all' in perms or 'posts' in perms else None - monitored_sub.wiki_permission = True if 'all' in perms or 'wiki' in perms else None - log.info('[Mod Check] %s | Post Perm: %s | Wiki Perm: %s', monitored_sub.name, monitored_sub.post_permission, monitored_sub.wiki_permission) - - if not monitored_sub.failed_admin_check_count: - monitored_sub.failed_admin_check_count = 0 - - if monitored_sub.is_mod: - if monitored_sub.failed_admin_check_count > 0: - self.notification_svc.send_notification( - f'Failed admin check for r/{monitored_sub.name} reset', - subject='Failed Admin Check Reset' - ) - monitored_sub.failed_admin_check_count = 0 - else: - monitored_sub.failed_admin_check_count += 1 - monitored_sub.active = False - self.notification_svc.send_notification( - f'Failed admin check for https://reddit.com/r/{monitored_sub.name} increased to {monitored_sub.failed_admin_check_count}.', - subject='Failed Admin Check Increased' - ) - - if monitored_sub.failed_admin_check_count == 2: - subreddit = self.reddit.subreddit(monitored_sub.name) - message = MONITORED_SUB_MOD_REMOVED_CONTENT.format(hours='72', subreddit=monitored_sub.name) - try: - self.response_handler.send_mod_mail( - subreddit.display_name, - message, - MONITORED_SUB_MOD_REMOVED_SUBJECT, - source='mod_check' - ) - except PRAWException: - pass - elif monitored_sub.failed_admin_check_count >= 4 and monitored_sub.name.lower() != 'dankmemes': - self.notification_svc.send_notification( - f'Sub r/{monitored_sub.name} failed admin check 4 times. Removing', - subject='Removing Monitored Subreddit' +@celery.task(bind=True, base=RedditTask, autoretry_for=(TooManyRequests,), retry_kwards={'max_retries': 3}) +def update_monitored_sub_stats_task(self, sub_name: str) -> None: + try: + with self.uowm.start() as uow: + update_monitored_sub_data( + uow, + sub_name, + self.reddit, + self.notification_svc, + self.response_handler ) - uow.monitored_sub.remove(monitored_sub) - - uow.commit() + except TooManyRequests: + raise + except Exception as e: + log.exception('') @celery.task(bind=True, base=SqlAlchemyTask) def update_proxies_task(self) -> None: diff --git a/redditrepostsleuth/core/db/databasemodels.py b/redditrepostsleuth/core/db/databasemodels.py index b97d28d..499c114 100644 --- a/redditrepostsleuth/core/db/databasemodels.py +++ b/redditrepostsleuth/core/db/databasemodels.py @@ -46,7 +46,7 @@ def __repr__(self) -> str: searches = relationship('RepostSearch', back_populates='post') reports = relationship('UserReport', back_populates='post') hashes = relationship('PostHash', back_populates='post') - post_type = relationship('PostType', lazy='joined') # lazy has to be set to JSON encoders don't fail for unbound session + post_type = relationship('PostType') # lazy has to be set to JSON encoders don't fail for unbound session def to_dict(self): return { @@ -82,7 +82,7 @@ def __repr__(self) -> str: post_created_at = Column(DateTime, nullable=False) # TODO: change to default timestamp post = relationship("Post", back_populates='hashes') - hash_type = relationship("HashType", lazy='joined') + hash_type = relationship("HashType") def to_dict(self): return { diff --git a/redditrepostsleuth/core/db/repository/monitoredsubrepository.py b/redditrepostsleuth/core/db/repository/monitoredsubrepository.py index 3bb3c6b..db5bde8 100644 --- a/redditrepostsleuth/core/db/repository/monitoredsubrepository.py +++ b/redditrepostsleuth/core/db/repository/monitoredsubrepository.py @@ -13,7 +13,7 @@ def add(self, item): self.db_session.add(item) def get_all(self, limit: int = None) -> List[MonitoredSub]: - return self.db_session.query(MonitoredSub).order_by(MonitoredSub.subscribers.desc()).limit(limit).all() + return self.db_session.query(MonitoredSub).order_by(MonitoredSub.subscribers.asc()).limit(limit).all() def get_all_active(self, limit: int = None) -> List[MonitoredSub]: return self.db_session.query(MonitoredSub).filter(MonitoredSub.active == True).order_by(MonitoredSub.subscribers.desc()).limit(limit).all() diff --git a/redditrepostsleuth/core/services/subreddit_config_updater.py b/redditrepostsleuth/core/services/subreddit_config_updater.py index 9223cd2..6fde2f8 100644 --- a/redditrepostsleuth/core/services/subreddit_config_updater.py +++ b/redditrepostsleuth/core/services/subreddit_config_updater.py @@ -4,6 +4,7 @@ from typing import Text, List, NoReturn from praw import Reddit +from praw.exceptions import RedditAPIException from praw.models import WikiPage, Subreddit from prawcore import NotFound, Forbidden, ResponseException, TooManyRequests from sqlalchemy import func @@ -226,7 +227,10 @@ def update_wiki_config_from_database( if not new_config: log.error('Failed to generate new config for %s', monitored_sub.name) return False - self._update_wiki_page(wiki_page, new_config) + try: + self._update_wiki_page(wiki_page, new_config) + except RedditAPIException as e: + log.exception('') wiki_page = subreddit.wiki['repost_sleuth_config'] # Force refresh so we can get latest revision ID self._create_revision(wiki_page, monitored_sub) self._set_config_validity(wiki_page.revision_id, True) @@ -346,7 +350,7 @@ def _create_wiki_page(self, subreddit: Subreddit): try: subreddit.wiki.create(self.config.wiki_config_name, json.dumps(DEFAULT_CONFIG_VALUES)) except NotFound: - log.exception('Failed to create wiki page', exc_info=False) + log.warning('Failed to create wiki page for %s', subreddit.display_name) raise self.notification_svc.send_notification( diff --git a/redditrepostsleuth/core/util/imagehashing.py b/redditrepostsleuth/core/util/imagehashing.py index 73e6eb2..733a8ee 100644 --- a/redditrepostsleuth/core/util/imagehashing.py +++ b/redditrepostsleuth/core/util/imagehashing.py @@ -73,6 +73,8 @@ def set_image_hashes(post: Post, hash_size: int = 16) -> Post: dhash_v = imagehash.dhash_vertical(img, hash_size=hash_size) post.hashes.append(PostHash(hash=str(dhash_h), hash_type_id=1, post_created_at=post.created_at)) post.hashes.append(PostHash(hash=str(dhash_v), hash_type_id=2, post_created_at=post.created_at)) + except OSError as e: + log.warning('Problem hashing image: %s', e) except Exception as e: # TODO: Specific exception log.exception('Error creating hash', exc_info=True) diff --git a/redditrepostsleuth/ingestsvc/ingestsvc.py b/redditrepostsleuth/ingestsvc/ingestsvc.py index 00f4264..6601f8a 100644 --- a/redditrepostsleuth/ingestsvc/ingestsvc.py +++ b/redditrepostsleuth/ingestsvc/ingestsvc.py @@ -185,7 +185,7 @@ async def main() -> None: continue res_data = json.loads(results) - if not len(res_data['data']['children']): + if not res_data or not len(res_data['data']['children']): log.info('No results') continue