From 5b9efd33f199bc6d3f6298a13c5476c9738c31b0 Mon Sep 17 00:00:00 2001
From: barry
Date: Sun, 21 Jan 2024 19:32:07 -0500
Subject: [PATCH 01/10] Ingest and scheduled task fixes unrelated to branch name.

---
 docker-compose.yml                            |   4 +-
 .../celery/task_logic/scheduled_task_logic.py |  59 +++++-----
 .../core/celery/tasks/ingest_tasks.py         |  19 +--
 .../core/celery/tasks/scheduled_tasks.py      |   9 +-
 redditrepostsleuth/core/db/databasemodels.py  |   1 +
 redditrepostsleuth/core/model/misc_models.py  |   1 +
 .../core/services/responsebuilder.py          |   2 +-
 redditrepostsleuth/ingestsvc/ingestsvc.py     | 108 +++++++++++-------
 8 files changed, 124 insertions(+), 79 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index cc7b924..422e872 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -134,9 +134,9 @@ services:
     environment:
       - RUN_ENV=production
      - db_user=ingest
-      - LOG_LEVEL=ERROR
+      - LOG_LEVEL=INFO
       - CELERY_IMPORTS=redditrepostsleuth.core.celery.tasks.ingest_tasks
-    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q post_ingest -n ingest_worker --autoscale=3,16
+    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q post_ingest -n ingest_worker --autoscale=16,1

  link_repost_worker:
    container_name: link-repost-worker

diff --git a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
index c686307..3308dd5 100644
--- a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
@@ -57,35 +57,42 @@ def update_proxies(uowm: UnitOfWorkManager) -> None:
         )
         uow.commit()
 
-def update_top_reposts(uowm: UnitOfWorkManager):
+def update_top_reposts(uow: UnitOfWork, post_type_id: int, day_range: int = None):
     # reddit.info(reddit_ids_to_lookup):
-    post_types = [2, 3]
-    day_ranges = [1, 7, 14, 30, 365, None]
+    log.info('Getting top repostors for post type %s with range %s', post_type_id, day_range)
     range_query = "SELECT repost_of_id, COUNT(*) c FROM repost WHERE detected_at > NOW() - INTERVAL :days DAY AND post_type_id=:posttype GROUP BY repost_of_id HAVING c > 5 ORDER BY c DESC"
     all_time_query = "SELECT repost_of_id, COUNT(*) c FROM repost WHERE post_type_id=:posttype GROUP BY repost_of_id HAVING c > 5 ORDER BY c DESC"
-    with uowm.start() as uow:
-        for post_type in post_types:
-            for days in day_ranges:
-                log.info('Getting top reposts for post type %s with range %s', post_type, days)
-                if days:
-                    query = range_query
-                else:
-                    query = all_time_query
-                uow.session.execute(
-                    text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range=:days'),
-                    {'posttype': post_type, 'days': days})
-                uow.commit()
-                result = uow.session.execute(text(query), {'posttype': post_type, 'days': days})
-                for row in result:
-                    stat = StatsTopRepost()
-                    stat.post_id = row[0]
-                    stat.post_type_id = post_type
-                    stat.day_range = days
-                    stat.repost_count = row[1]
-                    stat.updated_at = func.utc_timestamp()
-                    stat.nsfw = False
-                    uow.stat_top_repost.add(stat)
-                    uow.commit()
+    if day_range:
+        query = range_query
+        uow.session.execute(text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range=:days'),
+                            {'posttype': post_type_id, 'days': day_range})
+    else:
+        query = all_time_query
+        uow.session.execute(text('DELETE FROM stat_top_repost WHERE post_type_id=:posttype AND day_range IS NULL'),
+                            {'posttype': post_type_id})
+
+    uow.commit()
+
+
+
+    result = uow.session.execute(text(query), {'posttype': post_type_id, 'days': day_range})
+    for row in result:
+        stat = StatsTopRepost()
+        stat.post_id = row[0]
+        stat.post_type_id = post_type_id
+        stat.day_range = day_range
+        stat.repost_count = row[1]
+        stat.updated_at = func.utc_timestamp()
+        stat.nsfw = False
+        uow.stat_top_repost.add(stat)
+        uow.commit()
+
+def run_update_top_reposts(uow: UnitOfWork) -> None:
+    post_types = [1, 2, 3]
+    day_ranges = [1, 7, 14, 30, None]
+    for post_type_id in post_types:
+        for days in day_ranges:
+            update_top_reposts(uow, post_type_id, days)
 
 def update_top_reposters(uow: UnitOfWork, post_type_id: int, day_range: int = None) -> None:
     log.info('Getting top repostors for post type %s with range %s', post_type_id, day_range)
diff --git a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
index 37ce7c8..a2b86bb 100644
--- a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
@@ -12,7 +12,7 @@
 
 
 @celery.task(bind=True, base=SqlAlchemyTask, ignore_reseults=True, serializer='pickle', autoretry_for=(ConnectionError,ImageConversionException,GalleryNotProcessed), retry_kwargs={'max_retries': 10, 'countdown': 300})
-def save_new_post(self, submission: dict):
+def save_new_post(self, submission: dict, repost_check: bool = True):
 
     # TODO: temp fix until I can fix imgur gifs
     if 'imgur' in submission['url'] and 'gifv' in submission['url']:
@@ -48,21 +48,22 @@ def save_new_post(self, submission: dict):
             log.exception('Database save failed: %s', str(e), exc_info=False)
             return
 
-    if post.post_type_id == 1:
-        celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_for_text_repost_task', args=[post])
-    elif post.post_type_id == 2:
-        celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_image_repost_save', args=[post])
-    elif post.post_type_id == 3:
-        celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.link_repost_check', args=[post])
+    if repost_check:
+        if post.post_type_id == 1:
+            celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_for_text_repost_task', args=[post])
+        elif post.post_type_id == 2:
+            celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_image_repost_save', args=[post])
+        elif post.post_type_id == 3:
+            celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.link_repost_check', args=[post])
 
     celery.send_task('redditrepostsleuth.core.celery.admin_tasks.check_user_for_only_fans', args=[post.author])
 
 
 @celery.task
-def save_new_posts(posts: list[dict]) -> None:
+def save_new_posts(posts: list[dict], repost_check: bool = True) -> None:
     for post in posts:
-        save_new_post.apply_async((post,))
+        save_new_post.apply_async((post, repost_check))
 
 @celery.task(bind=True, base=SqlAlchemyTask, ignore_results=True)
 def save_pushshift_results(self, data):
diff --git a/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py b/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
index 61c5af3..7f2b117 100644
--- a/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
@@ -9,7 +9,7 @@
 from redditrepostsleuth.core.celery import celery
 from redditrepostsleuth.core.celery.basetasks import RedditTask, SqlAlchemyTask, AdminTask
 from redditrepostsleuth.core.celery.task_logic.scheduled_task_logic import update_proxies, update_top_reposts, \
-    token_checker, run_update_top_reposters, update_top_reposters, update_monitored_sub_data
+    token_checker, run_update_top_reposters, update_top_reposters, update_monitored_sub_data, run_update_top_reposts
 from redditrepostsleuth.core.db.databasemodels import MonitoredSub, StatsDailyCount
 from redditrepostsleuth.core.logging import configure_logger
 from redditrepostsleuth.core.util.reddithelpers import is_sub_mod_praw, get_bot_permissions
@@ -178,6 +178,13 @@ def update_daily_stats(self):
         log.exception('Problem updating stats')
 
 
+@celery.task(bind=True, base=SqlAlchemyTask)
+def update_all_top_reposts_task(self):
+    try:
+        with self.uowm.start() as uow:
+            run_update_top_reposts(uow)
+    except Exception as e:
+        log.exception('Unknown task error')
 
 @celery.task(bind=True, base=SqlAlchemyTask)
 def update_all_top_reposters_task(self):
diff --git a/redditrepostsleuth/core/db/databasemodels.py b/redditrepostsleuth/core/db/databasemodels.py
index 1de399e..56ee65e 100644
--- a/redditrepostsleuth/core/db/databasemodels.py
+++ b/redditrepostsleuth/core/db/databasemodels.py
@@ -47,6 +47,7 @@ def __repr__(self) -> str:
     reports = relationship('UserReport', back_populates='post')
     hashes = relationship('PostHash', back_populates='post')
     post_type = relationship('PostType') # lazy has to be set to JSON encoders don't fail for unbound session
+    #post_type = relationship('PostType', lazy='joined')
 
     def to_dict(self):
         return {
diff --git a/redditrepostsleuth/core/model/misc_models.py b/redditrepostsleuth/core/model/misc_models.py
index c883bf9..89fde2e 100644
--- a/redditrepostsleuth/core/model/misc_models.py
+++ b/redditrepostsleuth/core/model/misc_models.py
@@ -11,6 +11,7 @@ class JobStatus(Enum):
     TIMEOUT = auto()
     PROXYERROR = auto()
     ERROR = auto()
+    RATELIMIT = auto()
 
 @dataclass
 class BatchedPostRequestJob:
diff --git a/redditrepostsleuth/core/services/responsebuilder.py b/redditrepostsleuth/core/services/responsebuilder.py
index 5cc12b7..17d3fef 100644
--- a/redditrepostsleuth/core/services/responsebuilder.py
+++ b/redditrepostsleuth/core/services/responsebuilder.py
@@ -130,7 +130,7 @@ def build_sub_comment(
 
         try:
             return self.build_default_comment(search_results, message, **kwargs)
-        except KeyError:
+        except KeyError as e:
             log.warning('Custom repost template for %s has a bad slug: %s', monitored_sub.name, monitored_sub.repost_response_template)
             return self.build_default_comment(search_results, **kwargs)
diff --git a/redditrepostsleuth/ingestsvc/ingestsvc.py b/redditrepostsleuth/ingestsvc/ingestsvc.py
index 1ff762e..46c65c3 100644
--- a/redditrepostsleuth/ingestsvc/ingestsvc.py
+++ b/redditrepostsleuth/ingestsvc/ingestsvc.py
@@ -3,7 +3,7 @@
 import json
 import os
 import time
-from asyncio import ensure_future, gather, run, TimeoutError
+from asyncio import ensure_future, gather, run, TimeoutError, CancelledError
 from datetime import datetime
 from typing import List, Optional
 
@@ -15,6 +15,7 @@
 from redditrepostsleuth.core.db.databasemodels import Post
 from redditrepostsleuth.core.db.db_utils import get_db_engine
 from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
+from redditrepostsleuth.core.exception import RateLimitException, UtilApiException
 from redditrepostsleuth.core.logging import configure_logger
 from redditrepostsleuth.core.model.misc_models import BatchedPostRequestJob, JobStatus
 from redditrepostsleuth.core.util.helpers import get_reddit_instance, get_newest_praw_post_id, get_next_ids, \
@@ -36,6 +37,7 @@
 config = Config()
 
 REMOVAL_REASONS_TO_SKIP = ['deleted', 'author', 'reddit', 'copyright_takedown']
+HEADERS = {'User-Agent': 'u/RepostSleuthBot - Submission Ingest (by u/BarryCarey)'}
 
 
 async def fetch_page(url: str, session: ClientSession) -> Optional[str]:
@@ -45,12 +47,20 @@ async def fetch_page(url: str, session: ClientSession) -> Optional[str]:
     :param session: AIOHttp session to use
     :return: raw response from request
     """
-    async with session.get(url, timeout=ClientTimeout(total=10)) as resp:
+    log.debug('Page fetch')
+
+    async with session.get(url, timeout=ClientTimeout(total=10), headers=HEADERS) as resp:
         try:
             if resp.status == 200:
                 log.debug('Successful fetch')
-                return await resp.text()
+                try:
+                    return await resp.text()
+                except CancelledError:
+                    log.error('Canceled on getting text')
+                    raise UtilApiException('Canceled')
             else:
+                if resp.status == 429:
+                    raise RateLimitException('Data API rate limit')
                 log.info('Unexpected request status %s - %s', resp.status, url)
                 return
         except (ClientOSError, TimeoutError):
@@ -68,11 +78,15 @@ async def fetch_page_as_job(job: BatchedPostRequestJob, session: ClientSession)
     :rtype: BatchedPostRequestJob
     """
     try:
-        async with session.get(job.url, timeout=ClientTimeout(total=10)) as resp:
+        async with session.get(job.url, timeout=ClientTimeout(total=10), headers=HEADERS) as resp:
             if resp.status == 200:
                 log.debug('Successful fetch')
                 job.status = JobStatus.SUCCESS
+                log.debug('Fetching response text')
                 job.resp_data = await resp.text()
+            elif resp.status == 429:
+                log.warning('Data API Rate Limit')
+                job.status = JobStatus.RATELIMIT
             else:
                 log.warning('Unexpected request status %s - %s', resp.status, job.url)
                 job.status = JobStatus.ERROR
@@ -106,7 +120,7 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
 
     tasks = []
     conn = TCPConnector(limit=0)
-    async with ClientSession(connector=conn) as session:
+    async with ClientSession(connector=conn, headers=HEADERS) as session:
         while True:
             try:
                 chunk = list(itertools.islice(missing_ids, 100))
@@ -114,6 +128,7 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
                 break
 
             url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(chunk)}'
+            #url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(chunk)}'
             job = BatchedPostRequestJob(url, chunk, JobStatus.STARTED)
             tasks.append(ensure_future(fetch_page_as_job(job, session)))
             if len(tasks) >= 50 or len(chunk) == 0:
@@ -139,10 +154,15 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
                     else:
                         tasks.append(ensure_future(fetch_page_as_job(j, session)))
 
+                any_rate_limit = next((x for x in results if x.status == JobStatus.RATELIMIT), None)
+                if any_rate_limit:
+                    log.info('Some jobs hit data rate limit, waiting')
+                    await asyncio.sleep(10)
+
                 log.info('Sending %s posts to save queue', len(posts_to_save))
 
                 # save_new_posts.apply_async(([reddit_submission_to_post(submission) for submission in posts_to_save],))
-                save_new_posts.apply_async((posts_to_save,))
+                save_new_posts.apply_async((posts_to_save, True))
 
                 if len(chunk) == 0:
                     break
@@ -170,52 +190,60 @@ async def main() -> None:
         oldest_id = oldest_post.post_id
 
     await ingest_range(newest_id, oldest_id)
 
-    async with ClientSession() as session:
-        delay = 0
-        while True:
-            ids_to_get = get_next_ids(newest_id, 100)
-            url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(ids_to_get)}'
+
+    delay = 0
+    while True:
+        ids_to_get = get_next_ids(newest_id, 100)
+        url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(ids_to_get)}'
+        #url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(ids_to_get)}'
+        async with ClientSession(headers=HEADERS) as session:
            try:
+                log.debug('Sending fetch request')
                 results = await fetch_page(url, session)
-            except (ServerDisconnectedError, ClientConnectorError, ClientOSError, TimeoutError):
+            except (ServerDisconnectedError, ClientConnectorError, ClientOSError, TimeoutError, CancelledError, UtilApiException):
                 log.warning('Error during fetch')
                 await asyncio.sleep(2)
                 continue
-
-            if not results:
+            except RateLimitException:
+                log.warning('Hit Data API Rate Limit')
+                await asyncio.sleep(10)
                 continue
 
-            res_data = json.loads(results)
-            if not res_data or not len(res_data['data']['children']):
-                log.info('No results')
+            if not results:
+                log.debug('No results')
+                continue
+
+            res_data = json.loads(results)
+            if not res_data or not len(res_data['data']['children']):
+                log.info('No results')
+                continue
+
+            log.info('%s results returned from API', len(res_data['data']['children']))
+            if len(res_data['data']['children']) < 91:
+                delay += 1
+                log.debug('Delay increased by 1. Current delay: %s', delay)
+            else:
+                if delay > 0:
+                    delay -= 1
+                    log.debug('Delay decreased by 1. Current delay: %s', delay)
+
+            posts_to_save = []
+            for post in res_data['data']['children']:
+                if post['data']['removed_by_category'] in REMOVAL_REASONS_TO_SKIP:
                     continue
+                posts_to_save.append(post['data'])
 
-            log.info('%s results returned from API', len(res_data['data']['children']))
-            if len(res_data['data']['children']) < 90:
-                delay += 1
-                log.debug('Delay increased by 1. Current delay: %s', delay)
-            else:
-                if delay > 0:
-                    delay -= 1
-                    log.debug('Delay decreased by 1. Current delay: %s', delay)
-
-            posts_to_save = []
-            for post in res_data['data']['children']:
-                if post['data']['removed_by_category'] in REMOVAL_REASONS_TO_SKIP:
-                    continue
-                posts_to_save.append(post['data'])
-
-            log.info('Sending %s posts to save queue', len(posts_to_save))
-            # queue_posts_for_ingest([reddit_submission_to_post(submission) for submission in posts_to_save])
-            queue_posts_for_ingest(posts_to_save)
+            log.info('Sending %s posts to save queue', len(posts_to_save))
+            # queue_posts_for_ingest([reddit_submission_to_post(submission) for submission in posts_to_save])
+            queue_posts_for_ingest(posts_to_save)
 
-            ingest_delay = datetime.utcnow() - datetime.utcfromtimestamp(
-                res_data['data']['children'][0]['data']['created_utc'])
-            log.info('Current Delay: %s', ingest_delay)
+            ingest_delay = datetime.utcnow() - datetime.utcfromtimestamp(
+                res_data['data']['children'][0]['data']['created_utc'])
+            log.info('Current Delay: %s', ingest_delay)
 
-            newest_id = res_data['data']['children'][-1]['data']['id']
+            newest_id = res_data['data']['children'][-1]['data']['id']
 
-            time.sleep(delay)
+            time.sleep(delay)
 
 
 if __name__ == '__main__':

From 2a1005453c8e016840bb6cebef41f299e1b27ed6 Mon Sep 17 00:00:00 2001
From: barry
Date: Fri, 9 Feb 2024 20:31:46 -0500
Subject: [PATCH 02/10] most support for ban and remove OF messages

---
 redditrepostsleuth/core/config.py            |  2 +-
 redditrepostsleuth/core/db/databasemodels.py |  4 +++
 redditrepostsleuth/ingestsvc/ingestsvc.py    |  7 ++--
 .../submonitorsvc/monitored_sub_service.py   | 33 +++++++++++--------
 4 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/redditrepostsleuth/core/config.py b/redditrepostsleuth/core/config.py
index 2718c0d..9d39502 100644
--- a/redditrepostsleuth/core/config.py
+++ b/redditrepostsleuth/core/config.py
@@ -214,7 +214,7 @@ def _initialize_attributes(self):
             'default_text_crosspost_filter',
             'default_text_max_days_old_filter',
             'default_text_target_distance',
-            'discord_logging_hook'
+            'discord_logging_hook',
 
         ]

diff --git a/redditrepostsleuth/core/db/databasemodels.py b/redditrepostsleuth/core/db/databasemodels.py
index 56ee65e..fea58f7 100644
--- a/redditrepostsleuth/core/db/databasemodels.py
+++ b/redditrepostsleuth/core/db/databasemodels.py
@@ -352,10 +352,14 @@ class MonitoredSub(Base):
     adult_promoter_remove_post = Column(Boolean, default=False)
     adult_promoter_ban_user = Column(Boolean, default=False)
     adult_promoter_notify_mod_mail = Column(Boolean, default=False)
+    adult_promoter_removal_reason = Column(String(300))
+    adult_promoter_ban_reason = Column(String(300))
     high_volume_reposter_ban_user = Column(Boolean, default=False)
     high_volume_reposter_remove_post = Column(Boolean, default=False)
     high_volume_reposter_threshold = Column(Integer, default=100)
     high_volume_reposter_notify_mod_mail = Column(Boolean, default=False)
+    high_volume_reposter_removal_reason = Column(String(300))
+    high_volume_reposter_ban_reason = Column(String(300))
 
     post_checks = relationship("MonitoredSubChecks", back_populates='monitored_sub', cascade='all, delete', )
     config_revisions = relationship("MonitoredSubConfigRevision", back_populates='monitored_sub', cascade='all, delete')
diff --git a/redditrepostsleuth/ingestsvc/ingestsvc.py b/redditrepostsleuth/ingestsvc/ingestsvc.py
index 46c65c3..8f17965 100644
--- a/redditrepostsleuth/ingestsvc/ingestsvc.py
+++ b/redditrepostsleuth/ingestsvc/ingestsvc.py
@@ -60,6 +60,7 @@ async def fetch_page(url: str, session: ClientSession) -> Optional[str]:
                     raise UtilApiException('Canceled')
             else:
                 if resp.status == 429:
+                    text = await resp.text()
                     raise RateLimitException('Data API rate limit')
                 log.info('Unexpected request status %s - %s', resp.status, url)
                 return
@@ -189,13 +190,13 @@ async def main() -> None:
         oldest_post = uow.posts.get_newest_post()
         oldest_id = oldest_post.post_id
 
-    await ingest_range(newest_id, oldest_id)
+    #await ingest_range(newest_id, oldest_id)
 
     delay = 0
     while True:
         ids_to_get = get_next_ids(newest_id, 100)
-        url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(ids_to_get)}'
-        #url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(ids_to_get)}'
+        #url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(ids_to_get)}'
+        url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(ids_to_get)}'
         async with ClientSession(headers=HEADERS) as session:
             try:
                 log.debug('Sending fetch request')
diff --git a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
index 40e8e0d..1439bff 100644
--- a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
+++ b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
@@ -111,7 +111,11 @@ def handle_only_fans_check(
                     f'Post by [{post.author}](https://reddit.com/u/{post.author}) removed from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})',
                     subject='Onlyfans Removal'
                 )
-            self._remove_post(monitored_sub, self.reddit.submission(post.post_id))
+
+            self._remove_post(
+                monitored_sub.adult_promoter_removal_reason,
+                self.reddit.submission(post.post_id)
+            )
 
         if monitored_sub.adult_promoter_ban_user:
             if self.notification_svc:
@@ -179,7 +183,10 @@ def handle_high_volume_reposter_check(
                     f'Post by [{post.author}](https://reddit.com/u/{post.author}) removed from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})',
                     subject='High Volume Removal'
                 )
-            self._remove_post(monitored_sub, self.reddit.submission(post.post_id))
+            self._remove_post(
+                monitored_sub.high_volume_reposter_removal_reason,
+                self.reddit.submission(post.post_id)
+            )
 
         if monitored_sub.high_volume_reposter_ban_user:
             if self.notification_svc:
@@ -302,7 +309,8 @@ def check_submission(self, monitored_sub: MonitoredSub, post: Post) -> Optional[
                 report_msg = self.response_builder.build_report_msg(monitored_sub.name, msg_values)
                 self._report_submission(monitored_sub, submission, report_msg)
             self._lock_post(monitored_sub, submission)
-            self._remove_post(monitored_sub, submission)
+            if monitored_sub.remove_repost:
+                self._remove_post(monitored_sub, submission)
             self._send_mod_mail(monitored_sub, search_results)
         else:
             self._mark_post_as_oc(monitored_sub, submission)
@@ -396,21 +404,20 @@ def _lock_comment(self, monitored_sub: MonitoredSub, comment: Comment) -> None:
         except Exception as e:
             log.exception('Failed to lock comment', exc_info=True)
 
-    def _remove_post(self, monitored_sub: MonitoredSub, submission: Submission, mod_note: str = None) -> None:
+    def _remove_post(self, removal_reason: str, submission: Submission, mod_note: str = None) -> None:
         """
         Check if given sub wants posts removed.  Remove is enabled
         @param monitored_sub: Monitored sub
         @param submission: Submission to remove
         """
-        if monitored_sub.remove_repost:
-            try:
-                removal_reason_id = self._get_removal_reason_id(monitored_sub.removal_reason, submission.subreddit)
-                log.info('Attempting to remove post https://redd.it/%s with removal ID %s', submission.id, removal_reason_id)
-                submission.mod.remove(reason_id=removal_reason_id, mod_note=mod_note)
-            except Forbidden:
-                log.error('Failed to remove post https://redd.it/%s, no permission', submission.id)
-            except Exception as e:
-                log.exception('Failed to remove submission https://redd.it/%s', submission.id, exc_info=True)
+        try:
+            removal_reason_id = self._get_removal_reason_id(removal_reason, submission.subreddit)
+            log.info('Attempting to remove post https://redd.it/%s with removal ID %s', submission.id, removal_reason_id)
+            submission.mod.remove(reason_id=removal_reason_id, mod_note=mod_note)
+        except Forbidden:
+            log.error('Failed to remove post https://redd.it/%s, no permission', submission.id)
+        except Exception as e:
+            log.exception('Failed to remove submission https://redd.it/%s', submission.id, exc_info=True)
 
     def _get_removal_reason_id(self, removal_reason: str, subreddit: Subreddit) -> Optional[str]:
         if not removal_reason:

From a74ce360fdd62b1ce8668ad48e36c69acc311965 Mon Sep 17 00:00:00 2001
From: barry
Date: Sun, 11 Feb 2024 11:09:59 -0500
Subject: [PATCH 03/10] Working implementation of removal reasons

---
 docs/dev_docs/modifying_monitored_sub.md           |  5 +++++
 redditrepostsleuth/core/db/databasemodels.py       |  9 +++++++--
 .../core/util/default_bot_config.py                |  4 ++++
 .../submonitorsvc/monitored_sub_service.py         | 10 +++++++---
 tests/submonitorsvc/test_subMonitor.py             | 19 +++++++++++++------
 5 files changed, 36 insertions(+), 11 deletions(-)
 create mode 100644 docs/dev_docs/modifying_monitored_sub.md

diff --git a/docs/dev_docs/modifying_monitored_sub.md b/docs/dev_docs/modifying_monitored_sub.md
new file mode 100644
index 0000000..5d3e88d
--- /dev/null
+++ b/docs/dev_docs/modifying_monitored_sub.md
@@ -0,0 +1,5 @@
+
+### Adding or Removing Config Values
+* Add / Remove config values in core/db/databasemodels.py
+* Add/Remove in core/util/default_bot_config.py
+* Update sub_monitor_exposed_config_options in the config json
\ No newline at end of file
diff --git a/redditrepostsleuth/core/db/databasemodels.py b/redditrepostsleuth/core/db/databasemodels.py
index fea58f7..93741f1 100644
--- a/redditrepostsleuth/core/db/databasemodels.py
+++ b/redditrepostsleuth/core/db/databasemodels.py
@@ -199,7 +199,7 @@ class RepostSearch(Base):
         Index('idx_post_type_searched_at', 'post_type_id', 'searched_at'),
         Index('idx_by_subreddit_and_type', 'subreddit', 'source', 'post_type_id', 'matches_found'),
         Index('idx_source', 'source'),
-        Index('idx_matches_found', 'matches_found')
+        Index('idx_matches_found', 'searched_at', 'source', 'matches_found')
     )
     id = Column(Integer, primary_key=True)
     post_id = Column(Integer, ForeignKey('post.id'))
@@ -427,7 +427,12 @@ def to_dict(self):
             'high_volume_reposter_ban_user': self.high_volume_reposter_ban_user,
             'high_volume_reposter_remove_post': self.high_volume_reposter_remove_post,
             'high_volume_reposter_threshold': self.high_volume_reposter_threshold,
-            'high_volume_reposter_notify_mod_mail': self.high_volume_reposter_notify_mod_mail
+            'high_volume_reposter_notify_mod_mail': self.high_volume_reposter_notify_mod_mail,
+            'high_volume_reposter_removal_reason': self.high_volume_reposter_removal_reason,
+            'high_volume_reposter_ban_reason': self.high_volume_reposter_ban_reason,
+            'adult_promoter_removal_reason': self.adult_promoter_removal_reason,
+            'adult_promoter_ban_reason': self.adult_promoter_ban_reason
 
         }
diff --git a/redditrepostsleuth/core/util/default_bot_config.py b/redditrepostsleuth/core/util/default_bot_config.py
index 6a6977a..4211a00 100644
--- a/redditrepostsleuth/core/util/default_bot_config.py
+++ b/redditrepostsleuth/core/util/default_bot_config.py
@@ -33,9 +33,13 @@
     "adult_promoter_remove_post": False,
     "adult_promoter_ban_user": False,
     "adult_promoter_notify_mod_mail": False,
+    "adult_promoter_ban_reason": None,
+    "adult_promoter_removal_reason": None,
     "high_volume_reposter_ban_user": False,
     "high_volume_reposter_remove_post": False,
     "high_volume_reposter_threshold": 150,
     "high_volume_reposter_notify_mod_mail": False,
+    "high_volume_reposter_removal_reason": None,
+    "high_volume_reposter_ban_reason": None
 
 }
diff --git a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
index 1439bff..99d897e 100644
--- a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
+++ b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
@@ -123,7 +123,7 @@ def handle_only_fans_check(
                     f'User [{post.author}](https://reddit.com/u/{post.author}) banned from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})',
                     subject='Onlyfans Ban Issued'
                 )
-            self._ban_user(post.author, monitored_sub.name, user.notes)
+            self._ban_user(post.author, monitored_sub.name, monitored_sub.adult_promoter_ban_reason or user.notes)
 
         if monitored_sub.adult_promoter_notify_mod_mail:
             message_body = ADULT_PROMOTER_SUBMISSION_FOUND.format(
@@ -194,7 +194,11 @@ def handle_high_volume_reposter_check(
                     f'User [{post.author}](https://reddit.com/u/{post.author}) banned from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})',
                     subject='High Volume Reposter Ban Issued'
                 )
-            self._ban_user(post.author, monitored_sub.name, 'High volume of reposts detected by Repost Sleuth')
+            self._ban_user(
+                post.author,
+                monitored_sub.name,
+                monitored_sub.high_volume_reposter_ban_reason or 'High volume of reposts detected by Repost Sleuth'
+            )
 
         if monitored_sub.high_volume_reposter_notify_mod_mail:
             message_body = HIGH_VOLUME_REPOSTER_FOUND.format(
@@ -310,7 +314,7 @@ def check_submission(self, monitored_sub: MonitoredSub, post: Post) -> Optional[
                 self._report_submission(monitored_sub, submission, report_msg)
             self._lock_post(monitored_sub, submission)
             if monitored_sub.remove_repost:
-                self._remove_post(monitored_sub, submission)
+                self._remove_post(monitored_sub.removal_reason, submission)
             self._send_mod_mail(monitored_sub, search_results)
         else:
             self._mark_post_as_oc(monitored_sub, submission)
diff --git a/tests/submonitorsvc/test_subMonitor.py b/tests/submonitorsvc/test_subMonitor.py
index 3bd369f..fe41e6b 100644
--- a/tests/submonitorsvc/test_subMonitor.py
+++ b/tests/submonitorsvc/test_subMonitor.py
@@ -121,7 +121,12 @@ def test__handle_only_fans_flagged_user_ban_user(self, mock_ban_user, mock_remov
     def test__handle_only_fans_flagged_user_remove_post(self, mock_ban_user, mock_remove_post):
         user_review = UserReview(content_links_found=1, username='test_user', notes='Profile links match onlyfans.com')
         post = Post(subreddit='test_subreddit', author='test_user')
-        monitored_sub = MonitoredSub(name='test_subreddit', adult_promoter_remove_post=True, adult_promoter_ban_user=False)
+        monitored_sub = MonitoredSub(
+            name='test_subreddit',
+            adult_promoter_remove_post=True,
+            adult_promoter_ban_user=False,
+            adult_promoter_removal_reason='Removed'
+        )
         mock_uow = MagicMock(
             user_review=MagicMock(get_by_username=MagicMock(return_value=user_review)),
             user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None))
@@ -133,7 +138,7 @@ def test__handle_only_fans_flagged_user_remove_post(self, mock_ban_user, mock_re
         sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub)
 
         mock_ban_user.assert_not_called()
-        mock_remove_post.assert_called_once_with(monitored_sub, ANY)
+        mock_remove_post.assert_called_once_with('Removed', ANY)
 
     @patch.object(MonitoredSubService, '_remove_post')
     @patch.object(MonitoredSubService, '_ban_user')
@@ -172,12 +177,13 @@ def test__handle_high_volume_reposter_check_over_threshold_remove(self, mock_ban
             high_volume_reposter_ban_user=False,
             high_volume_reposter_threshold=100,
             high_volume_reposter_notify_mod_mail=False,
-            high_volume_reposter_remove_post=True
+            high_volume_reposter_remove_post=True,
+            high_volume_reposter_removal_reason='Removed'
         )
         post = Post(subreddit='test_subreddit', author='test_user')
         sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub)
         mock_ban_user.assert_not_called()
-        mock_remove_post.assert_called_once_with(monitored_sub, ANY)
+        mock_remove_post.assert_called_once_with('Removed', ANY)
         mock_response_handler.send_mod_mail.assert_not_called()
 
     @patch.object(MonitoredSubService, '_remove_post')
@@ -195,12 +201,13 @@ def test__handle_high_volume_reposter_check_over_threshold_remove_and_ban(self,
             high_volume_reposter_ban_user=True,
             high_volume_reposter_threshold=100,
             high_volume_reposter_notify_mod_mail=False,
-            high_volume_reposter_remove_post=True
+            high_volume_reposter_remove_post=True,
+            high_volume_reposter_removal_reason='Removed'
        )
         post = Post(subreddit='test_subreddit', author='test_user')
         sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub)
         mock_ban_user.assert_called_once_with('test_user', 'test_subreddit', 'High volume of reposts detected by Repost Sleuth')
-        mock_remove_post.assert_called_once_with(monitored_sub, ANY)
+        mock_remove_post.assert_called_once_with('Removed', ANY)
         mock_response_handler.send_mod_mail.assert_not_called()
 
     @patch.object(MonitoredSubService, '_remove_post')

From 61dff2f068b34bfd2e18be59c9ff94ba7ef6753c Mon Sep 17 00:00:00 2001
From: barry
Date: Sun, 11 Feb 2024 17:11:02 -0500
Subject: [PATCH 04/10] Add mod note to removal

---
 redditrepostsleuth/submonitorsvc/monitored_sub_service.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
index 99d897e..8856d3f 100644
--- a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
+++ b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py
@@ -185,7 +185,8 @@ def handle_high_volume_reposter_check(
                 )
             self._remove_post(
                 monitored_sub.high_volume_reposter_removal_reason,
-                self.reddit.submission(post.post_id)
+                self.reddit.submission(post.post_id),
+                mod_note='High volume of reposts detected by Repost Sleuth'
             )
 
         if monitored_sub.high_volume_reposter_ban_user:

From 3dbb15ce4bea875268a43ffb35d024ca05db9c9d Mon Sep 17 00:00:00 2001
From: Matthew Carey
Date: Sun, 18 Feb 2024 17:42:40 -0500
Subject: [PATCH 05/10] Feature/redgif (#364)

* working redgifs ingest. Proxy not enabled

* Cleanup and final redgif support

* Cleanup and final redgif support

* failed remove post test

* added redgifs package
---
 .../celery/task_logic/ingest_task_logic.py    | 75 +++++++++++++++--
 .../core/celery/tasks/ingest_tasks.py         | 48 +++++++++--
 redditrepostsleuth/core/exception.py          |  6 +-
 .../core/services/redgifs_token_manager.py    | 84 +++++++++++++++++++
 redditrepostsleuth/core/util/constants.py     |  2 +
 redditrepostsleuth/core/util/helpers.py       | 49 +++++++----
 redditrepostsleuth/core/util/imagehashing.py  | 12 ++-
 redditrepostsleuth/ingestsvc/ingestsvc.py     |  2 +-
 requirements.txt                              |  3 +-
 tests/submonitorsvc/test_subMonitor.py        | 16 +++-
 worker-requirements.txt                       |  3 +-
 11 files changed, 261 insertions(+), 39 deletions(-)
 create mode 100644 redditrepostsleuth/core/services/redgifs_token_manager.py

diff --git a/redditrepostsleuth/core/celery/task_logic/ingest_task_logic.py b/redditrepostsleuth/core/celery/task_logic/ingest_task_logic.py
index 9a9fef0..23a2bee 100644
--- a/redditrepostsleuth/core/celery/task_logic/ingest_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/ingest_task_logic.py
@@ -1,23 +1,75 @@
 import logging
+import os
 from hashlib import md5
 from typing import Optional
+from urllib.parse import urlparse
 
 import imagehash
+import redgifs
+from redgifs import HTTPException
 
 from redditrepostsleuth.core.db.databasemodels import Post, PostHash
 from redditrepostsleuth.core.exception import ImageRemovedException, ImageConversionException, InvalidImageUrlException, \
     GalleryNotProcessed
-from redditrepostsleuth.core.util.imagehashing import log, generate_img_by_url_requests
+from redditrepostsleuth.core.proxy_manager import ProxyManager
+from redditrepostsleuth.core.services.redgifs_token_manager import RedGifsTokenManager
+from redditrepostsleuth.core.util.constants import GENERIC_USER_AGENT
+from redditrepostsleuth.core.util.imagehashing import generate_img_by_url_requests
 from redditrepostsleuth.core.util.objectmapping import reddit_submission_to_post
 
 log = logging.getLogger(__name__)
 
-def pre_process_post(submission: dict) -> Optional[Post]:
+
+def get_redgif_id_from_url(url: str) -> Optional[str]:
+    parsed_url = urlparse(url)
+    id, _ = os.path.splitext(parsed_url.path.replace('/i/', ''))
+    return id
+
+def get_redgif_image_url(reddit_url: str, token: str, proxy: str = None) -> Optional[str]:
+
+    id = get_redgif_id_from_url(reddit_url)
+    if not id:
+        log.error('Failed to parse RedGifs ID from %s', reddit_url)
+        return
+
+    api = redgifs.API()
+    api.http._proxy = {'http': proxy, 'https': proxy}
+    api.http.headers.update({'User-Agent': GENERIC_USER_AGENT, 'authorization': f'Bearer {token}'})
+    try:
+        gif = api.get_gif(id)
+    except Exception as e:
+        log.error('')
+    return gif.urls.hd
+
+
+def pre_process_post(
+        submission: dict,
+        proxy_manager: ProxyManager,
+        redgif_manager: RedGifsTokenManager,
+        domains_to_proxy: list[str]
+) -> Optional[Post]:
+
     post = reddit_submission_to_post(submission)
 
+    proxy = None
+    parsed_url = urlparse(post.url)
+    if parsed_url.netloc in domains_to_proxy:
+        proxy = proxy_manager.get_proxy().address
+
     if post.post_type_id == 2: # image
-        process_image_post(post)
+
+        # Hacky RedGif support.  Will need to be refactored if we have to do similar for other sites
+        redgif_url = None
+        if 'redgif' in post.url:
+            token = redgif_manager.get_redgifs_token()
+            try:
+                redgif_url = get_redgif_image_url(submission['url'], token)
+            except HTTPException as e:
+                if 'code' in e.error and e.error['code'] == 'TokenDecodeError':
+                    redgif_manager.remove_redgifs_token(proxy or 'localhost')
+                raise e
+
+        process_image_post(post, url=redgif_url, proxy=proxy)
     elif post.post_type_id == 6: # gallery
         process_gallery(post, submission)
@@ -28,12 +80,21 @@ def pre_process_post(submission: dict) -> Optional[Post]:
 
     return post
 
-def process_image_post(post: Post, hash_size: int = 16) -> Post:
-
-    log.info('Hashing image with URL: %s', post.url)
+def process_image_post(post: Post, url: str = None, proxy: str = None, hash_size: int = 16) -> Post:
+    """
+    Process an image post to generate the required hashes
+    :param proxy: Proxy to request image with
+    :param post: post object
+    :param url: Alternate URL to use
+    :param hash_size: Size of hash
+    :return: Post object with hashes
+    """
+    log.debug('Hashing image with URL: %s', post.url)
+    if url:
+        log.info('Hashing %s', post.url)
 
     try:
-        img = generate_img_by_url_requests(post.url)
+        img = generate_img_by_url_requests(url or post.url, proxy=proxy)
     except ImageConversionException as e:
         log.warning('Image conversion error: %s', e)
         raise
diff --git a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
index a2b86bb..4bb4726 100644
--- a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
@@ -1,17 +1,47 @@
+import json
+import random
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Optional
+
+import requests
+from celery import Task
+from redgifs import HTTPException
 from sqlalchemy.exc import IntegrityError
 
 from redditrepostsleuth.core.celery import celery
 from redditrepostsleuth.core.celery.basetasks import SqlAlchemyTask
-from redditrepostsleuth.core.celery.task_logic.ingest_task_logic import pre_process_post
+from redditrepostsleuth.core.celery.task_logic.ingest_task_logic import pre_process_post, get_redgif_image_url
+from redditrepostsleuth.core.config import Config
+from redditrepostsleuth.core.db.db_utils import get_db_engine
+from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
 from redditrepostsleuth.core.exception import InvalidImageUrlException, GalleryNotProcessed, ImageConversionException, \
-    ImageRemovedException
+    ImageRemovedException, RedGifsTokenException
 from redditrepostsleuth.core.logging import get_configured_logger
+from redditrepostsleuth.core.proxy_manager import ProxyManager
+from redditrepostsleuth.core.services.eventlogging import EventLogging
+from redditrepostsleuth.core.services.redgifs_token_manager import RedGifsTokenManager
+from redditrepostsleuth.core.util.constants import GENERIC_USER_AGENT
 from redditrepostsleuth.core.util.objectmapping import reddit_submission_to_post
 
 log = get_configured_logger('redditrepostsleuth')
 
-
-@celery.task(bind=True, base=SqlAlchemyTask, ignore_reseults=True, serializer='pickle', autoretry_for=(ConnectionError,ImageConversionException,GalleryNotProcessed), retry_kwargs={'max_retries': 10, 'countdown': 300})
+@dataclass
+class RedGifsToken:
+    token: str
+    expires_at: datetime
+    proxy: str
+
+class IngestTask(Task):
+    def __init__(self):
+        self.config = Config()
+        self.uowm = UnitOfWorkManager(get_db_engine(self.config))
+        self.event_logger = EventLogging()
+        self._redgifs_token_manager = RedGifsTokenManager()
+        self._proxy_manager = ProxyManager(self.uowm, 1000)
+        self.domains_to_proxy = []
+
+@celery.task(bind=True, base=IngestTask, ignore_reseults=True, serializer='pickle', autoretry_for=(ConnectionError,ImageConversionException,GalleryNotProcessed, HTTPException), retry_kwargs={'max_retries': 10, 'countdown': 300})
 def save_new_post(self, submission: dict, repost_check: bool = True):
 
     # TODO: temp fix until I can fix imgur gifs
@@ -24,16 +54,22 @@ def save_new_post(self, submission: dict, repost_check: bool = True):
             return
 
     try:
-        post = pre_process_post(submission)
+        post = pre_process_post(submission, self._proxy_manager, self._redgifs_token_manager, [])
     except (ImageRemovedException, InvalidImageUrlException) as e:
         return
+    except GalleryNotProcessed as e:
+        log.warning('Gallery not finished processing')
+        raise e
+    except Exception as e:
+        log.exception('Failed during post pre-process')
+        return
 
     if not post:
         return
 
     monitored_sub = uow.monitored_sub.get_by_sub(post.subreddit)
     if monitored_sub and monitored_sub.active:
-        log.info('Sending ingested post to monitored sub queue')
+        log.info('Sending ingested post to monitored sub queue for %s', monitored_sub.name)
         celery.send_task('redditrepostsleuth.core.celery.tasks.monitored_sub_tasks.sub_monitor_check_post',
                          args=[post.post_id, monitored_sub],
                          queue='submonitor', countdown=20)
diff --git a/redditrepostsleuth/core/exception.py b/redditrepostsleuth/core/exception.py
index b3e8099..2e8d32a 100644
--- a/redditrepostsleuth/core/exception.py
+++ b/redditrepostsleuth/core/exception.py
@@ -70,4 +70,8 @@ def __init__(self, message):
 
 class UserNotFound(RepostSleuthException):
     def __init__(self, message):
-        super(UserNotFound, self).__init__(message)
\ No newline at end of file
+        super(UserNotFound, self).__init__(message)
+
+class RedGifsTokenException(RepostSleuthException):
+    def __init__(self, message):
+        super(RedGifsTokenException, self).__init__(message)
diff --git a/redditrepostsleuth/core/services/redgifs_token_manager.py b/redditrepostsleuth/core/services/redgifs_token_manager.py
new file mode 100644
index 0000000..ff9a20d
--- /dev/null
+++ b/redditrepostsleuth/core/services/redgifs_token_manager.py
@@ -0,0 +1,84 @@
+import json
+import logging
+
+import requests
+from redis import Redis
+
+from redditrepostsleuth.core.config import Config
+from redditrepostsleuth.core.exception import RedGifsTokenException
+from redditrepostsleuth.core.util.constants import GENERIC_USER_AGENT
+
+log = logging.getLogger(__name__)
+
+"""
+Class for managing and caching RedGifs API tokens.  Currently overkill but if we need to backfill the database or
+API rate limits get tight this will support caching a token for each proxy to Redis
+"""
+class RedGifsTokenManager:
+    def __init__(self):
+        config = Config()
+        self.redis = Redis(
+            host=config.redis_host,
+            port=config.redis_port,
+            db=config.redis_database,
+            password=config.redis_password,
+            decode_responses=True
+        )
+
+
+    def _cache_token(self, key: str, token: str) -> None:
+        """
+        Take a given token and cache it to Redis
+        :param key: key of the token
+        :param token: API token
+        """
+        log.info('Caching token for %s', key)
+        self.redis.set(f'redgifs-token:{key}', token, ex=82800)
+
+    def remove_redgifs_token(self, key: str) -> None:
+        """
+        Removed a cached token from Redis with a given key
+        :param key: key to remove
+        """
+        log.info('Removing token for %s', key)
+        self.redis.delete(f'redgifs-token:{key}')
+
+
+    def get_redgifs_token(self, address: str = 'localhost') -> str:
+        """
+        Either return an existing cached token or create a new one
+        :param address: address of the proxy being used
+        :return: Token
+        """
+        cached_token = self.redis.get(f'redgifs-token:{address}')
+        if not cached_token:
+            return self._request_and_cache_token(address)
+
+        log.debug('Found cached token for %s', address)
+        return cached_token
+
+
+    def _request_and_cache_token(self, proxy_address: str = 'localhost') -> str:
+        """
+        Hit the Redgif API and request a new auth token.  Cache it to Redis
+        :param proxy_address: Proxy to use, if any
+        :return: Token
+        """
+        proxies = None
+        if proxy_address != 'localhost':
+            proxies = {'http': f'https://{proxy_address}', 'https': f'http://{proxy_address}'}
+
+        token_res = requests.get(
+            'https://api.redgifs.com/v2/auth/temporary',
+            headers={'User-Agent': GENERIC_USER_AGENT},
+            proxies=proxies
+        )
+
+        if token_res.status_code != 200:
+            log.error('Failed to get RedGif token. Status Code %s', token_res.status_code)
+            raise RedGifsTokenException(f'Failed to get RedGif token.  Status Code {token_res.status_code}')
+
+        token_data = json.loads(token_res.text)
+
+        self._cache_token(proxy_address or 'localhost', token_data['token'])
+        return token_data['token']
\ No newline at end of file
diff --git a/redditrepostsleuth/core/util/constants.py b/redditrepostsleuth/core/util/constants.py
index cd2f3d5..e808a9c 100644
--- a/redditrepostsleuth/core/util/constants.py
+++ b/redditrepostsleuth/core/util/constants.py
@@ -15,6 +15,8 @@
     'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"
 }
 
+GENERIC_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
+
 REDDIT_REMOVAL_REASONS = ['deleted', 'author', 'reddit', 'copyright_takedown', 'content_takedown']
 
 EXCLUDE_FROM_TOP_REPOSTERS = [
diff --git a/redditrepostsleuth/core/util/helpers.py b/redditrepostsleuth/core/util/helpers.py
index 06347d3..d3c9d03 100644
--- a/redditrepostsleuth/core/util/helpers.py
+++ b/redditrepostsleuth/core/util/helpers.py
@@ -250,33 +250,52 @@ def get_default_image_search_settings(config: Config) -> ImageSearchSettings:
     )
 
 def get_image_search_settings_from_request(req, config: Config) -> ImageSearchSettings:
-    return ImageSearchSettings(
+    search_settings = ImageSearchSettings(
         req.get_param_as_int('target_match_percent', required=True, default=None) or config.default_image_target_match,
         config.default_image_target_annoy_distance,
         target_title_match=req.get_param_as_int('target_title_match', required=False,
                                                 default=None) or config.default_image_target_title_match,
-        filter_dead_matches=req.get_param_as_bool('filter_dead_matches', required=False,
-                                                  default=None) or config.default_image_dead_matches_filter,
-        filter_removed_matches=req.get_param_as_bool('filter_removed_matches', required=False,
-                                                     default=None) or config.default_image_removed_match_filter,
-        only_older_matches=req.get_param_as_bool('only_older_matches', required=False,
-                                                 default=None) or config.default_image_only_older_matches,
-        filter_same_author=req.get_param_as_bool('filter_same_author', required=False,
-                                                 default=None) or config.default_image_same_author_filter,
-        filter_crossposts=req.get_param_as_bool('filter_crossposts', required=False,
-                                                default=None) or config.default_image_crosspost_filter,
+        filter_dead_matches=req.get_param_as_bool('filter_dead_matches', required=False, default=None),
+        filter_removed_matches=req.get_param_as_bool('filter_removed_matches', required=False, default=None),
+        only_older_matches=req.get_param_as_bool('only_older_matches', required=False, default=None),
+        filter_same_author=req.get_param_as_bool('filter_same_author', required=False, default=None),
+        filter_crossposts=req.get_param_as_bool('include_crossposts', required=False, default=None),
         target_meme_match_percent=req.get_param_as_int('target_meme_match_percent', required=False,
                                                        default=None) or config.default_image_target_meme_match,
-        meme_filter=req.get_param_as_bool('meme_filter', required=False,
-                                          default=None) or config.default_image_meme_filter,
-        same_sub=req.get_param_as_bool('same_sub', required=False,
-                                       default=None) or config.default_image_same_sub_filter,
+        meme_filter=req.get_param_as_bool('meme_filter', required=False, default=None),
+        same_sub=req.get_param_as_bool('same_sub', required=False, default=None),
         max_days_old=req.get_param_as_int('max_days_old', required=False,
                                           default=None) or config.default_link_max_days_old_filter,
         max_depth=10000
 
     )
 
+    if search_settings.filter_dead_matches is None:
+        search_settings.filter_dead_matches = config.default_image_dead_matches_filter
+
+    if search_settings.filter_removed_matches is None:
+        search_settings.filter_removed_matches = config.default_image_removed_match_filter
+
+    if search_settings.only_older_matches is None:
+        search_settings.only_older_matches = config.default_image_only_older_matches
+
+    if search_settings.filter_same_author is None:
+        search_settings.filter_same_author = config.default_image_same_author_filter
+
+    if search_settings.meme_filter is None:
+        search_settings.meme_filter = config.default_image_meme_filter
+
+    if search_settings.filter_crossposts is None:
+        search_settings.filter_crossposts = config.default_image_crosspost_filter
+    else:
+        search_settings.filter_crossposts = not search_settings.filter_crossposts
+
+    if search_settings.same_sub is None:
+        search_settings.same_sub = config.default_image_same_sub_filter
+
+
+    return search_settings
+
 
 def get_default_link_search_settings(config: Config) -> SearchSettings:
     return SearchSettings(
diff --git a/redditrepostsleuth/core/util/imagehashing.py b/redditrepostsleuth/core/util/imagehashing.py
index fe010b6..c7caab6 100644
--- a/redditrepostsleuth/core/util/imagehashing.py
+++ b/redditrepostsleuth/core/util/imagehashing.py
@@ -12,6 +12,7 @@
 
 from redditrepostsleuth.core.db.databasemodels import Post
 from redditrepostsleuth.core.exception import ImageConversionException, ImageRemovedException, InvalidImageUrlException
+from redditrepostsleuth.core.util.constants import GENERIC_USER_AGENT
 
 log = logging.getLogger(__name__)
 
@@ -51,23 +52,28 @@ def generate_img_by_url(url: str) -> Image:
     return img if img else None
 
-def generate_img_by_url_requests(url: str) -> Optional[Image]:
+def generate_img_by_url_requests(url: str, proxy: str = None) -> Optional[Image]:
     """
     Take a URL and generate a PIL image
+    :param proxy: Optional proxy to use with request
     :param url: URL to get
     :return: PIL image
     """
     if 'redd.it' in url:
         useragent = 'repostsleuthbot:v1.0.3 Image Hasher (by /u/barrycarey)'
     else:
-        useragent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
+        useragent = GENERIC_USER_AGENT
 
     headers = {
         'User-Agent': useragent
     }
 
+    proxies = None
+    if proxy:
+        proxies = {'http': proxy, 'https': proxy}
+
     try:
-        res = requests.get(url, headers=headers, timeout=7)
+        res = requests.get(url, headers=headers, timeout=7, proxies=proxies)
     except (ConnectionError, Timeout) as e:
         raise ImageConversionException(str(e))
diff --git a/redditrepostsleuth/ingestsvc/ingestsvc.py b/redditrepostsleuth/ingestsvc/ingestsvc.py
index 8f17965..28aa4b1 100644
--- a/redditrepostsleuth/ingestsvc/ingestsvc.py
+++ b/redditrepostsleuth/ingestsvc/ingestsvc.py
@@ -190,7 +190,7 @@ async def main() -> None:
         oldest_post = uow.posts.get_newest_post()
         oldest_id = oldest_post.post_id
 
-    #await ingest_range(newest_id, oldest_id)
+    await ingest_range(newest_id, oldest_id)
 
     delay = 0
     while True:
diff --git a/requirements.txt b/requirements.txt
index 1dfe0aa..d271954 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,4 +14,5 @@ aiohttp==3.9.0
 pyjwt==2.8.0
 gunicorn==21.2.0
 falcon==3.1.1
-cryptography==41.0.6
\ No newline at end of file
+cryptography==41.0.6
+redgifs==1.9.0
\ No newline at end of file
diff --git a/tests/submonitorsvc/test_subMonitor.py b/tests/submonitorsvc/test_subMonitor.py
index fe41e6b..c745db9 100644
--- a/tests/submonitorsvc/test_subMonitor.py
+++ b/tests/submonitorsvc/test_subMonitor.py
@@ -1,6 +1,8 @@
 from unittest import TestCase
 from unittest.mock import MagicMock, Mock, patch, ANY
 
+from praw.models import Submission
+
 from redditrepostsleuth.core.config import Config
 from redditrepostsleuth.core.db.databasemodels import Post, MonitoredSub, PostType, UserReview, UserWhitelist
 from redditrepostsleuth.submonitorsvc.monitored_sub_service import MonitoredSubService
@@ -170,7 +172,10 @@ def test__handle_high_volume_reposter_check_over_threshold_remove(self, mock_ban
             user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None))
         )
         mock_response_handler = Mock(send_mod_mail=Mock())
-        sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler,
+        submission = Submission(MagicMock(), id='11')
+        sub_monitor = MonitoredSubService(MagicMock(), MagicMock(),
+                                          MagicMock(submission=MagicMock(return_value=submission)), MagicMock(),
+                                          mock_response_handler,
                                           config=MagicMock())
         monitored_sub = MonitoredSub(
             name='test_subreddit',
@@ -183,7 +188,7 @@ def test__handle_high_volume_reposter_check_over_threshold_remove(self, mock_ban
         post = Post(subreddit='test_subreddit', author='test_user')
         sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub)
         mock_ban_user.assert_not_called()
-        mock_remove_post.assert_called_once_with('Removed', ANY)
+        mock_remove_post.assert_called_once_with('Removed', submission, mod_note=ANY)
         mock_response_handler.send_mod_mail.assert_not_called()
 
     @patch.object(MonitoredSubService, '_remove_post')
@@ -194,7 +199,10 @@ def test__handle_high_volume_reposter_check_over_threshold_remove_and_ban(self,
             user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None))
         )
         mock_response_handler = Mock(send_mod_mail=Mock())
-        sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler,
+        submission = Submission(MagicMock(), id='11')
+        sub_monitor = MonitoredSubService(MagicMock(), MagicMock(),
+                                          MagicMock(submission=MagicMock(return_value=submission)), MagicMock(),
+                                          mock_response_handler,
                                           config=MagicMock())
         monitored_sub = MonitoredSub(
             name='test_subreddit',
@@ -207,7 +215,7 @@ def test__handle_high_volume_reposter_check_over_threshold_remove_and_ban(self,
         post = Post(subreddit='test_subreddit', author='test_user')
         sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub)
         mock_ban_user.assert_called_once_with('test_user', 'test_subreddit', 'High volume of reposts detected by Repost Sleuth')
-        mock_remove_post.assert_called_once_with('Removed', ANY)
+        mock_remove_post.assert_called_once_with('Removed', submission, mod_note=ANY)
         mock_response_handler.send_mod_mail.assert_not_called()
 
     @patch.object(MonitoredSubService, '_remove_post')
diff --git a/worker-requirements.txt b/worker-requirements.txt
index 7548efc..d2232e1 100644
--- a/worker-requirements.txt
+++ b/worker-requirements.txt
@@ -11,4 +11,5 @@ distance==0.1.3
 pydantic==1.10.9
 sentry-sdk==1.29.2
 pyjwt==2.8.0
-cryptography==41.0.6
\ No newline at end of file
+cryptography==41.0.6
+redgifs==1.9.0
\ No newline at end of file

From 12373700a212ffa10c8a09f28d6db78742a1d68f Mon Sep 17 00:00:00 2001
From: barry
Date: Sat, 2 Mar 2024 12:42:58 -0500
Subject: [PATCH 06/10] Tweaks to OF checking

---
 redditrepostsleuth/core/celery/admin_tasks.py |  3 +-
 .../core/util/onlyfans_handling.py            | 50 +++++++++++--------
 .../queue_monitor_svc/queue_monitor.py        |  7 ++-
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/redditrepostsleuth/core/celery/admin_tasks.py b/redditrepostsleuth/core/celery/admin_tasks.py
b/redditrepostsleuth/core/celery/admin_tasks.py index 624d703..1f9b276 100644 --- a/redditrepostsleuth/core/celery/admin_tasks.py +++ b/redditrepostsleuth/core/celery/admin_tasks.py @@ -131,6 +131,7 @@ def check_user_for_only_fans(self, username: str) -> None: if username in skip_names: log.info('Skipping name %s', username) return + try: with self.uowm.start() as uow: user = uow.user_review.get_by_username(username) @@ -138,7 +139,7 @@ def check_user_for_only_fans(self, username: str) -> None: if user: delta = datetime.utcnow() - user.last_checked if delta.days < 30: - log.debug('Skipping existing user %s, last check was %s days ago', username, delta.days) + log.info('Skipping existing user %s, last check was %s days ago', username, delta.days) return user.content_links_found = False user.notes = None diff --git a/redditrepostsleuth/core/util/onlyfans_handling.py b/redditrepostsleuth/core/util/onlyfans_handling.py index bafbdb9..68dddbf 100644 --- a/redditrepostsleuth/core/util/onlyfans_handling.py +++ b/redditrepostsleuth/core/util/onlyfans_handling.py @@ -115,6 +115,10 @@ def get_profile_links(username: str) -> list[str]: url = f'{config.util_api}/profile?username={username}' response = fetch_from_util_api(url) + if response.status_code == 404: + log.info('Redditor %s no longer exists', username) + raise UserNotFound(f'Redditor {username} no longer exists') + if response.status_code != 200: log.warning('Non 200 return code %s from Util API', response.status_code) raise UtilApiException(f'Unexpected status {response.status_code} from util API') @@ -152,35 +156,39 @@ def get_links_from_comments(username: str) -> list[str]: url = f'{config.util_api}/reddit/user-comment?username={username}' response = fetch_from_util_api(url) - if response.status_code == 404: - raise UserNotFound(f'User {username} does not exist or is banned') + match response.status_code: + case 404: + raise UserNotFound(f'User {username} does not exist or is banned') + case 403: + log.warning('Got unauthorized when checking user comments for %s', username) + raise UserNotFound(f'User {username} does not exist or is banned') + case 429: + log.warning('Rate limited') + raise UtilApiException(f'Rate limited') + case 200: + response_json = json.loads(response.text) + all_urls = [] - if response.status_code == 403: - log.warning('Got unauthorized when checking user comments for %s', username) - return [] - - if response.status_code != 200: - log.warning('Unexpected status %s from util API', response.status_code) - raise UtilApiException(f'Unexpected status {response.status_code} from util API') + if not response_json: + log.warning('Bad data from Util api') + raise UtilApiException(f'Unexpected status {response.status_code} from util API') - response_json = json.loads(response.text) - all_urls = [] + if not response_json['data']['children']: + log.debug('No comment data returned for %s', username) + return [] - if not response_json: - log.warning('Bad data from Util api') - raise UtilApiException(f'Unexpected status {response.status_code} from util API') + for comment in response_json['data']['children']: + all_urls += re.findall(r'href=[\'"]?([^\'" >]+)', comment['data']['body_html']) + log.debug('User %s has %s comment links', username, len(all_urls)) - if not response_json['data']['children']: - log.warning('No comment data returned for %s', username) - return [] + return list(set(all_urls)) - for comment in response_json['data']['children']: - all_urls += re.findall(r'href=[\'"]?([^\'" >]+)', comment['data']['body_html']) 
diff --git a/redditrepostsleuth/core/util/onlyfans_handling.py b/redditrepostsleuth/core/util/onlyfans_handling.py
index bafbdb9..68dddbf 100644
--- a/redditrepostsleuth/core/util/onlyfans_handling.py
+++ b/redditrepostsleuth/core/util/onlyfans_handling.py
@@ -115,6 +115,10 @@ def get_profile_links(username: str) -> list[str]:
     url = f'{config.util_api}/profile?username={username}'
     response = fetch_from_util_api(url)
 
+    if response.status_code == 404:
+        log.info('Redditor %s no longer exists', username)
+        raise UserNotFound(f'Redditor {username} no longer exists')
+
     if response.status_code != 200:
         log.warning('Non 200 return code %s from Util API', response.status_code)
         raise UtilApiException(f'Unexpected status {response.status_code} from util API')
@@ -152,35 +156,39 @@ def get_links_from_comments(username: str) -> list[str]:
     url = f'{config.util_api}/reddit/user-comment?username={username}'
     response = fetch_from_util_api(url)
 
-    if response.status_code == 404:
-        raise UserNotFound(f'User {username} does not exist or is banned')
+    match response.status_code:
+        case 404:
+            raise UserNotFound(f'User {username} does not exist or is banned')
+        case 403:
+            log.warning('Got unauthorized when checking user comments for %s', username)
+            raise UserNotFound(f'User {username} does not exist or is banned')
+        case 429:
+            log.warning('Rate limited')
+            raise UtilApiException(f'Rate limited')
+        case 200:
+            response_json = json.loads(response.text)
+            all_urls = []
 
-    if response.status_code == 403:
-        log.warning('Got unauthorized when checking user comments for %s', username)
-        return []
-
-    if response.status_code != 200:
-        log.warning('Unexpected status %s from util API', response.status_code)
-        raise UtilApiException(f'Unexpected status {response.status_code} from util API')
+            if not response_json:
+                log.warning('Bad data from Util api')
+                raise UtilApiException(f'Unexpected status {response.status_code} from util API')
 
-    response_json = json.loads(response.text)
-    all_urls = []
+            if not response_json['data']['children']:
+                log.debug('No comment data returned for %s', username)
+                return []
 
-    if not response_json:
-        log.warning('Bad data from Util api')
-        raise UtilApiException(f'Unexpected status {response.status_code} from util API')
+            for comment in response_json['data']['children']:
+                all_urls += re.findall(r'href=[\'"]?([^\'" >]+)', comment['data']['body_html'])
+            log.debug('User %s has %s comment links', username, len(all_urls))
 
-    if not response_json['data']['children']:
-        log.warning('No comment data returned for %s', username)
-        return []
+            return list(set(all_urls))
 
-    for comment in response_json['data']['children']:
-        all_urls += re.findall(r'href=[\'"]?([^\'" >]+)', comment['data']['body_html'])
+        case _:
+            log.warning('Unexpected status %s from util API', response.status_code)
+            raise UtilApiException(f'Unexpected status {response.status_code} from util API')
 
-    log.debug('User %s has %s comment links', username, len(all_urls))
-    return list(set(all_urls))
 
 def get_links_from_comments_praw(username: str, reddit: Reddit) -> list[str]:
     all_urls = []
diff --git a/redditrepostsleuth/queue_monitor_svc/queue_monitor.py b/redditrepostsleuth/queue_monitor_svc/queue_monitor.py
index e13cd8e..05a6410 100644
--- a/redditrepostsleuth/queue_monitor_svc/queue_monitor.py
+++ b/redditrepostsleuth/queue_monitor_svc/queue_monitor.py
@@ -2,6 +2,7 @@
 import time
 
 import redis
+from redis import ResponseError
 
 from redditrepostsleuth.core.config import Config
 from redditrepostsleuth.core.logging import get_configured_logger
@@ -23,8 +24,12 @@ def log_queue_size(event_logger):
                 queue_name = queue.decode('utf-8').replace('_kombu.binding.', '')
                 if len(queue_name) > 30 or queue_name in skip_keys or 'celery' in queue_name:
                     continue
+                try:
+                    queue_length = client.llen(queue_name)
+                except ResponseError as e:
+                    continue
                 event_logger.save_event(
-                    CeleryQueueSize(queue_name, client.llen(queue_name), event_type='queue_update', env=os.getenv('RUN_ENV', 'dev')))
+                    CeleryQueueSize(queue_name, queue_length, event_type='queue_update', env=os.getenv('RUN_ENV', 'dev')))
             time.sleep(2)
     except ConnectionError as e:
         log.error('Failed to connect to Redis')

From 4fb925db3e9ce9c24f2b807f4555a6b2a8b5bd37 Mon Sep 17 00:00:00 2001
From: barry
Date: Mon, 11 Mar 2024 19:04:29 -0400
Subject: [PATCH 07/10] Tweaks to OF checking

---
 docker-compose-infra.yml                      |  2 +-
 docker-compose.yml                            |  4 +-
 redditrepostsleuth/core/celery/admin_tasks.py | 48 ++++++++++++-
 .../core/celery/tasks/adult_promoter_tasks.py | 71 +++++++++++++++++++
 .../core/celery/tasks/ingest_tasks.py         |  3 +-
 redditrepostsleuth/core/db/databasemodels.py  |  1 +
 .../core/util/onlyfans_handling.py            | 29 +++++---
 redditrepostsleuth/ingestsvc/ingestsvc.py     |  8 +--
 redditrepostsleuth/ingestsvc/requirements.txt |  3 +-
 9 files changed, 149 insertions(+), 20 deletions(-)
 create mode 100644 redditrepostsleuth/core/celery/tasks/adult_promoter_tasks.py

diff --git a/docker-compose-infra.yml b/docker-compose-infra.yml
index 5c67ca8..27771b2 100644
--- a/docker-compose-infra.yml
+++ b/docker-compose-infra.yml
@@ -31,7 +31,7 @@ services:
       - '6379:6379'
     volumes:
       - /config/redis:/data
-    command: redis-server --save 60 1
+    command: redis-server --save 550 1 --maxmemory 40gb
 
   influxdb:
     image: influxdb:latest
diff --git a/docker-compose.yml b/docker-compose.yml
index 422e872..ab1d937 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -181,9 +181,9 @@ services:
       - .env
     environment:
       - RUN_ENV=production
-      - LOG_LEVEL=INFO
+      - LOG_LEVEL=WARNING
       - CELERY_IMPORTS=redditrepostsleuth.core.celery.admin_tasks
-    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q onlyfans_check -n onlyfans_worker --autoscale=8,5
+    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q onlyfans_check -n onlyfans_worker --autoscale=10,5
 
   subreddit_config_update_worker:
     container_name: subreddit-config-update-worker
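Note: the admin_tasks change just below adds backoff to the only-fans check's retry policy. In celery, exceptions listed in autoretry_for trigger automatic retries governed by retry_kwargs, while retry_backoff is a separate task option that inserts an exponential delay between attempts. A minimal sketch of an equivalent task (the app name, broker URL, and task body are illustrative):

    import requests
    from celery import Celery

    app = Celery('sleuth', broker='redis://localhost:6379/0')  # illustrative broker

    @app.task(
        bind=True,
        autoretry_for=(ConnectionError,),
        retry_kwargs={'max_retries': 3},
        retry_backoff=True,  # exponential delays between attempts: 1s, 2s, 4s...
    )
    def flaky_fetch(self, url: str) -> int:
        # Any ConnectionError raised here is retried automatically
        return requests.get(url).status_code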
diff --git a/redditrepostsleuth/core/celery/admin_tasks.py b/redditrepostsleuth/core/celery/admin_tasks.py
index 1f9b276..2c6fb28 100644
--- a/redditrepostsleuth/core/celery/admin_tasks.py
+++ b/redditrepostsleuth/core/celery/admin_tasks.py
@@ -17,7 +17,8 @@
 from redditrepostsleuth.core.exception import UtilApiException, UserNotFound
 from redditrepostsleuth.core.logfilters import ContextFilter
 from redditrepostsleuth.core.logging import log, configure_logger
-from redditrepostsleuth.core.util.onlyfans_handling import check_user_for_promoter_links
+from redditrepostsleuth.core.util.onlyfans_handling import check_user_for_promoter_links, \
+    check_user_comments_for_promoter_links
 
 log = configure_logger(
     name='redditrepostsleuth',
@@ -124,7 +125,7 @@ def update_subreddit_config_from_database(self, monitored_sub: MonitoredSub, use
 )
 
 
-@celery.task(bind=True, base=AdminTask, autoretry_for=(UtilApiException,ConnectionError,TooManyRequests), retry_kwargs={'max_retries': 3})
+@celery.task(bind=True, base=AdminTask, autoretry_for=(UtilApiException,ConnectionError,TooManyRequests), retry_kwargs={'max_retries': 3}, retry_backoff=True)
 def check_user_for_only_fans(self, username: str) -> None:
     skip_names = ['[deleted]', 'AutoModerator']
 
@@ -145,7 +146,7 @@ def check_user_for_only_fans(self, username: str) -> None:
             user.notes = None
             user.last_checked = func.utc_timestamp()
 
-            log.debug('Checking user %s', username)
+            log.info('Checking user %s', username)
             if not user:
                 user = UserReview(username=username)
             try:
@@ -164,5 +165,46 @@ def check_user_for_only_fans(self, username: str) -> None:
         raise e
     except IntegrityError:
         pass
+    except Exception as e:
+        log.exception('')
+
+
+@celery.task(bind=True, base=AdminTask, autoretry_for=(UtilApiException,ConnectionError,TooManyRequests), retry_kwargs={'max_retries': 3})
+def check_user_comments_for_only_fans(self, username: str) -> None:
+    """
+    This should be run after the profile check so we don't do any timeframe checking
+    :param self:
+    :param username:
+    :return:
+    """
+    skip_names = ['[deleted]', 'AutoModerator']
+
+    if username in skip_names:
+        log.info('Skipping name %s', username)
+        return
+
+    try:
+        with self.uowm.start() as uow:
+            user = uow.user_review.get_by_username(username)
+
+            if not user:
+                log.error('User not found: %s', username)
+
+            try:
+                result = check_user_comments_for_promoter_links(username)
+            except UserNotFound as e:
+                log.warning(e)
+                return
+
+            if result:
+                log.info('Promoter found: %s - %s', username, str(result))
+                user.content_links_found = True
+                user.notes = str(result)
+                uow.user_review.add(user)
+                uow.commit()
+    except (UtilApiException, ConnectionError, TooManyRequests) as e:
+        raise e
+    except IntegrityError:
+        pass
+    except Exception as e:
+        log.exception('')
\ No newline at end of file
diff --git a/redditrepostsleuth/core/celery/tasks/adult_promoter_tasks.py b/redditrepostsleuth/core/celery/tasks/adult_promoter_tasks.py
new file mode 100644
index 0000000..30980a1
--- /dev/null
+++ b/redditrepostsleuth/core/celery/tasks/adult_promoter_tasks.py
@@ -0,0 +1,71 @@
+from celery import Task
+from prawcore import TooManyRequests
+from redis import Redis
+
+from redditrepostsleuth.core.celery import celery
+from redditrepostsleuth.core.config import Config
+from redditrepostsleuth.core.db.db_utils import get_db_engine
+from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
+from redditrepostsleuth.core.exception import UtilApiException
+from redditrepostsleuth.core.notification.notification_service import NotificationService
+from redditrepostsleuth.core.services.eventlogging import EventLogging
+from redditrepostsleuth.core.services.response_handler import ResponseHandler
+from redditrepostsleuth.core.util.reddithelpers import get_reddit_instance
+
+
+class AdultPromoterTask(Task):
+    def __init__(self):
+        self.config = Config()
+        self.reddit = get_reddit_instance(self.config)
+        self.uowm = UnitOfWorkManager(get_db_engine(self.config))
+        self.event_logger = EventLogging(config=self.config)
+        self.response_handler = ResponseHandler(self.reddit, self.uowm, self.event_logger,
+                                                live_response=self.config.live_responses)
+        self.notification_svc = NotificationService(self.config)
+        self.redis_client = Redis(
+            host=self.config.redis_host,
+            port=self.config.redis_port,
+            db=self.config.redis_database,
+            password=self.config.redis_password
+        )
+
+
+@celery.task(bind=True, base=AdultPromoterTask, autoretry_for=(UtilApiException,ConnectionError,TooManyRequests), retry_kwargs={'max_retries': 3})
+def check_user_comments_for_only_fans(self, username: str) -> None:
+    """
+    This should be run after the profile check so we don't do any timeframe checking
+    :param self:
+    :param username:
+    :return:
+    """
+    skip_names = ['[deleted]', 'AutoModerator']
+
+    if username in skip_names:
+        log.info('Skipping name %s', username)
+        return
+
+    try:
+        with self.uowm.start() as uow:
+            user = uow.user_review.get_by_username(username)
+
+            if not user:
+                log.error('User not found: %s', username)
+
+            try:
+                result = check_user_comments_for_promoter_links(username)
+            except UserNotFound as e:
+                log.warning(e)
+                return
+
+            if result:
+                log.info('Promoter found: %s - %s', username, str(result))
+                user.content_links_found = True
+                user.notes = str(result)
+                uow.user_review.add(user)
+                uow.commit()
+    except (UtilApiException, ConnectionError, TooManyRequests) as e:
+        raise e
+    except IntegrityError:
+        pass
+    except Exception as e:
+        log.exception('')
\ No newline at end of file
diff --git a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
index 4bb4726..6d54e6c 100644
--- a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
@@ -86,7 +86,8 @@ def save_new_post(self, submission: dict, repost_check: bool = True):
 
     if repost_check:
         if post.post_type_id == 1:
-            celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_for_text_repost_task', args=[post])
+            pass
+            #celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_for_text_repost_task', args=[post])
         elif post.post_type_id == 2:
             celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.check_image_repost_save', args=[post])
         elif post.post_type_id == 3:
diff --git a/redditrepostsleuth/core/db/databasemodels.py b/redditrepostsleuth/core/db/databasemodels.py
index 93741f1..7b87526 100644
--- a/redditrepostsleuth/core/db/databasemodels.py
+++ b/redditrepostsleuth/core/db/databasemodels.py
@@ -87,6 +87,7 @@ def __repr__(self) -> str:
 
     post = relationship("Post", back_populates='hashes')
     hash_type = relationship("HashType")
+    #hash_type = relationship("HashType", lazy='joined')
 
     def to_dict(self):
         return {
diff --git a/redditrepostsleuth/core/util/onlyfans_handling.py b/redditrepostsleuth/core/util/onlyfans_handling.py
index 68dddbf..cbb22ec 100644
--- a/redditrepostsleuth/core/util/onlyfans_handling.py
+++ b/redditrepostsleuth/core/util/onlyfans_handling.py
@@ -24,7 +24,9 @@
     'deviantart.com',
     'facebook.com',
     'reddit.com',
-    'youtube.com'
+    'youtube.com',
+    'twitch.tv',
+    'discord.gg'
 ]
 
 landing_domains = [
@@ -86,11 +88,6 @@ def check_page_source_for_flagged_words(page_source: str) -> str:
 
 def process_landing_link(url: str) -> Optional[str]:
     url_to_fetch = f'{config.util_api}/page-source?url={url}'
-    parsed_url = urlparse(url)
-    all_urls = flagged_words + known_domains + landing_domains
-    if parsed_url.netloc not in all_urls:
-        # So we can flag landing domains we're not checking
-        log.error('---------------------------------------> %s', url)
     response = requests.get(url_to_fetch)
     if response.status_code != 200:
         log.warning('No page text return for %s', url)
@@ -100,10 +97,11 @@ def process_landing_link(url: str) -> Optional[str]:
 
 
 def fetch_from_util_api(url: str) -> Response:
+    log.debug('Fetching %s', url)
     try:
         response = requests.get(url)
-    except ConnectionError:
-        log.error('Util API not responding')
+    except ConnectionError as e:
+        log.error('Util API not responding: %s', e)
         raise UtilApiException(f'Util API failed to connect')
     except Exception:
         log.exception('Unexpected exception from Util API')
@@ -126,6 +124,8 @@ def get_profile_links(username: str) -> list[str]:
     profile_links = json.loads(response.text)
     return profile_links
 
+links = []
+
 def check_user_for_promoter_links(username: str) -> Optional[LinkCheckResult]:
     profile_links = get_profile_links(username)
 
@@ -152,6 +152,19 @@ def check_user_for_promoter_links(username: str) -> Optional[LinkCheckResult]:
         if landing_link_with_flagged_content:
             return LinkCheckResult(source='Comment landing', url=landing_link_with_flagged_content)
 
+def check_user_comments_for_promoter_links(username: str) -> Optional[LinkCheckResult]:
+    comment_links = get_links_from_comments(username)
+
+    content_links_found = check_links_for_flagged_domains(comment_links)
+    if content_links_found:
+        return LinkCheckResult(source='Comment', url=content_links_found)
+
+    landing_link_found = check_links_for_landing_pages(comment_links)
+    if landing_link_found:
+        landing_link_with_flagged_content = process_landing_link(landing_link_found)
+        if landing_link_with_flagged_content:
+            return LinkCheckResult(source='Comment landing', url=landing_link_with_flagged_content)
+
 def get_links_from_comments(username: str) -> list[str]:
     url = f'{config.util_api}/reddit/user-comment?username={username}'
     response = fetch_from_util_api(url)
diff --git a/redditrepostsleuth/ingestsvc/ingestsvc.py b/redditrepostsleuth/ingestsvc/ingestsvc.py
index 28aa4b1..0abf31f 100644
--- a/redditrepostsleuth/ingestsvc/ingestsvc.py
+++ b/redditrepostsleuth/ingestsvc/ingestsvc.py
@@ -94,8 +94,8 @@ async def fetch_page_as_job(job: BatchedPostRequestJob, session: ClientSession)
     except TimeoutError as e:
         log.error('Request Timeout')
         job.status = JobStatus.ERROR
-    except ClientConnectorError:
-        log.error('Client Connection Error')
+    except ClientConnectorError as e:
+        log.error('Client Connection Error: %s', e)
         await asyncio.sleep(5)
         job.status = JobStatus.ERROR
     except ServerDisconnectedError as e:
@@ -128,8 +128,8 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
             except StopIteration:
                 break
 
-            url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(chunk)}'
-            #url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(chunk)}'
+            #url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(chunk)}'
+            url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(chunk)}'
             job = BatchedPostRequestJob(url, chunk, JobStatus.STARTED)
             tasks.append(ensure_future(fetch_page_as_job(job, session)))
             if len(tasks) >= 50 or len(chunk) == 0:
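Note: the ingest change above points the batch lookup back at reddit's /api/info endpoint, which takes a comma-separated list of fullnames. A rough sketch of how such a batched query can be built from base-36 post ids (build_reddit_query_string's real signature isn't shown in this series, so the helper names here are assumptions; /api/info accepts at most 100 fullnames per request):

    from itertools import islice
    from typing import Iterator

    def chunked(ids: list[str], size: int = 100) -> Iterator[list[str]]:
        """Yield fixed-size chunks; /api/info caps out at 100 fullnames per call."""
        it = iter(ids)
        while chunk := list(islice(it, size)):
            yield chunk

    def build_info_url(chunk: list[str]) -> str:
        # The t3_ prefix marks submission fullnames
        fullnames = ','.join(f't3_{post_id}' for post_id in chunk)
        return f'https://api.reddit.com/api/info?id={fullnames}'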
diff --git a/redditrepostsleuth/ingestsvc/requirements.txt b/redditrepostsleuth/ingestsvc/requirements.txt
index 1765168..6f32925 100644
--- a/redditrepostsleuth/ingestsvc/requirements.txt
+++ b/redditrepostsleuth/ingestsvc/requirements.txt
@@ -7,4 +7,5 @@ imagehash==4.3.1
 influxdb-client==1.37.0
 aiohttp==3.9.0
 sentry-sdk==1.29.2
-cryptography==41.0.6
\ No newline at end of file
+cryptography==41.0.6
+redgifs==1.9.1
\ No newline at end of file

From e1f60703f84a8329be8a73395d44afeaa79cc43c Mon Sep 17 00:00:00 2001
From: barry
Date: Mon, 1 Apr 2024 14:34:40 -0400
Subject: [PATCH 08/10] Moving OF checking to only registered subreddits with
 the feature enabled

---
 .../core/celery/celeryconfig.py               |  8 +--
 .../task_logic/monitored_sub_task_logic.py    | 14 +++++-
 .../celery/task_logic/scheduled_task_logic.py |  5 +-
 redditrepostsleuth/core/celery/tasks.py       |  2 +-
 .../core/celery/tasks/ingest_tasks.py         |  2 +-
 redditrepostsleuth/core/db/databasemodels.py  |  2 +-
 .../core/util/onlyfans_handling.py            | 49 +++++++++++++++++++
 7 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/redditrepostsleuth/core/celery/celeryconfig.py b/redditrepostsleuth/core/celery/celeryconfig.py
index 9b920f6..cc06972 100644
--- a/redditrepostsleuth/core/celery/celeryconfig.py
+++ b/redditrepostsleuth/core/celery/celeryconfig.py
@@ -83,10 +83,10 @@
         'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.queue_config_updates_task',
         'schedule': 3600
     },
-    'update-profile-token': {
-        'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_profile_token_task',
-        'schedule': 120
-    },
+    # 'update-profile-token': {
+    #     'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_profile_token_task',
+    #     'schedule': 120
+    # },
     'update-daily-stats': {
         'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_daily_stats',
         'schedule': 86400
diff --git a/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py b/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
index 86f869a..bb0f323 100644
--- a/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
@@ -5,7 +5,8 @@
 from prawcore import TooManyRequests
 
 from redditrepostsleuth.core.db.uow.unitofwork import UnitOfWork
-from redditrepostsleuth.core.exception import RateLimitException, NoIndexException
+from redditrepostsleuth.core.exception import RateLimitException, NoIndexException, UtilApiException
+from redditrepostsleuth.core.util.onlyfans_handling import check_user_for_only_fans
 from redditrepostsleuth.submonitorsvc.monitored_sub_service import MonitoredSubService
 
 log = logging.getLogger(__name__)
@@ -24,7 +25,18 @@ def process_monitored_subreddit_submission(post_id: str, monitored_sub_svc: Moni
             log.warning('Unknown post type for %s - https://redd.it/%s', post.post_id, post.post_id)
             return
 
+
+
+
         monitored_sub = uow.monitored_sub.get_by_sub(post.subreddit)
+
+        if monitored_sub.adult_promoter_remove_post or monitored_sub.adult_promoter_ban_user or monitored_sub.adult_promoter_notify_mod_mail:
+            try:
+                check_user_for_only_fans(uow, post.author)
+            except (UtilApiException, ConnectionError, TooManyRequests) as e:
+                log.warning('Failed to do onlyfans check for user %s', post.author)
+
         whitelisted_user = uow.user_whitelist.get_by_username_and_subreddit(post.author, monitored_sub.id)
 
         monitored_sub_svc.handle_only_fans_check(post, uow, monitored_sub, whitelisted_user=whitelisted_user)
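Note: the gate above only runs the promoter check when at least one of the subreddit's adult_promoter_* actions is enabled, so unregistered subs skip the expensive lookup entirely. The same gate can be expressed compactly (any_promoter_action_enabled is a hypothetical helper, not part of the codebase):

    def any_promoter_action_enabled(monitored_sub) -> bool:
        """True when any adult-promoter enforcement action is on for the sub."""
        return any((
            monitored_sub.adult_promoter_remove_post,
            monitored_sub.adult_promoter_ban_user,
            monitored_sub.adult_promoter_notify_mod_mail,
        ))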
diff --git a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
index 3308dd5..eb9e023 100644
--- a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
@@ -52,6 +52,7 @@ def update_proxies(uowm: UnitOfWorkManager) -> None:
             uow.http_proxy.delete_all()
             uow.commit()
             for proxy in res_data['results']:
+                print(proxy['proxy_address'])
                 uow.http_proxy.add(
                     HttpProxy(address=f'{proxy["proxy_address"]}:{proxy["port"]}', provider='WebShare')
                 )
@@ -241,8 +242,8 @@ def update_monitored_sub_data(
 
 if __name__ == '__main__':
     uowm = UnitOfWorkManager(get_db_engine(Config()))
-    #update_proxies(uowm)
-    #sys.exit()
+    update_proxies(uowm)
+    sys.exit()
     while True:
         token_checker()
         time.sleep(240)
diff --git a/redditrepostsleuth/core/celery/tasks.py b/redditrepostsleuth/core/celery/tasks.py
index a311301..099af02 100644
--- a/redditrepostsleuth/core/celery/tasks.py
+++ b/redditrepostsleuth/core/celery/tasks.py
@@ -12,7 +12,7 @@
 from redditrepostsleuth.core.util.videohelpers import generate_thumbnails_from_url, download_file, \
     generate_thumbnails_from_file
 
-
+# TODO - Remove this file
 @celery.task(bind=True, base=EventLoggerTask, ignore_results=True, serializer='pickle')
 def log_event(self, event):
     self.event_logger.save_event(event)
diff --git a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
index 6d54e6c..8d52b6a 100644
--- a/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/ingest_tasks.py
@@ -93,7 +93,7 @@ def save_new_post(self, submission: dict, repost_check: bool = True):
         elif post.post_type_id == 3:
             celery.send_task('redditrepostsleuth.core.celery.tasks.repost_tasks.link_repost_check', args=[post])
 
-    celery.send_task('redditrepostsleuth.core.celery.admin_tasks.check_user_for_only_fans', args=[post.author])
+    #celery.send_task('redditrepostsleuth.core.celery.admin_tasks.check_user_for_only_fans', args=[post.author])
 
 
 
diff --git a/redditrepostsleuth/core/db/databasemodels.py b/redditrepostsleuth/core/db/databasemodels.py
index 7b87526..2ea6354 100644
--- a/redditrepostsleuth/core/db/databasemodels.py
+++ b/redditrepostsleuth/core/db/databasemodels.py
@@ -507,7 +507,7 @@ class MonitoredSubConfigRevision(Base):
     id = Column(Integer, primary_key=True)
     revision_id = Column(String(36), nullable=False, unique=True)
     revised_by = Column(String(25), nullable=False)
-    config = Column(String(2500), nullable=False)
+    config = Column(String(3000), nullable=False)
     config_loaded_at = Column(DateTime)
     is_valid = Column(Boolean, default=False)
     notified = Column(Boolean, default=False)
diff --git a/redditrepostsleuth/core/util/onlyfans_handling.py b/redditrepostsleuth/core/util/onlyfans_handling.py
index cbb22ec..0bf8fcf 100644
--- a/redditrepostsleuth/core/util/onlyfans_handling.py
+++ b/redditrepostsleuth/core/util/onlyfans_handling.py
@@ -2,16 +2,21 @@
 import logging
 import re
 from dataclasses import dataclass
+from datetime import datetime
 from typing import Optional
 from urllib.parse import urlparse
 
 import requests
 from praw import Reddit
+from prawcore import TooManyRequests
 from requests import Response
 from requests.exceptions import ConnectionError
+from sqlalchemy import func
+from sqlalchemy.exc import IntegrityError
 
 from redditrepostsleuth.core.config import Config
 from redditrepostsleuth.core.db.databasemodels import UserReview
+from redditrepostsleuth.core.db.uow.unitofwork import UnitOfWork
 from redditrepostsleuth.core.exception import UtilApiException, UserNotFound
 
 log = logging.getLogger(__name__)
@@ -216,3 +221,47 @@ def get_links_from_comments_praw(username: str, reddit: Reddit) -> list[str]:
 
     log.debug('User %s has %s comment links', username, len(all_urls))
     return list(set(all_urls))
+
+def check_user_for_only_fans(uow: UnitOfWork, username: str) -> Optional[UserReview]:
+    skip_names = ['[deleted]', 'AutoModerator']
+
+    if username in skip_names:
+        log.info('Skipping name %s', username)
+        return
+
+    try:
+        user = uow.user_review.get_by_username(username)
+
+        if user:
+            delta = datetime.utcnow() - user.last_checked
+            if delta.days < 30:
+                log.info('Skipping existing user %s, last check was %s days ago', username, delta.days)
+                return
+            user.content_links_found = False
+            user.notes = None
+            user.last_checked = func.utc_timestamp()
+
+        log.info('Checking user %s', username)
+        if not user:
+            user = UserReview(username=username)
+        try:
+            result = check_user_for_promoter_links(username)
+        except UserNotFound as e:
+            log.warning(e)
+            return
+
+        if result:
+            log.info('Promoter found: %s - %s', username, str(result))
+            user.content_links_found = True
+            user.notes = str(result)
+        uow.user_review.add(user)
+        uow.commit()
+        return user
+    except (UtilApiException, ConnectionError, TooManyRequests) as e:
+        log.exception('')
+        raise e
+    except IntegrityError:
+        pass
+    except Exception as e:
+        log.exception('')
+

From 9c4815ea3b6f31ab521514a563e0619766c0a6f8 Mon Sep 17 00:00:00 2001
From: barry
Date: Sat, 20 Apr 2024 20:33:53 -0400
Subject: [PATCH 09/10] Removed signatures and task cleanup

---
 docker-compose.yml                            |  2 +-
 redditrepostsleuth/adminsvc/inbox_monitor.py  |  3 +-
 .../adminsvc/new_activation_monitor.py        |  4 +-
 redditrepostsleuth/core/celery/admin_tasks.py |  5 ++-
 .../core/celery/celeryconfig.py               | 33 +++++++++------
 .../task_logic/monitored_sub_task_logic.py    |  4 --
 .../celery/task_logic/scheduled_task_logic.py |  2 +-
 .../core/celery/tasks/scheduled_tasks.py      | 38 ++++++++++++-----
 redditrepostsleuth/core/db/databasemodels.py  |  2 +
 .../core/db/repository/repost_search_repo.py  | 19 ++++++++
 .../core/services/responsebuilder.py          |  8 ++--
 .../core/util/onlyfans_handling.py            | 16 ++++---
 .../core/util/replytemplates.py               |  3 +-
 .../summonssvc/summons_monitor.py             |  3 ++
 14 files changed, 97 insertions(+), 45 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index ab1d937..70a54e7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -198,6 +198,6 @@ services:
       - RUN_ENV=production
       - LOG_LEVEL=INFO
       - CELERY_IMPORTS=redditrepostsleuth.core.celery.tasks.scheduled_tasks,redditrepostsleuth.core.celery.admin_tasks
-    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q subreddit_config_updates,update_wiki_from_database -n subreddit_config_update_worker --autoscale=8,1
+    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q batch_delete_searches,subreddit_config_updates,update_wiki_from_database -n subreddit_config_update_worker --autoscale=8,1
 
 
diff --git a/redditrepostsleuth/adminsvc/inbox_monitor.py b/redditrepostsleuth/adminsvc/inbox_monitor.py
index 05ceef4..b4c505b 100644
--- a/redditrepostsleuth/adminsvc/inbox_monitor.py
+++ b/redditrepostsleuth/adminsvc/inbox_monitor.py
@@ -39,7 +39,8 @@ def check_inbox(self):
         print('[Scheduled Job] Checking Inbox Start')
         for msg in self.reddit.inbox.messages(limit=500):
             if msg.author != 'RepostSleuthBot' and msg.subject.lower() in ['false negative', 'false positive']:
-                self._process_user_report(msg)
+                #self._process_user_report(msg)
+                pass
             elif TOP_POST_WATCH_SUBJECT.lower() in msg.subject.lower():
                 self._process_watch_request(msg)
diff --git a/redditrepostsleuth/adminsvc/new_activation_monitor.py b/redditrepostsleuth/adminsvc/new_activation_monitor.py
index 4c30b63..761ce43 100644
--- a/redditrepostsleuth/adminsvc/new_activation_monitor.py
+++ b/redditrepostsleuth/adminsvc/new_activation_monitor.py
@@ -36,7 +36,7 @@ def __init__(
     def check_for_new_invites(self):
         for msg in self.reddit.inbox.messages(limit=1000):
             if 'invitation to moderate' in msg.subject:
-                log.info('Found invitation for %s', msg.subreddit.display_name)
+                log.debug('Found invitation for %s', msg.subreddit.display_name)
                 self.activate_sub(msg)
 
 
@@ -100,7 +100,7 @@ def _create_monitored_sub_in_db(self, msg: Message) -> MonitoredSub:
         with self.uowm.start() as uow:
             existing = uow.monitored_sub.get_by_sub(msg.subreddit.display_name)
             if existing:
-                log.info('Monitored sub %s already exists, skipping activation', msg.subreddit.display_name)
+                log.debug('Monitored sub %s already exists, skipping activation', msg.subreddit.display_name)
                 raise ValueError(f'Monitored Sub already in database: {msg.subreddit.display_name}')
             monitored_sub = MonitoredSub(**{**DEFAULT_CONFIG_VALUES, **{'name': msg.subreddit.display_name}})
             uow.monitored_sub.add(monitored_sub)
diff --git a/redditrepostsleuth/core/celery/admin_tasks.py b/redditrepostsleuth/core/celery/admin_tasks.py
index 2c6fb28..7f5f82a 100644
--- a/redditrepostsleuth/core/celery/admin_tasks.py
+++ b/redditrepostsleuth/core/celery/admin_tasks.py
@@ -17,6 +17,7 @@
 from redditrepostsleuth.core.exception import UtilApiException, UserNotFound
 from redditrepostsleuth.core.logfilters import ContextFilter
 from redditrepostsleuth.core.logging import log, configure_logger
+from redditrepostsleuth.core.util.helpers import chunk_list
 from redditrepostsleuth.core.util.onlyfans_handling import check_user_for_promoter_links, \
     check_user_comments_for_promoter_links
 
@@ -207,4 +208,6 @@ def check_user_comments_for_only_fans(self, username: str) -> None:
     except IntegrityError:
         pass
     except Exception as e:
-        log.exception('')
\ No newline at end of file
+        log.exception('')
+
+
diff --git a/redditrepostsleuth/core/celery/celeryconfig.py b/redditrepostsleuth/core/celery/celeryconfig.py
index cc06972..35899da 100644
--- a/redditrepostsleuth/core/celery/celeryconfig.py
+++ b/redditrepostsleuth/core/celery/celeryconfig.py
@@ -28,17 +28,18 @@
     'redditrepostsleuth.core.celery.tasks.scheduled_tasks.*': {'queue': 'scheduled_tasks'},
     'redditrepostsleuth.core.celery.admin_tasks.update_proxies_job': {'queue': 'scheduled_tasks'},
     'redditrepostsleuth.core.celery.admin_tasks.check_user_for_only_fans': {'queue': 'onlyfans_check'},
-    'redditrepostsleuth.core.celery.admin_tasks.update_subreddit_config_from_database': {'queue': 'update_wiki_from_database'}
+    'redditrepostsleuth.core.celery.admin_tasks.update_subreddit_config_from_database': {'queue': 'update_wiki_from_database'},
+    'redditrepostsleuth.core.celery.admin_tasks.delete_search_batch': {'queue': 'batch_delete_searches'},
 }
 
 
 beat_schedule = {
-    'update-proxy-list': {
-        'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_proxies_task',
-        'schedule': 3600
-    },
+    # 'update-proxy-list': {
+    #     'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_proxies_task',
+    #     'schedule': 3600
+    # },
     'check-inbox': {
         'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.check_inbox_task',
         'schedule': 300
@@ -60,7 +61,7 @@
         'schedule': 300
     },
     'update-top-reposts': {
-        'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_top_reposts_task',
+        'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_all_top_reposts_task',
        'schedule': 86400
     },
     'update-top-reposters': {
@@ -71,14 +72,14 @@
         'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_daily_top_reposters_task',
         'schedule': 900
     },
-    'send-reports-to-meme-voting': {
-        'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.send_reports_to_meme_voting_task',
-        'schedule': 3600
-    },
-    'check-meme-template-potential-votes': {
-        'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.check_meme_template_potential_votes_task',
-        'schedule': 1800
-    },
+    # 'send-reports-to-meme-voting': {
+    #     'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.send_reports_to_meme_voting_task',
+    #     'schedule': 3600
+    # },
+    # 'check-meme-template-potential-votes': {
+    #     'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.check_meme_template_potential_votes_task',
+    #     'schedule': 1800
+    # },
     'monitored-sub-config-update': {
         'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.queue_config_updates_task',
         'schedule': 3600
@@ -91,6 +92,10 @@
         'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.update_daily_stats',
         'schedule': 86400
     },
+    'search-history-cleanup': {
+        'task': 'redditrepostsleuth.core.celery.tasks.scheduled_tasks.queue_search_history_cleanup',
+        'schedule': 3600
+    },
 }
 
 
diff --git a/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py b/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
index bb0f323..ee96061 100644
--- a/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
@@ -25,10 +25,6 @@ def process_monitored_subreddit_submission(post_id: str, monitored_sub_svc: Moni
             log.warning('Unknown post type for %s - https://redd.it/%s', post.post_id, post.post_id)
             return
 
-
-
-
         monitored_sub = uow.monitored_sub.get_by_sub(post.subreddit)
 
         if monitored_sub.adult_promoter_remove_post or monitored_sub.adult_promoter_ban_user or monitored_sub.adult_promoter_notify_mod_mail:
diff --git a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
index eb9e023..991a560 100644
--- a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
@@ -89,7 +89,7 @@ def update_top_reposts(uow: UnitOfWork, post_type_id: int, day_range: int = None
     uow.commit()
 
 def run_update_top_reposts(uow: UnitOfWork) -> None:
-    post_types = [1, 2, 3]
+    post_types = [2, 3]
     day_ranges = [1, 7, 14, 30, None]
     for post_type_id in post_types:
         for days in day_ranges:
diff --git a/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py b/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
index 7f2b117..8cd9f7f 100644
--- a/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/scheduled_tasks.py
@@ -12,6 +12,7 @@
     token_checker, run_update_top_reposters, update_top_reposters, update_monitored_sub_data, run_update_top_reposts
 from redditrepostsleuth.core.db.databasemodels import MonitoredSub, StatsDailyCount
 from redditrepostsleuth.core.logging import configure_logger
+from redditrepostsleuth.core.util.helpers import chunk_list
 from redditrepostsleuth.core.util.reddithelpers import is_sub_mod_praw, get_bot_permissions
 from redditrepostsleuth.core.util.replytemplates import MONITORED_SUB_MOD_REMOVED_CONTENT, \
     MONITORED_SUB_MOD_REMOVED_SUBJECT
@@ -204,14 +205,6 @@ def update_daily_top_reposters_task(self):
     except Exception as e:
         log.exception('Unknown task error')
 
-@celery.task(bind=True, base=SqlAlchemyTask)
-def update_top_reposts_task(self):
-    try:
-        update_top_reposts(self.uowm)
-    except Exception as e:
-        log.exception('Unknown task exception')
-
-
 @celery.task(bind=True, base=RedditTask, autoretry_for=(TooManyRequests,), retry_kwargs={'max_retries': 3})
 def update_monitored_sub_stats_task(self, sub_name: str) -> None:
 
@@ -243,4 +236,31 @@ def update_proxies_task(self) -> None:
 @celery.task
 def update_profile_token_task():
     print('Starting token checker')
-    token_checker()
\ No newline at end of file
+    token_checker()
+
+@celery.task(bind=True, base=SqlAlchemyTask)
+def delete_search_batch(self, ids: list[int]):
+    try:
+        with self.uowm.start() as uow:
+            log.info('Starting range %s:%s', ids[0], ids[-1])
+            for id in ids:
+                search = uow.repost_search.get_by_id(id)
+                if search:
+                    log.debug('Deleting search %s', search.id)
+                    uow.repost_search.remove(search)
+            uow.commit()
+            log.info('Finished range %s:%s', ids[0], ids[-1])
+    except Exception as e:
+        log.exception('')
+
+@celery.task(bind=True, base=SqlAlchemyTask)
+def queue_search_history_cleanup(self):
+    with self.uowm.start() as uow:
+        searches = uow.repost_search.get_all_ids_older_than_days(120, limit=100000000)
+        if not searches:
+            log.info('No search history to cleanup')
+            return
+        log.info('Queuing search history cleanup. ID range: %s:%s', searches[0].id, searches[-1].id)
+        ids = [x[0] for x in searches]
+        for chunk in chunk_list(ids, 5000):
+            delete_search_batch.apply_async((chunk,), queue='batch_delete_searches')
\ No newline at end of file
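Note: the cleanup pair above follows a queue-then-batch pattern — one scheduled task selects stale search IDs and fans them out in fixed-size chunks, and a worker task deletes each chunk in its own transaction. A standalone sketch of the chunking and fan-out (chunk_list's real implementation lives in core.util.helpers; this version and the sample IDs are illustrative):

    from typing import Iterator

    def chunk_list(items: list, chunk_size: int) -> Iterator[list]:
        """Yield successive fixed-size chunks from a list."""
        for i in range(0, len(items), chunk_size):
            yield items[i:i + chunk_size]

    # Fan-out: each chunk becomes one queued deletion job
    stale_ids = list(range(1, 20001))  # illustrative IDs
    batches = list(chunk_list(stale_ids, 5000))
    assert len(batches) == 4 and batches[0][0] == 1 and batches[-1][-1] == 20000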
diff --git a/redditrepostsleuth/core/db/databasemodels.py b/redditrepostsleuth/core/db/databasemodels.py
index 2ea6354..6adb105 100644
--- a/redditrepostsleuth/core/db/databasemodels.py
+++ b/redditrepostsleuth/core/db/databasemodels.py
@@ -226,6 +226,8 @@ class RepostSearch(Base):
     searched_at = Column(DateTime, default=func.utc_timestamp(), nullable=False)
 
     post = relationship("Post", back_populates='searches')
+    monitored_sub_checked = relationship("MonitoredSubChecks", back_populates="search")
+    repost = relationship("Repost", back_populates="search")
     post_type = relationship('PostType')
 
     def __repr__(self):
diff --git a/redditrepostsleuth/core/db/repository/repost_search_repo.py b/redditrepostsleuth/core/db/repository/repost_search_repo.py
index cc7467c..3644ed9 100644
--- a/redditrepostsleuth/core/db/repository/repost_search_repo.py
+++ b/redditrepostsleuth/core/db/repository/repost_search_repo.py
@@ -19,6 +19,25 @@ def add(self, search: RepostSearch):
     def update(self, revision: RepostSearch):
         self.db_session.update(revision)
 
+    def get_all_older_than_days(self, days: int, limit: int = 100) -> list[RepostSearch]:
+        delta = datetime.utcnow() - timedelta(days=days)
+        return self.db_session.query(RepostSearch).filter(RepostSearch.searched_at < delta).limit(limit).all()
+
+    def get_all_ids_older_than_days(self, days: int, limit: int = 100) -> list[RepostSearch]:
+        delta = datetime.utcnow() - timedelta(days=days)
+        return self.db_session.query(RepostSearch.id).filter(RepostSearch.searched_at < delta).order_by(RepostSearch.id).limit(limit).all()
+
+
+    def delete_all_older_than_days(self, days: int, limit: int = 100) -> None:
+        delta = datetime.utcnow() - timedelta(days=days)
+        self.db_session.query(RepostSearch).filter(RepostSearch.searched_at < delta).limit(limit).delete()
+
+    def delete_all_with_lower_id(self, lower_id: int) -> None:
+        self.db_session.query(RepostSearch).filter(RepostSearch.id < lower_id).delete()
+
+    def get_oldest_search(self) -> RepostSearch:
+        return self.db_session.query(RepostSearch).order_by(RepostSearch.id).first()
+
     def get_all(self, limit: int = None):
         return self.db_session.query(RepostSearch).limit(limit).all()
 
diff --git a/redditrepostsleuth/core/services/responsebuilder.py b/redditrepostsleuth/core/services/responsebuilder.py
index 17d3fef..27287ef 100644
--- a/redditrepostsleuth/core/services/responsebuilder.py
+++ b/redditrepostsleuth/core/services/responsebuilder.py
@@ -163,10 +163,10 @@ def build_default_comment(
         if closest_template:
             message += f'\n\n{closest_template}'
 
-        if signature:
-            message += f'\n\n{self._get_signature(search_results)} - {REPORT_POST_LINK}'
-        else:
-            message += f'\n\n{REPORT_POST_LINK}'
+        # if signature:
+        #     message += f'\n\n{self._get_signature(search_results)} - {REPORT_POST_LINK}'
+        # else:
+        #     message += f'\n\n{REPORT_POST_LINK}'
 
         # Checking post type is temp until the site supports everything
         if search_link and search_results.checked_post.post_type.name in ['image']:
diff --git a/redditrepostsleuth/core/util/onlyfans_handling.py b/redditrepostsleuth/core/util/onlyfans_handling.py
index 0bf8fcf..63ede19 100644
--- a/redditrepostsleuth/core/util/onlyfans_handling.py
+++ b/redditrepostsleuth/core/util/onlyfans_handling.py
@@ -38,7 +38,8 @@
     'beacons.ai',
     'linktr.ee',
     'linkbio.co',
-    'snipfeed.co'
+    'snipfeed.co',
+    'allmylink.me'
 ]
 
 flagged_words = [
@@ -117,19 +118,20 @@ def fetch_from_util_api(url: str) -> Response:
 def get_profile_links(username: str) -> list[str]:
     url = f'{config.util_api}/profile?username={username}'
     response = fetch_from_util_api(url)
+    if response.status_code == 200:
+        profile_links = json.loads(response.text)
+        return profile_links
 
     if response.status_code == 404:
         log.info('Redditor %s no longer exists', username)
         raise UserNotFound(f'Redditor {username} no longer exists')
-
-    if response.status_code != 200:
+    elif response.status_code == 503:
+        log.info('No token to check user with')
+        return []
+    else:
         log.warning('Non 200 return code %s from Util API', response.status_code)
         raise UtilApiException(f'Unexpected status {response.status_code} from util API')
 
-    profile_links = json.loads(response.text)
-    return profile_links
-
-links = []
 
 
 def check_user_for_promoter_links(username: str) -> Optional[LinkCheckResult]:
diff --git a/redditrepostsleuth/core/util/replytemplates.py b/redditrepostsleuth/core/util/replytemplates.py
index 937c8b8..3a0bfb1 100644
--- a/redditrepostsleuth/core/util/replytemplates.py
+++ b/redditrepostsleuth/core/util/replytemplates.py
@@ -75,7 +75,8 @@
 TOP_POST_REPORT_MSG = 'Looks like a repost. I\'ve seen this {post_type} {match_count} {times_word}. First seen {oldest_shortlink}'
 
-IMAGE_SEARCH_SETTINGS = '**Scope:** {search_scope} | **Meme Filter:** {meme_filter_used} | **Target:** {effective_target_match_percent}% | **Check Title:** {check_title} | **Max Age:** {max_age}'
+IMAGE_SEARCH_SETTINGS = '**Scope:** {search_scope} | **Target Percent:** {effective_target_match_percent}% | **Max Age:** {max_age}'
+#IMAGE_SEARCH_SETTINGS = '**Scope:** {search_scope} | **Meme Filter:** {meme_filter_used} | **Target:** {effective_target_match_percent}% | **Check Title:** {check_title} | **Max Age:** {max_age}'
 
 GENERIC_SEARCH_SETTINGS = '**Scope:** {search_scope} | **Check Title:** {check_title} | **Max Age:** {max_days_old}'
 
 REPORT_RESPONSE = 'Thank you for your report. \n\nIt has been documented and will be reviewed further'
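Note: the trimmed IMAGE_SEARCH_SETTINGS template above keeps three placeholders, all filled via str.format. A quick illustration with made-up values:

    IMAGE_SEARCH_SETTINGS = '**Scope:** {search_scope} | **Target Percent:** {effective_target_match_percent}% | **Max Age:** {max_age}'

    # Sample values are illustrative, not taken from a real search
    settings_line = IMAGE_SEARCH_SETTINGS.format(
        search_scope='Reddit',
        effective_target_match_percent=92,
        max_age=180,
    )
    print(settings_line)
    # **Scope:** Reddit | **Target Percent:** 92% | **Max Age:** 180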
diff --git a/redditrepostsleuth/summonssvc/summons_monitor.py b/redditrepostsleuth/summonssvc/summons_monitor.py
index 5c175df..c1a1430 100644
--- a/redditrepostsleuth/summonssvc/summons_monitor.py
+++ b/redditrepostsleuth/summonssvc/summons_monitor.py
@@ -87,6 +87,9 @@ def handle_summons(summons: Summons) -> None:
 
 def monitor_for_mentions(reddit: Reddit, uowm: UnitOfWorkManager):
     for comment in reddit.inbox.mentions():
+        if not comment.author:
+            log.info('Skipping comment without author')
+            continue
 
         if comment.created_utc < datetime.utcnow().timestamp() - 86400:
             log.debug('Skipping old mention. Created at %s', datetime.fromtimestamp(comment.created_utc))

From dd2f360b07be8ed29911775ff6ca626fcaeeb10b Mon Sep 17 00:00:00 2001
From: Matthew Carey
Date: Mon, 27 May 2024 15:43:46 -0400
Subject: [PATCH 10/10] Feature/admin actions to tasks (#378)

* Move reddit actions to celery queue

* Fix ingest to use auth token

* Ingest fixes and event cleanup

* flake error

---
 docker-compose.yml                            |  16 +
 .../adminsvc/new_activation_monitor.py        |  13 +-
 redditrepostsleuth/core/celery/basetasks.py   |   1 -
 .../core/celery/celeryconfig.py               |   1 +
 .../task_logic/monitored_sub_task_logic.py    |   2 +-
 .../celery/task_logic/scheduled_task_logic.py |  13 +-
 .../core/celery/tasks/adult_promoter_tasks.py |   9 +-
 .../core/celery/tasks/monitored_sub_tasks.py  |  17 +-
 .../core/celery/tasks/reddit_action_tasks.py  | 308 +++++++++++++++++
 redditrepostsleuth/core/exception.py          |   4 +
 .../model/events/RedditAdminActionEvent.py    |  16 +
 .../events/ingest_image_process_event.py      |  16 -
 .../core/model/events/repostevent.py          |  14 -
 .../core/model/events/sub_monitor_event.py    |  16 -
 .../core/model/events/summonsevent.py         |  18 -
 .../core/services/duplicateimageservice.py    |   3 +-
 .../core/services/eventlogging.py             |   2 +-
 .../core/services/response_handler.py         |  23 +-
 .../core/services/subreddit_config_updater.py |  84 ++---
 redditrepostsleuth/core/util/helpers.py       |  13 +-
 .../core/util/onlyfans_handling.py            |  31 +-
 .../core/util/replytemplates.py               |   4 +-
 redditrepostsleuth/ingestsvc/ingestsvc.py     | 117 +++++--
 .../submonitorsvc/monitored_sub_service.py    | 153 ++-------
 tests/adminsvc/test_new_activation_monitor.py |  15 -
 .../response_builder_expected_responses.py    |  31 +-
 tests/submonitorsvc/test_subMonitor.py        | 326 +++++++++---------
 27 files changed, 781 insertions(+), 485 deletions(-)
 create mode 100644 redditrepostsleuth/core/celery/tasks/reddit_action_tasks.py
 create mode 100644 redditrepostsleuth/core/model/events/RedditAdminActionEvent.py
 delete mode 100644 redditrepostsleuth/core/model/events/ingest_image_process_event.py
 delete mode 100644 redditrepostsleuth/core/model/events/repostevent.py
 delete mode 100644 redditrepostsleuth/core/model/events/sub_monitor_event.py
 delete mode 100644 redditrepostsleuth/core/model/events/summonsevent.py

diff --git a/docker-compose.yml b/docker-compose.yml
index 70a54e7..62dc26e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -122,6 +122,22 @@ services:
       - CELERY_IMPORTS=redditrepostsleuth.core.celery.tasks.monitored_sub_tasks
     entrypoint: celery -A redditrepostsleuth.core.celery worker -Q submonitor -n submonitor_worker --autoscale=6,2
 
+  reddit_actions_worker:
+    container_name: reddit-actions-worker
+    restart: unless-stopped
+    user: '1001'
+    build:
+      context: .
+      dockerfile: docker/WorkerDockerFile
+    env_file:
+      - .env
+    environment:
+      - RUN_ENV=production
+      - db_user=sub_monitor
+      - LOG_LEVEL=INFO
+      - CELERY_IMPORTS=redditrepostsleuth.core.celery.tasks.reddit_action_tasks
+    entrypoint: celery -A redditrepostsleuth.core.celery worker -Q reddit_actions -n reddit_actions --autoscale=3,2
+
   ingest_worker:
     restart: unless-stopped
     container_name: ingest-worker
diff --git a/redditrepostsleuth/adminsvc/new_activation_monitor.py b/redditrepostsleuth/adminsvc/new_activation_monitor.py
index 761ce43..dbceb87 100644
--- a/redditrepostsleuth/adminsvc/new_activation_monitor.py
+++ b/redditrepostsleuth/adminsvc/new_activation_monitor.py
@@ -7,6 +7,7 @@
 from praw.models import Subreddit, Message
 from prawcore import TooManyRequests
 
+from redditrepostsleuth.core.celery.tasks.reddit_action_tasks import send_modmail_task
 from redditrepostsleuth.core.config import Config
 from redditrepostsleuth.core.db.databasemodels import MonitoredSub
 from redditrepostsleuth.core.db.db_utils import get_db_engine
@@ -78,11 +79,13 @@ def _notify_added(self, subreddit: Subreddit) -> NoReturn:
         log.info('Sending success PM to %s', subreddit.display_name)
         wiki_url = f'https://www.reddit.com/r/{subreddit.display_name}/wiki/repost_sleuth_config'
         try:
-            self.response_handler.send_mod_mail(
-                subreddit.display_name,
-                MONITORED_SUB_ADDED.format(wiki_config=wiki_url),
-                'Repost Sleuth Activated',
-                source='activation'
+            send_modmail_task.apply_async(
+                (
+                    subreddit.display_name,
+                    MONITORED_SUB_ADDED.format(wiki_config=wiki_url),
+                    'Repost Sleuth Activated',
+                ),
+                {'source': 'activation'}
             )
             monitored_sub.activation_notification_sent = True
         except RedditAPIException as e:
diff --git a/redditrepostsleuth/core/celery/basetasks.py b/redditrepostsleuth/core/celery/basetasks.py
index 5222463..fed1eeb 100644
--- a/redditrepostsleuth/core/celery/basetasks.py
+++ b/redditrepostsleuth/core/celery/basetasks.py
@@ -5,7 +5,6 @@
 from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
 from redditrepostsleuth.core.notification.notification_service import NotificationService
 from redditrepostsleuth.core.services.eventlogging import EventLogging
-from redditrepostsleuth.core.services.reddit_manager import RedditManager
 from redditrepostsleuth.core.services.response_handler import ResponseHandler
 from redditrepostsleuth.core.services.subreddit_config_updater import SubredditConfigUpdater
 from redditrepostsleuth.core.util.helpers import get_reddit_instance
diff --git a/redditrepostsleuth/core/celery/celeryconfig.py b/redditrepostsleuth/core/celery/celeryconfig.py
index 35899da..c1db7d5 100644
--- a/redditrepostsleuth/core/celery/celeryconfig.py
+++ b/redditrepostsleuth/core/celery/celeryconfig.py
@@ -30,6 +30,7 @@
     'redditrepostsleuth.core.celery.admin_tasks.check_user_for_only_fans': {'queue': 'onlyfans_check'},
     'redditrepostsleuth.core.celery.admin_tasks.update_subreddit_config_from_database': {'queue': 'update_wiki_from_database'},
     'redditrepostsleuth.core.celery.admin_tasks.delete_search_batch': {'queue': 'batch_delete_searches'},
+    'redditrepostsleuth.core.celery.tasks.reddit_action_tasks.*': {'queue': 'reddit_actions'},
 }
 
 
diff --git a/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py b/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
index ee96061..0914633 100644
--- a/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/monitored_sub_task_logic.py
@@ -29,7 +29,7 @@ def process_monitored_subreddit_submission(post_id: str, monitored_sub_svc: Moni
 
         if monitored_sub.adult_promoter_remove_post or monitored_sub.adult_promoter_ban_user or monitored_sub.adult_promoter_notify_mod_mail:
             try:
-                check_user_for_only_fans(uow, post.author)
+                check_user_for_only_fans(uow, post.author, monitored_sub_svc.reddit)
             except (UtilApiException, ConnectionError, TooManyRequests) as e:
                 log.warning('Failed to do onlyfans check for user %s', post.author)
 
diff --git a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
index 991a560..8bbabc0 100644
--- a/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
+++ b/redditrepostsleuth/core/celery/task_logic/scheduled_task_logic.py
@@ -13,6 +13,7 @@
 from prawcore import NotFound, Forbidden, Redirect
 from sqlalchemy import text, func
 
+from redditrepostsleuth.core.celery.tasks.reddit_action_tasks import send_modmail_task
 from redditrepostsleuth.core.config import Config
 from redditrepostsleuth.core.db.databasemodels import HttpProxy, StatsTopRepost, StatsTopReposter
 from redditrepostsleuth.core.db.db_utils import get_db_engine
@@ -195,15 +196,15 @@ def update_monitored_sub_data(
     if monitored_sub.failed_admin_check_count == 2:
         subreddit = reddit.subreddit(monitored_sub.name)
         message = MONITORED_SUB_MOD_REMOVED_CONTENT.format(hours='72', subreddit=monitored_sub.name)
-        try:
-            response_handler.send_mod_mail(
+
+        send_modmail_task.apply_async(
+            (
                 subreddit.display_name,
                 message,
                 MONITORED_SUB_MOD_REMOVED_SUBJECT,
-                source='mod_check'
-            )
-        except PRAWException:
-            pass
+            ),
+            {'source': 'mod_check'}
+        )
         return
     elif monitored_sub.failed_admin_check_count >= 4 and monitored_sub.name.lower() != 'dankmemes':
         notification_svc.send_notification(
diff --git a/redditrepostsleuth/core/celery/tasks/adult_promoter_tasks.py b/redditrepostsleuth/core/celery/tasks/adult_promoter_tasks.py
index 30980a1..5990a9e 100644
--- a/redditrepostsleuth/core/celery/tasks/adult_promoter_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/adult_promoter_tasks.py
@@ -1,17 +1,22 @@
+import logging
+
 from celery import Task
 from prawcore import TooManyRequests
 from redis import Redis
+from sqlalchemy.exc import IntegrityError
 
 from redditrepostsleuth.core.celery import celery
 from redditrepostsleuth.core.config import Config
 from redditrepostsleuth.core.db.db_utils import get_db_engine
 from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
-from redditrepostsleuth.core.exception import UtilApiException
+from redditrepostsleuth.core.exception import UtilApiException, UserNotFound
 from redditrepostsleuth.core.notification.notification_service import NotificationService
 from redditrepostsleuth.core.services.eventlogging import EventLogging
 from redditrepostsleuth.core.services.response_handler import ResponseHandler
+from redditrepostsleuth.core.util.onlyfans_handling import check_user_comments_for_promoter_links
 from redditrepostsleuth.core.util.reddithelpers import get_reddit_instance
 
+# TODO - This should be safe to remove
 
 class AdultPromoterTask(Task):
     def __init__(self):
@@ -30,6 +35,8 @@ def __init__(self):
         )
 
 
+log = logging.getLogger(__name__)
+
 @celery.task(bind=True, base=AdultPromoterTask, autoretry_for=(UtilApiException,ConnectionError,TooManyRequests), retry_kwargs={'max_retries': 3})
 def check_user_comments_for_only_fans(self, username: str) -> None:
     """
diff --git a/redditrepostsleuth/core/celery/tasks/monitored_sub_tasks.py b/redditrepostsleuth/core/celery/tasks/monitored_sub_tasks.py
index 1711919..81f6eb4 100644
--- a/redditrepostsleuth/core/celery/tasks/monitored_sub_tasks.py
+++ b/redditrepostsleuth/core/celery/tasks/monitored_sub_tasks.py
@@ -44,7 +44,8 @@ def __init__(self):
         response_handler = ResponseHandler(self.reddit, self.uowm, event_logger, source='submonitor',
                                            live_response=self.config.live_responses)
         dup_image_svc = DuplicateImageService(self.uowm, event_logger, self.reddit, config=self.config)
         response_builder = ResponseBuilder(self.uowm)
-        self.monitored_sub_svc = MonitoredSubService(dup_image_svc, self.uowm, self.reddit, response_builder, response_handler, event_logger=event_logger, config=self.config)
+        self.monitored_sub_svc = MonitoredSubService(dup_image_svc, self.uowm, self.reddit, response_builder, event_logger=event_logger, config=self.config)
+
 
 
 @celery.task(
@@ -55,11 +56,15 @@ def __init__(self):
     retry_kwargs={'max_retries': 3}
 )
 def sub_monitor_check_post(self, post_id: str, monitored_sub: MonitoredSub):
-    update_log_context_data(log, {'trace_id': str(randint(100000, 999999)), 'post_id': post_id,
-                                  'subreddit': monitored_sub.name, 'service': 'Subreddit_Monitor'})
-
-    with self.uowm.start() as uow:
-        process_monitored_subreddit_submission(post_id, self.monitored_sub_svc, uow)
+    try:
+        update_log_context_data(log, {'trace_id': str(randint(100000, 999999)), 'post_id': post_id,
+                                      'subreddit': monitored_sub.name, 'service': 'Subreddit_Monitor'})
+
+        with self.uowm.start() as uow:
+            process_monitored_subreddit_submission(post_id, self.monitored_sub_svc, uow)
+    except Exception as e:
+        log.exception('General failure')
+        pass
 
 
 @celery.task(
diff --git a/redditrepostsleuth/core/celery/tasks/reddit_action_tasks.py b/redditrepostsleuth/core/celery/tasks/reddit_action_tasks.py
new file mode 100644
index 0000000..b364569
--- /dev/null
+++ b/redditrepostsleuth/core/celery/tasks/reddit_action_tasks.py
@@ -0,0 +1,308 @@
+from celery import Task
+from praw.exceptions import RedditAPIException
+from praw.models import Comment, Submission
+from prawcore import Forbidden, TooManyRequests
+
+from redditrepostsleuth.core.celery import celery
+from redditrepostsleuth.core.config import Config
+from redditrepostsleuth.core.db.db_utils import get_db_engine
+from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
+from redditrepostsleuth.core.logging import get_configured_logger
+from redditrepostsleuth.core.model.events.RedditAdminActionEvent import RedditAdminActionEvent
+from redditrepostsleuth.core.notification.notification_service import NotificationService
+from redditrepostsleuth.core.services.eventlogging import EventLogging
+from redditrepostsleuth.core.services.response_handler import ResponseHandler
+from redditrepostsleuth.core.util.helpers import get_removal_reason_id
+from redditrepostsleuth.core.util.reddithelpers import get_reddit_instance
+from redditrepostsleuth.core.util.replytemplates import NO_BAN_PERMISSIONS
+
+log = get_configured_logger(name='redditrepostsleuth')
+
+class RedditActionTask(Task):
+    def __init__(self):
+        self.config = Config()
+        self.reddit = get_reddit_instance(self.config)
+        self.uowm = UnitOfWorkManager(get_db_engine(self.config))
+        self.event_logger = EventLogging(config=self.config)
+        self.notification_svc = NotificationService(self.config)
+        self.response_handler = ResponseHandler(self.reddit, self.uowm, self.event_logger, live_response=self.config.live_responses)
+
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def remove_submission_task(self, submission: Submission, removal_reason: str, mod_note: str = None) -> None:
+    try:
+        removal_reason_id = get_removal_reason_id(removal_reason, submission.subreddit)
+        log.info('Attempting to remove post https://redd.it/%s with removal ID %s', submission.id, removal_reason_id)
+        submission.mod.remove(reason_id=removal_reason_id, mod_note=mod_note)
+        self.event_logger.save_event(
+            RedditAdminActionEvent(
+                submission.subreddit.display_name,
+                'remove_submission'
+            )
+        )
+    except Forbidden:
+        log.error('Failed to remove post https://redd.it/%s, no permission', submission.id)
+        send_modmail_task.apply_async(
+            (
+                submission.subreddit.display_name,
+                f'Failed to remove https://redd.it/{submission.id}.\n\nI do not appear to have the required permissions',
+                'RepostSleuthBot Missing Permissions'
+            )
+        )
+    except TooManyRequests as e:
+        log.warning('Too many requests when removing submission')
+        raise e
+    except Exception as e:
+        log.exception('Failed to remove submission https://redd.it/%s', submission.id, exc_info=True)
+
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def ban_user_task(self, username: str, subreddit_name: str, ban_reason: str, note: str = None) -> None:
+    log.info('Banning user %s from %s', username, subreddit_name)
+
+    try:
+        subreddit = self.reddit.subreddit(subreddit_name)
+        subreddit.banned.add(username, ban_reason=ban_reason, note=note)
+        self.event_logger.save_event(
+            RedditAdminActionEvent(
+                subreddit_name,
+                'ban_user'
+            )
+        )
+    except TooManyRequests as e:
+        log.warning('Too many requests when banning user')
+        raise e
+    except Forbidden:
+        log.warning('Unable to ban user %s on %s. No permissions', username, subreddit_name)
+        message_body = NO_BAN_PERMISSIONS.format(
+            username=username,
+            subreddit=subreddit_name
+        )
+
+        send_modmail_task.apply_async(
+            (
+                subreddit_name,
+                message_body,
+                f'Unable To Ban User, No Permissions'
+            )
+        )
+    except RedditAPIException as e:
+        if e.error_type == 'TOO_LONG':
+            log.warning('Ban reason for subreddit %s is %s and should be no longer than 100', subreddit_name, len(ban_reason))
+            send_modmail_task.apply_async(
+                (
+                    subreddit_name,
+                    f'I attempted to ban u/{username} from r/{subreddit_name}. However, this failed since the ban reason is over 100 characters. \n\nPlease reduce the size of the ban reason. ',
+                    'Error When Banning User'
+                )
+            )
+            return
+        raise e
+    except Exception as e:
+        log.exception('Failed to ban %s from %s', username, subreddit_name)
+
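Note: ban_user_task above has to special-case reddit's TOO_LONG error because ban reasons are capped at 100 characters. A defensive alternative is to truncate before calling the API at all (a sketch under that assumption, not what the patch itself does; truncate_ban_reason is a hypothetical helper):

    MAX_BAN_REASON_LEN = 100  # reddit's cap on ban reasons

    def truncate_ban_reason(ban_reason: str) -> str:
        """Trim a ban reason so the API call cannot fail with TOO_LONG."""
        if len(ban_reason) <= MAX_BAN_REASON_LEN:
            return ban_reason
        return ban_reason[:MAX_BAN_REASON_LEN - 3] + '...'

    # subreddit.banned.add(username, ban_reason=truncate_ban_reason(reason), note=note)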
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def lock_submission_task(self, submission: Submission) -> None:
+    log.info('Locking submission https://redd.it/%s', submission.id)
+    try:
+        submission.mod.lock()
+        self.event_logger.save_event(
+            RedditAdminActionEvent(
+                submission.subreddit.display_name,
+                'submission_lock'
+            )
+        )
+    except TooManyRequests as e:
+        log.warning('Too many requests when locking submission')
+        raise e
+    except Forbidden as e:
+        log.warning('Failed to lock submission, no permissions on r/%s', submission.subreddit.display_name)
+    except Exception as e:
+        log.exception('Failed to lock submission https://redd.it/%s', submission.id)
+        raise e
+
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def lock_comment_task(self, comment: Comment) -> None:
+    log.info('Locking comment https://reddit.com%s', comment.permalink)
+    try:
+        comment.mod.lock()
+        self.event_logger.save_event(
+            RedditAdminActionEvent(
+                comment.subreddit.display_name,
+                'comment_lock'
+            )
+        )
+    except TooManyRequests as e:
+        log.warning('Too many requests when locking comment')
+        raise e
+    except Forbidden as e:
+        log.warning('Failed to lock comment on r/%s, no permissions', comment.subreddit.display_name)
+    except Exception as e:
+        log.exception('')
+        raise e
+
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def sticky_comment_task(self, comment: Comment) -> None:
+    log.info('Make comment sticky: https://reddit.com%s ', comment.permalink)
+    try:
+        comment.mod.distinguish(sticky=True)
+        self.event_logger.save_event(
+            RedditAdminActionEvent(
+                comment.subreddit.display_name,
+                'comment_sticky'
+            )
+        )
+    except TooManyRequests as e:
+        log.warning('Too many requests when sticky comment')
+        raise e
+    except Forbidden as e:
+        log.warning('Failed to sticky comment on r/%s, no permissions', comment.subreddit.display_name)
+    except Exception as e:
+        log.exception('')
+        raise e
+
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def mark_as_oc_task(self, submission: Submission) -> None:
+    log.info('Marking submission %s as OC', submission.id)
+    try:
+        submission.mod.set_original_content()
+        self.event_logger.save_event(
+            RedditAdminActionEvent(
+                submission.subreddit.display_name,
+                'submission_mark_oc'
+            )
+        )
+    except TooManyRequests as e:
+        log.warning('Too many requests when marking submission OC')
+        raise e
+    except Forbidden as e:
+        log.warning('Failed to mark submission %s as OC on r/%s, no permissions', submission.id, submission.subreddit.display_name)
+        send_modmail_task.apply_async(
+            (
+                submission.subreddit.display_name,
+                f'Failed to mark https://redd.it/{submission.id} as OC.\n\nI do not appear to have the required permissions',
+                'RepostSleuthBot Missing Permissions'
+            )
+        )
+    except Exception as e:
+        log.exception('')
+        raise e
+
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def report_submission_task(self, submission: Submission, report_msg: str) -> None:
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def report_submission_task(self, submission: Submission, report_msg: str) -> None:
+    log.info('Reporting submission https://redd.it/%s', submission.id)
+    try:
+        submission.report(report_msg[:99])  # TODO: Until database column length is fixed
+        self.event_logger.save_event(
+            RedditAdminActionEvent(
+                submission.subreddit.display_name,
+                'submission_report'
+            )
+        )
+    except TooManyRequests as e:
+        log.warning('Too many requests when reporting submission')
+        raise e
+    except Exception as e:
+        log.exception('Failed to report submission %s', submission.id, exc_info=True)
+        raise e
+
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def leave_comment_task(
+    self,
+    submission_id: str,
+    message: str,
+    sticky_comment: bool = False,
+    lock_comment: bool = False,
+    source: str = 'submonitor'
+) -> None:
+    try:
+        comment = self.response_handler.reply_to_submission(submission_id, message, source)
+    except TooManyRequests as e:
+        log.warning('Too many requests when leaving comment')
+        raise e
+    except RedditAPIException as e:
+        if e.error_type == 'THREAD_LOCKED':
+            return
+        raise e
+    except Exception as e:
+        log.exception('Failed to leave comment on submission %s', submission_id)
+        return
+
+    if not comment:
+        log.debug('No comment returned from response handler')
+        return
+
+    if sticky_comment:
+        sticky_comment_task.apply_async((comment,))
+
+    if lock_comment:
+        lock_comment_task.apply_async((comment,))
+
+
+@celery.task(
+    bind=True,
+    ignore_result=True,
+    base=RedditActionTask,
+    autoretry_for=(TooManyRequests,),
+    retry_kwargs={'max_retries': 3}
+)
+def send_modmail_task(self, subreddit_name: str, message: str, subject: str, source: str = 'sub_monitor') -> None:
+    log.info('Sending modmail to r/%s', subreddit_name)
+    try:
+        self.response_handler.send_mod_mail(
+            subreddit_name,
+            message,
+            subject,
+            source=source
+        )
+    except TooManyRequests as e:
+        log.warning('Too many requests when sending modmail')
+        raise e
+    except Exception as e:
+        log.exception('Failed to send modmail to %s', subreddit_name)
\ No newline at end of file
diff --git a/redditrepostsleuth/core/exception.py b/redditrepostsleuth/core/exception.py
index 2e8d32a..6fed37d 100644
--- a/redditrepostsleuth/core/exception.py
+++ b/redditrepostsleuth/core/exception.py
@@ -75,3 +75,7 @@ def __init__(self, message):
 class RedGifsTokenException(RepostSleuthException):
     def __init__(self, message):
         super(RedGifsTokenException, self).__init__(message)
+
+class RedditTokenExpiredException(RepostSleuthException):
+    def __init__(self, message):
+        super(RedditTokenExpiredException, self).__init__(message)
\ No newline at end of file
diff --git a/redditrepostsleuth/core/model/events/RedditAdminActionEvent.py b/redditrepostsleuth/core/model/events/RedditAdminActionEvent.py
new file mode 100644
index 0000000..1a20fd0
--- /dev/null
+++ b/redditrepostsleuth/core/model/events/RedditAdminActionEvent.py
@@ -0,0 +1,16 @@
+from redditrepostsleuth.core.model.events.influxevent import InfluxEvent
+
+
+class RedditAdminActionEvent(InfluxEvent):
+    def __init__(self, subreddit: str, action: str, event_type: str = None):
+        super(RedditAdminActionEvent, self).__init__(event_type=event_type)
+        self.subreddit = subreddit
+        self.count = 1
+        self.action = action
+
+    def get_influx_event(self):
+        event = super().get_influx_event()
+        #event[0]['fields']['count'] = self.count
+        event[0]['tags']['subreddit'] = self.subreddit
+        event[0]['tags']['action'] = self.action
+        return event
\ No newline at end of file
diff --git 
a/redditrepostsleuth/core/model/events/ingest_image_process_event.py b/redditrepostsleuth/core/model/events/ingest_image_process_event.py deleted file mode 100644 index b5ea7fe..0000000 --- a/redditrepostsleuth/core/model/events/ingest_image_process_event.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import Text - -from redditrepostsleuth.core.model.events.influxevent import InfluxEvent - - -class IngestImageProcessEvent(InfluxEvent): - def __init__(self, domain: Text, status_code: int, event_type=None): - super().__init__(event_type=event_type) - self.status_code = status_code - self.domain = domain - - def get_influx_event(self): - event = super().get_influx_event() - event[0]['fields']['domain'] = self.domain - event[0]['tags']['status_code'] = self.status_code - return event \ No newline at end of file diff --git a/redditrepostsleuth/core/model/events/repostevent.py b/redditrepostsleuth/core/model/events/repostevent.py deleted file mode 100644 index 2ddbfaf..0000000 --- a/redditrepostsleuth/core/model/events/repostevent.py +++ /dev/null @@ -1,14 +0,0 @@ -from redditrepostsleuth.core.model.events.influxevent import InfluxEvent - - -class RepostEvent(InfluxEvent): - def __init__(self, event_type: str = None, status: str = None, post_type: str = None, repost_of: str = None): - super().__init__(event_type=event_type, status=status) - self.post_type = post_type - self.repost_of = repost_of - - def get_influx_event(self): - event = super().get_influx_event() - event[0]['tags']['post_type'] = self.post_type - event[0]['tags']['repost_type'] = self.repost_of - return event \ No newline at end of file diff --git a/redditrepostsleuth/core/model/events/sub_monitor_event.py b/redditrepostsleuth/core/model/events/sub_monitor_event.py deleted file mode 100644 index 5afc546..0000000 --- a/redditrepostsleuth/core/model/events/sub_monitor_event.py +++ /dev/null @@ -1,16 +0,0 @@ -from redditrepostsleuth.core.model.events.influxevent import InfluxEvent - - -class SubMonitorEvent(InfluxEvent): - def __init__(self, process_time: float, post_count: int, subreddit: str, event_type=None): - super(SubMonitorEvent, self).__init__(event_type=event_type) - self.process_time = process_time - self.post_count = post_count - self.subreddit = subreddit - - def get_influx_event(self): - event = super().get_influx_event() - event[0]['fields']['process_time'] = self.process_time - event[0]['fields']['post_count'] = self.post_count - event[0]['tags']['subreddit'] = self.subreddit - return event \ No newline at end of file diff --git a/redditrepostsleuth/core/model/events/summonsevent.py b/redditrepostsleuth/core/model/events/summonsevent.py deleted file mode 100644 index 5bd30fd..0000000 --- a/redditrepostsleuth/core/model/events/summonsevent.py +++ /dev/null @@ -1,18 +0,0 @@ -from redditrepostsleuth.core.model.events.influxevent import InfluxEvent - - -class SummonsEvent(InfluxEvent): - def __init__(self, response_time, summons_time, user, event_type=None): - super(SummonsEvent, self).__init__(event_type=event_type) - self.response_time = response_time - self.summons_time = str(summons_time) - self.count = 1 - self.user = user - - def get_influx_event(self): - event = super().get_influx_event() - event[0]['fields']['response_time'] = self.response_time - event[0]['fields']['summons_time'] = self.summons_time - event[0]['fields']['count'] = self.count - event[0]['tags']['user'] = self.user - return event diff --git a/redditrepostsleuth/core/services/duplicateimageservice.py 
b/redditrepostsleuth/core/services/duplicateimageservice.py index b050dab..53da332 100644 --- a/redditrepostsleuth/core/services/duplicateimageservice.py +++ b/redditrepostsleuth/core/services/duplicateimageservice.py @@ -9,7 +9,6 @@ from requests.exceptions import ConnectionError from sqlalchemy.exc import IntegrityError -from redditrepostsleuth.core.celery.admin_tasks import delete_post_task from redditrepostsleuth.core.config import Config from redditrepostsleuth.core.db.databasemodels import Post, MemeTemplate, MemeHash from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager @@ -395,7 +394,7 @@ def _final_meme_filter(self, match_hash = meme_hashes['dhash_h'] except ImageConversionException: log.warning('Failed to get meme hash for %s. Sending to delete queue', match.post.post_id) - delete_post_task.apply_async((match.post.post_id,)) + #delete_post_task.apply_async((match.post.post_id,)) continue except Exception: log.exception('Failed to get meme hash for %s', match.post.url, exc_info=True) diff --git a/redditrepostsleuth/core/services/eventlogging.py b/redditrepostsleuth/core/services/eventlogging.py index 1d878ee..2f179d7 100644 --- a/redditrepostsleuth/core/services/eventlogging.py +++ b/redditrepostsleuth/core/services/eventlogging.py @@ -66,7 +66,7 @@ def _flush_unsaved(self) -> NoReturn: def _write_to_influx(self, event: InfluxEvent) -> bool: try: self._influx_client.write(bucket=self._config.influx_bucket, record=event.get_influx_event()) - #log.debug('Wrote to Influx: %s', event.get_influx_event()) + log.debug('Wrote to Influx: %s', event.get_influx_event()) self._successive_failures = 0 return True except Exception as e: diff --git a/redditrepostsleuth/core/services/response_handler.py b/redditrepostsleuth/core/services/response_handler.py index 95ab8e0..09ba0bf 100644 --- a/redditrepostsleuth/core/services/response_handler.py +++ b/redditrepostsleuth/core/services/response_handler.py @@ -197,19 +197,18 @@ def send_mod_mail(self, subreddit_name: str, message_body: str, subject: str, so if self.test_mode: message_body = REPLY_TEST_MODE + message_body - try: - if self.live_response: - subreddit.message(subject, message_body) - self._save_private_message( - BotPrivateMessage( - subject=subject, - body=message_body, - triggered_from=source, - recipient=f'r/{subreddit_name}' - ) + + if self.live_response: + subreddit.message(subject, message_body) + self._save_private_message( + BotPrivateMessage( + subject=subject, + body=message_body, + triggered_from=source, + recipient=f'r/{subreddit_name}' ) - except RedditAPIException: - log.exception('Problem sending modmail message', exc_info=True) + ) + def _save_private_message(self, bot_message: BotPrivateMessage) -> NoReturn: """ diff --git a/redditrepostsleuth/core/services/subreddit_config_updater.py b/redditrepostsleuth/core/services/subreddit_config_updater.py index 41083cc..6fdf13e 100644 --- a/redditrepostsleuth/core/services/subreddit_config_updater.py +++ b/redditrepostsleuth/core/services/subreddit_config_updater.py @@ -10,6 +10,7 @@ from sqlalchemy import func from sqlalchemy.exc import IntegrityError +from redditrepostsleuth.core.celery.tasks.reddit_action_tasks import send_modmail_task from redditrepostsleuth.core.config import Config from redditrepostsleuth.core.db.databasemodels import MonitoredSub, MonitoredSubConfigRevision from redditrepostsleuth.core.db.db_utils import get_db_engine @@ -97,8 +98,8 @@ def check_for_config_update(self, monitored_sub: MonitoredSub, notify_missing_ke return if 
notify_missing_keys: - if self._notify_new_options(subreddit, missing_keys): - self._set_config_notified(wiki_page.revision_id) + self._notify_new_options(subreddit, missing_keys) + self._set_config_notified(wiki_page.revision_id) def create_initial_wiki_config(self, subreddit: Subreddit, wiki_page: WikiPage, monitored_sub: MonitoredSub) -> NoReturn: @@ -335,8 +336,8 @@ def _load_new_config(self, wiki_page: WikiPage, monitored_sub: MonitoredSub, sub wiki_config = self.get_wiki_config(wiki_page) except JSONDecodeError as e: self._set_config_validity(wiki_page.revision_id, valid=False) - if self._notify_failed_load(subreddit, str(e), wiki_page.revision_id): - self._set_config_notified(wiki_page.revision_id) + self._notify_failed_load(subreddit, str(e), wiki_page.revision_id) + self._set_config_notified(wiki_page.revision_id) raise self._update_monitored_sub_from_wiki(monitored_sub, wiki_config) @@ -344,8 +345,8 @@ def _load_new_config(self, wiki_page: WikiPage, monitored_sub: MonitoredSub, sub uow.monitored_sub.update(monitored_sub) uow.commit() self._set_config_validity(wiki_page.revision_id, True) - if self._notify_successful_load(wiki_page.subreddit): - self._set_config_notified(wiki_page.revision_id) + self._notify_successful_load(wiki_page.subreddit) + self._set_config_notified(wiki_page.revision_id) return wiki_config @@ -370,16 +371,16 @@ def _notify_config_created(self, subreddit: Subreddit) -> bool: :return: bool for successful or failed message """ log.info('Sending config created notification to %s', subreddit.display_name) - try: - self.response_handler.send_mod_mail( + + send_modmail_task.apply_async( + ( subreddit.display_name, - 'Repost Sleuth Has Loaded Your New Config!', - 'I saw your config changes and have loaded them! \n\n I\'ll start using them now.' - ) - return True - except Exception as e: - log.exception('Failed to send config created notification') - return False + 'I saw your config changes and have loaded them! \n\n I\'ll start using them now.', + 'Repost Sleuth Has Loaded Your New Config!' + ), + {'source': 'config_updater'} + ) + def _notify_failed_load(self, subreddit: Subreddit, error: Text, revision_id: Text) -> bool: if self.notification_svc: @@ -391,46 +392,51 @@ def _notify_failed_load(self, subreddit: Subreddit, error: Text, revision_id: Te f'Error: {error} \n\n' \ 'Please validate your changes and try again' - try: - self.response_handler.send_mod_mail(subreddit.display_name, body, 'Repost Sleuth Failed To Load Config', source='submonitor') - return True - except Exception as e: - log.exception('Failed to send PM to %s', subreddit.display_name) - return False + send_modmail_task.apply_async( + ( + subreddit.display_name, + body, + 'Repost Sleuth Failed To Load Config' + ), + {'source': 'config_updater'} + ) - def _notify_successful_load(self, subreddit: Subreddit) -> bool: + + def _notify_successful_load(self, subreddit: Subreddit) -> None: log.info('Sending notification for successful config update to %s', subreddit.display_name) if self.notification_svc: self.notification_svc.send_notification( f'New config loaded for r/{subreddit.display_name}', subject=f'Subreddit Config Load Success' ) - try: - self.response_handler.send_mod_mail( + + send_modmail_task.apply_async( + ( subreddit.display_name, - 'Repost Sleuth Has Loaded Your New Config!', 'I saw your config changes and have loaded them! 
\n\n I\'ll start using them now.', - source='submonitor' - ) - return True - except Exception as e: - log.exception('Failed to send PM to %s', subreddit.display_name) - return False + 'Repost Sleuth Has Loaded Your New Config!', + ), + {'source': 'config_updater'} + ) + - def _notify_new_options(self, subreddit: Subreddit, config_keys: List[Text]) -> bool: + def _notify_new_options(self, subreddit: Subreddit, config_keys: List[Text]) -> None: log.info('Sending notification for new config keys being added to %s. %s', config_keys, subreddit.display_name) if self.notification_svc: self.notification_svc.send_notification( f'Added now config keys to r/{subreddit.display_name}\n{config_keys}\nhttps://reddit.com/r/{subreddit.display_name}', subject='New Config Options Notification Sent' ) - try: - message = f'Your Repost Sleuth config was missing some newly available options.\n\n I\'ve added the following options to your config: {config_keys}\n\nYou can read more about them here: https://www.reddit.com/r/RepostSleuthBot/wiki/add-you-sub/configure-repost-sleuth#wiki_config_value_explanation' - self.response_handler.send_mod_mail(subreddit.display_name, message, 'New Repost Sleuth Options Available!', source='submonitor') - return True - except Exception as e: - log.exception('Failed to send PM to %s', subreddit.display_name) - return False + message = f'Your Repost Sleuth config was missing some newly available options.\n\n I\'ve added the following options to your config: {config_keys}\n\nYou can read more about them here: https://www.reddit.com/r/RepostSleuthBot/wiki/add-you-sub/configure-repost-sleuth#wiki_config_value_explanation' + send_modmail_task.apply_async( + ( + subreddit.display_name, + message, + 'New Repost Sleuth Options Available!' + ), + {'source': 'config_updater'} + ) + def _set_config_validity(self, revision_id: Text, valid: bool) -> NoReturn: with self.uowm.start() as uowm: diff --git a/redditrepostsleuth/core/util/helpers.py b/redditrepostsleuth/core/util/helpers.py index d3c9d03..dd4ba03 100644 --- a/redditrepostsleuth/core/util/helpers.py +++ b/redditrepostsleuth/core/util/helpers.py @@ -7,6 +7,7 @@ import requests from praw import Reddit +from praw.models import Subreddit from redis import Redis from redlock import RedLockFactory from requests.exceptions import ConnectionError @@ -26,6 +27,14 @@ from redditrepostsleuth.core.util.reddithelpers import get_reddit_instance +def get_removal_reason_id(removal_reason: str, subreddit: Subreddit) -> Optional[str]: + if not removal_reason: + return None + for r in subreddit.mod.removal_reasons: + if r.title.lower() == removal_reason.lower(): + return r.id + return None + def post_type_from_url(url: str) -> str: """ Try to guess post type based off URL @@ -483,8 +492,8 @@ def get_next_ids(start_id, count): return ids def generate_next_ids(start_id, count): - start_num = base36decode(start_id) - for id_num in range(start_num, start_num + count): + #start_num = base36decode(start_id) + for id_num in range(start_id, start_id + count): yield base36encode(id_num) diff --git a/redditrepostsleuth/core/util/onlyfans_handling.py b/redditrepostsleuth/core/util/onlyfans_handling.py index 63ede19..fe5faf0 100644 --- a/redditrepostsleuth/core/util/onlyfans_handling.py +++ b/redditrepostsleuth/core/util/onlyfans_handling.py @@ -8,7 +8,7 @@ import requests from praw import Reddit -from prawcore import TooManyRequests +from prawcore import TooManyRequests, NotFound from requests import Response from requests.exceptions import ConnectionError from 
sqlalchemy import func
@@ -115,6 +115,20 @@ def fetch_from_util_api(url: str) -> Response:
     return response
 
+def check_bio_for_promoter_links(username: str, reddit: Reddit) -> Optional[str]:
+    try:
+        redditor = reddit.redditor(username)
+        bio = redditor.subreddit.public_description
+    except (NotFound, AttributeError):
+        log.warning('Failed to get Redditor bio for username %s', username)
+        return
+
+    if not bio:
+        return
+
+    log.debug('Checking bio of %s: %s', username, bio)
+
+    for domain in flagged_words:
+        if domain in bio:
+            return domain
+
 def get_profile_links(username: str) -> list[str]:
     url = f'{config.util_api}/profile?username={username}'
     response = fetch_from_util_api(url)
@@ -133,7 +147,12 @@ def get_profile_links(username: str) -> list[str]:
     raise UtilApiException(f'Unexpected status {response.status_code} from util API')
 
-def check_user_for_promoter_links(username: str) -> Optional[LinkCheckResult]:
+def check_user_for_promoter_links(username: str, reddit: Reddit) -> Optional[LinkCheckResult]:
+
+    flagged_bio_domain = check_bio_for_promoter_links(username, reddit)
+
+    if flagged_bio_domain:
+        return LinkCheckResult(source='Bio', url=flagged_bio_domain)
 
     profile_links = get_profile_links(username)
 
@@ -182,6 +201,8 @@ def get_links_from_comments(username: str) -> list[str]:
         case 403:
             log.warning('Got unauthorized when checking user comments for %s', username)
             raise UserNotFound(f'User {username} does not exist or is banned')
+        case 407:
+            return []
         case 429:
             log.warning('Rate limited')
             raise UtilApiException(f'Rate limited')
@@ -224,7 +245,7 @@ def get_links_from_comments_praw(username: str, reddit: Reddit) -> list[str]:
     return list(set(all_urls))
 
-def check_user_for_only_fans(uow: UnitOfWork, username: str) -> Optional[UserReview]:
+def check_user_for_only_fans(uow: UnitOfWork, username: str, reddit: Reddit) -> Optional[UserReview]:
     skip_names = ['[deleted]', 'AutoModerator']
 
     if username in skip_names:
@@ -236,7 +257,7 @@ def check_user_for_only_fans(uow: UnitOfWork, username: str) -> Optional[UserRev
     if user:
         delta = datetime.utcnow() - user.last_checked
-        if delta.days < 30:
+        if delta.days < 7:
             log.info('Skipping existing user %s, last check was %s days ago', username, delta.days)
             return
         user.content_links_found = False
@@ -247,7 +268,7 @@ def check_user_for_only_fans(uow: UnitOfWork, username: str) -> Optional[UserRev
     if not user:
         user = UserReview(username=username)
     try:
-        result = check_user_for_promoter_links(username)
+        result = check_user_for_promoter_links(username, reddit)
     except UserNotFound as e:
         log.warning(e)
         return
diff --git a/redditrepostsleuth/core/util/replytemplates.py b/redditrepostsleuth/core/util/replytemplates.py
index 3a0bfb1..7dcfea7 100644
--- a/redditrepostsleuth/core/util/replytemplates.py
+++ b/redditrepostsleuth/core/util/replytemplates.py
@@ -41,8 +41,10 @@
                      '{first_seen} {oldest_percent_match} match. {last_seen} {newest_percent_match} match \n\n'
 MONITORED_SUB_ADDED = 'Congratulations! Your Subreddit is now monitored by Repost Sleuth Bot. It will start scanning all of your new posts shortly\n\n' \
-                      'If you gave me wiki permissions you can find my configuration file here {wiki_config}\n\n' \
+                      'You can manage the bot\'s settings by visiting https://repostsleuth.com, logging in with your Reddit account and selecting your Subreddit.\n\n' \
+                      'If you gave me wiki permissions you can find the configuration file here {wiki_config}\n\n' \
                      'You can find details about the configuration options [here](https://www.reddit.com/r/RepostSleuthBot/wiki/add-you-sub#wiki_configuration)\n\n' \
+                      'Please note, managing settings via the wiki page can be cumbersome and error-prone. The website provides a simple interface for changing settings\n\n' \
                      'If you notice any issues please report them at r/RepostSleuthBot\n\n' \
                      'You can also manage the bots settings by visiting https://repostsleuth.com'
diff --git a/redditrepostsleuth/ingestsvc/ingestsvc.py b/redditrepostsleuth/ingestsvc/ingestsvc.py
index 0abf31f..e4657ac 100644
--- a/redditrepostsleuth/ingestsvc/ingestsvc.py
+++ b/redditrepostsleuth/ingestsvc/ingestsvc.py
@@ -5,22 +5,22 @@ import time
 from asyncio import ensure_future, gather, run, TimeoutError, CancelledError
 from datetime import datetime
-from typing import List, Optional
+from typing import List, Optional, Union, Generator
 
 from aiohttp import ClientSession, ClientTimeout, ClientConnectorError, TCPConnector, \
     ServerDisconnectedError, ClientOSError
+from praw import Reddit
 
 from redditrepostsleuth.core.celery.tasks.ingest_tasks import save_new_post, save_new_posts
 from redditrepostsleuth.core.config import Config
 from redditrepostsleuth.core.db.databasemodels import Post
 from redditrepostsleuth.core.db.db_utils import get_db_engine
 from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
-from redditrepostsleuth.core.exception import RateLimitException, UtilApiException
+from redditrepostsleuth.core.exception import RateLimitException, UtilApiException, RedditTokenExpiredException
 from redditrepostsleuth.core.logging import configure_logger
 from redditrepostsleuth.core.model.misc_models import BatchedPostRequestJob, JobStatus
 from redditrepostsleuth.core.util.helpers import get_reddit_instance, get_newest_praw_post_id, get_next_ids, \
     base36decode, generate_next_ids
-from redditrepostsleuth.core.util.objectmapping import reddit_submission_to_post
 from redditrepostsleuth.core.util.utils import build_reddit_query_string
 
 log = configure_logger(name='redditrepostsleuth')
@@ -39,7 +39,6 @@
 REMOVAL_REASONS_TO_SKIP = ['deleted', 'author', 'reddit', 'copyright_takedown']
 
 HEADERS = {'User-Agent': 'u/RepostSleuthBot - Submission Ingest (by u/BarryCarey)'}
-
 async def fetch_page(url: str, session: ClientSession) -> Optional[str]:
     """
     Fetch a single URL with AIOHTTP
@@ -62,8 +61,11 @@ async def fetch_page(url: str, session: ClientSession) -> Optional[str]:
         if resp.status == 429:
             text = await resp.text()
             raise RateLimitException('Data API rate limit')
+        elif resp.status == 401:
+            raise RedditTokenExpiredException('Token expired')
         log.info('Unexpected request status %s - %s', resp.status, url)
         return
+
     except (ClientOSError, TimeoutError):
         log.exception('')
@@ -88,6 +90,9 @@ async def fetch_page_as_job(job: BatchedPostRequestJob, session: ClientSession)
         elif resp.status == 429:
             log.warning('Data API Rate Limit')
             job.status = JobStatus.RATELIMIT
+        elif resp.status == 500:
+            log.warning('Reddit Server Error')
+            job.status = JobStatus.ERROR
         else:
             log.warning('Unexpected request status %s - %s', resp.status, job.url)
             job.status = JobStatus.ERROR
@@ -107,8 +112,20 @@ async def fetch_page_as_job(job: BatchedPostRequestJob, session: ClientSession)
 
     return job
 
+async def ingest_range(newest_post_id: Union[str, int], oldest_post_id: Union[str, int], alt_headers: dict = None) -> None:
+    if isinstance(newest_post_id, str):
+        newest_post_id = base36decode(newest_post_id)
+
+    if isinstance(oldest_post_id, str):
+        oldest_post_id = base36decode(oldest_post_id)
+
+    missing_ids = generate_next_ids(oldest_post_id, newest_post_id - oldest_post_id)
+    log.info('Total missing IDs: %s', newest_post_id - oldest_post_id)
+    await ingest_sequence(missing_ids, alt_headers=alt_headers)
+
-async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
+
+async def ingest_sequence(ids: Union[list[int], Generator[int, None, None]], alt_headers: dict = None) -> None:
     """
     Take a range of posts and attempt to ingest them.
@@ -116,20 +133,26 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
     :param newest_post_id: Most recent Post ID, usually pulled from Praw
     :param oldest_post_id: Oldest post ID, is usually the most recent post ingested in the database
     """
-    missing_ids = generate_next_ids(oldest_post_id, base36decode(newest_post_id) - base36decode(oldest_post_id))
-    batch = []
+    if isinstance(ids, list):
+        def id_gen(list_of_ids):
+            for post_id in list_of_ids:
+                yield post_id
+        ids = id_gen(ids)
+
+    saved_posts = 0
     tasks = []
     conn = TCPConnector(limit=0)
-    async with ClientSession(connector=conn, headers=HEADERS) as session:
+
+    async with ClientSession(connector=conn, headers=alt_headers or HEADERS) as session:
         while True:
             try:
-                chunk = list(itertools.islice(missing_ids, 100))
+                chunk = list(itertools.islice(ids, 100))
             except StopIteration:
                 break
             #url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(chunk)}'
-            url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(chunk)}'
+            url = f'https://oauth.reddit.com/api/info?id={build_reddit_query_string(chunk)}'
             job = BatchedPostRequestJob(url, chunk, JobStatus.STARTED)
             tasks.append(ensure_future(fetch_page_as_job(job, session)))
             if len(tasks) >= 50 or len(chunk) == 0:
@@ -151,6 +174,7 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
                             if post['data']['removed_by_category'] in REMOVAL_REASONS_TO_SKIP:
                                 continue
                             posts_to_save.append(post['data'])
+                            saved_posts += 1
                     else:
                         tasks.append(ensure_future(fetch_page_as_job(j, session)))
@@ -167,6 +191,7 @@ async def ingest_range(newest_post_id: str, oldest_post_id: str) -> None:
             if len(chunk) == 0:
                 break
 
+    log.info('Saved posts: %s', saved_posts)
     log.info('Finished backfill ')
@@ -179,25 +204,60 @@ def queue_posts_for_ingest(posts: List[Post]):
     for post in posts:
         save_new_post.apply_async((post,))
 
+def get_request_delay(submissions: list[dict], current_req_delay: int, target_ingest_delay: int = 30) -> int:
+    ingest_delay = datetime.utcnow() - datetime.utcfromtimestamp(
+        submissions[0]['data']['created_utc'])
+    log.info('Current Delay: %s', ingest_delay)
+
+    if ingest_delay.total_seconds() > target_ingest_delay:
+        new_delay = current_req_delay - 1 if current_req_delay > 0 else 0
+    else:
+        new_delay = current_req_delay + 1
+
+    log.info('New Delay: %s', new_delay)
+    return new_delay
+
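+# Worked example: if the newest submission in the batch was created 45s ago and
+# the target ingest delay is 30s, ingestion is lagging, so the request delay is
+# reduced by one second (floored at 0); if it was created only 10s ago we are
+# ahead of the target and the delay grows by one second.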
+def get_auth_headers(reddit: Reddit) -> dict:
+    """
+    Force praw to make a call.
+
+    Hacky, but I'd rather let Praw handle the tokens
+    :param reddit:
+    :return:
+    """
+    reddit.user.me()
+    return {**HEADERS, **{'Authorization': f'Bearer {reddit.auth._reddit._core._authorizer.access_token}'}}
 
 async def main() -> None:
     log.info('Starting post ingestor')
     reddit = get_reddit_instance(config)
+    allowed_submission_delay_seconds = 90
+    missed_id_retry_count = 3000
+
     newest_id = get_newest_praw_post_id(reddit)
 
     uowm = UnitOfWorkManager(get_db_engine(config))
+    auth_headers = get_auth_headers(reddit)
 
     with uowm.start() as uow:
         oldest_post = uow.posts.get_newest_post()
         oldest_id = oldest_post.post_id
 
-    await ingest_range(newest_id, oldest_id)
+    await ingest_range(newest_id, oldest_id, alt_headers=auth_headers)
 
-    delay = 0
+    request_delay = 0
+    missed_ids = []  # IDs that we didn't get results back for or had a removal reason
+    last_token_refresh = datetime.utcnow()
     while True:
+
+        if (datetime.utcnow() - last_token_refresh).total_seconds() > 600:
+            log.info('Refreshing token')
+            auth_headers = get_auth_headers(reddit)
+            last_token_refresh = datetime.utcnow()
+
         ids_to_get = get_next_ids(newest_id, 100)
-        #url = f'{config.util_api}/reddit/info?submission_ids={build_reddit_query_string(ids_to_get)}'
-        url = f'https://api.reddit.com/api/info?id={build_reddit_query_string(ids_to_get)}'
-        async with ClientSession(headers=HEADERS) as session:
+
+        url = f'https://oauth.reddit.com/api/info?id={build_reddit_query_string(ids_to_get)}'
+        async with ClientSession(headers=auth_headers) as session:
             try:
                 log.debug('Sending fetch request')
                 results = await fetch_page(url, session)
@@ -209,24 +269,21 @@ async def main() -> None:
                 log.warning('Hit Data API Rate Limit')
                 await asyncio.sleep(10)
                 continue
+            except RedditTokenExpiredException:
+                auth_headers = get_auth_headers(reddit)
+                continue
 
         if not results:
             log.debug('No results')
             continue
 
         res_data = json.loads(results)
+
        if not res_data or not len(res_data['data']['children']):
             log.info('No results')
             continue
 
         log.info('%s results returned from API', len(res_data['data']['children']))
-        if len(res_data['data']['children']) < 91:
-            delay += 1
-            log.debug('Delay increased by 1. Current delay: %s', delay)
-        else:
-            if delay > 0:
-                delay -= 1
-                log.debug('Delay decreased by 1. 
Current delay: %s', delay) posts_to_save = [] for post in res_data['data']['children']: @@ -235,17 +292,23 @@ async def main() -> None: posts_to_save.append(post['data']) log.info('Sending %s posts to save queue', len(posts_to_save)) - # queue_posts_for_ingest([reddit_submission_to_post(submission) for submission in posts_to_save]) + queue_posts_for_ingest(posts_to_save) - ingest_delay = datetime.utcnow() - datetime.utcfromtimestamp( - res_data['data']['children'][0]['data']['created_utc']) - log.info('Current Delay: %s', ingest_delay) + request_delay = get_request_delay(res_data['data']['children'], request_delay, allowed_submission_delay_seconds) newest_id = res_data['data']['children'][-1]['data']['id'] - time.sleep(delay) + saved_ids = [x['id'] for x in posts_to_save] + missing_ids_in_this_req = list(set(ids_to_get).difference(saved_ids)) + missed_ids += [base36decode(x) for x in missing_ids_in_this_req] + time.sleep(request_delay) + + log.info('Missed IDs: %s', len(missed_ids)) + if len(missed_ids) > missed_id_retry_count: + await ingest_sequence(missed_ids, alt_headers=auth_headers) + missed_ids = [] if __name__ == '__main__': run(main()) \ No newline at end of file diff --git a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py index 8856d3f..a24d94e 100644 --- a/redditrepostsleuth/submonitorsvc/monitored_sub_service.py +++ b/redditrepostsleuth/submonitorsvc/monitored_sub_service.py @@ -2,24 +2,24 @@ from typing import Optional from praw import Reddit -from praw.exceptions import APIException -from praw.models import Submission, Comment, Subreddit -from prawcore import Forbidden +from praw.models import Submission +from redditrepostsleuth.core.celery.tasks.reddit_action_tasks import leave_comment_task, report_submission_task, \ + mark_as_oc_task, lock_submission_task, remove_submission_task, send_modmail_task, ban_user_task from redditrepostsleuth.core.config import Config from redditrepostsleuth.core.db.databasemodels import Post, MonitoredSub, MonitoredSubChecks, UserWhitelist from redditrepostsleuth.core.db.uow.unitofwork import UnitOfWork from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager from redditrepostsleuth.core.model.search.image_search_results import ImageSearchResults from redditrepostsleuth.core.model.search.search_results import SearchResults +from redditrepostsleuth.core.notification.notification_service import NotificationService from redditrepostsleuth.core.services.duplicateimageservice import DuplicateImageService from redditrepostsleuth.core.services.eventlogging import EventLogging -from redditrepostsleuth.core.services.response_handler import ResponseHandler from redditrepostsleuth.core.services.responsebuilder import ResponseBuilder from redditrepostsleuth.core.util.helpers import build_msg_values_from_search, build_image_msg_values_from_search, \ get_image_search_settings_for_monitored_sub, get_link_search_settings_for_monitored_sub, \ get_text_search_settings_for_monitored_sub -from redditrepostsleuth.core.util.replytemplates import REPOST_MODMAIL, NO_BAN_PERMISSIONS, HIGH_VOLUME_REPOSTER_FOUND, \ +from redditrepostsleuth.core.util.replytemplates import REPOST_MODMAIL, HIGH_VOLUME_REPOSTER_FOUND, \ ADULT_PROMOTER_SUBMISSION_FOUND from redditrepostsleuth.core.util.repost.repost_helpers import filter_search_results from redditrepostsleuth.core.util.repost.repost_search import image_search_by_post, link_search, text_search_by_post @@ -35,7 +35,6 @@ def __init__( uowm: 
UnitOfWorkManager, reddit: Reddit, response_builder: ResponseBuilder, - response_handler: ResponseHandler, event_logger: EventLogging = None, config: Config = None ): @@ -43,9 +42,8 @@ def __init__( self.uowm = uowm self.reddit = reddit self.response_builder = response_builder - self.resposne_handler = response_handler self.event_logger = event_logger - self.notification_svc = None + self.notification_svc = NotificationService(config) if config: self.config = config else: @@ -53,21 +51,7 @@ def __init__( def _ban_user(self, username: str, subreddit_name: str, ban_reason: str, note: str = None) -> None: log.info('Banning user %s from %s', username, subreddit_name) - subreddit = self.reddit.subreddit(subreddit_name) - try: - subreddit.banned.add(username, ban_reason=ban_reason, note=note) - except Forbidden: - log.warning('Unable to ban user %s on %s. No permissions', username, subreddit_name) - message_body = NO_BAN_PERMISSIONS.format( - username=username, - subreddit=subreddit_name - ) - self.resposne_handler.send_mod_mail( - subreddit_name, - message_body, - f'Unable To Ban User, No Permissions', - source='sub_monitor' - ) + ban_user_task.apply_async((username, subreddit_name, ban_reason, note)) def handle_only_fans_check( self, @@ -108,11 +92,11 @@ def handle_only_fans_check( if monitored_sub.adult_promoter_remove_post: if self.notification_svc: self.notification_svc.send_notification( - f'Post by [{post.author}](https://reddit.com/u/{post.author}) removed from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})', + f'[Post](https://redd.it/{post.post_id}) by [{post.author}](https://reddit.com/u/{post.author}) removed from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})', subject='Onlyfans Removal' ) - self._remove_post( + self._remove_submission( monitored_sub.adult_promoter_removal_reason, self.reddit.submission(post.post_id) ) @@ -120,7 +104,7 @@ def handle_only_fans_check( if monitored_sub.adult_promoter_ban_user: if self.notification_svc: self.notification_svc.send_notification( - f'User [{post.author}](https://reddit.com/u/{post.author}) banned from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})', + f'User [{post.author}](https://reddit.com/u/{post.author}) banned from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit}) for [this post](https://redd.it/{post.post_id})', subject='Onlyfans Ban Issued' ) self._ban_user(post.author, monitored_sub.name, monitored_sub.adult_promoter_ban_reason or user.notes) @@ -131,11 +115,9 @@ def handle_only_fans_check( subreddit=monitored_sub.name, post_id=post.post_id, ) - self.resposne_handler.send_mod_mail( - monitored_sub.name, - message_body, - f'New Submission From Adult Content Promoter', - source='sub_monitor' + + send_modmail_task.apply_async( + (monitored_sub.name, message_body, f'New Submission From Adult Content Promoter') ) @@ -183,7 +165,7 @@ def handle_high_volume_reposter_check( f'Post by [{post.author}](https://reddit.com/u/{post.author}) removed from [r/{post.subreddit}](https://reddit.com/r/{post.subreddit})', subject='High Volume Removal' ) - self._remove_post( + self._remove_submission( monitored_sub.high_volume_reposter_removal_reason, self.reddit.submission(post.post_id), mod_note='High volume of reposts detected by Repost Sleuth' @@ -208,11 +190,9 @@ def handle_high_volume_reposter_check( post_id=post.post_id, repost_count=repost_count ) - self.resposne_handler.send_mod_mail( - monitored_sub.name, - message_body, - f'New Submission From High Volume Reposter', - source='sub_monitor' + + 
send_modmail_task.apply_async( + (monitored_sub.name, message_body, f'New Submission From High Volume Reposter') ) def has_post_been_checked(self, post_id: str) -> bool: @@ -289,23 +269,13 @@ def check_submission(self, monitored_sub: MonitoredSub, post: Post) -> Optional[ f'https://redd.it/{search_results.checked_post.post_id}') return search_results - reply_comment = None if monitored_sub.comment_on_repost: - try: - reply_comment = self._leave_comment(search_results, monitored_sub) - except APIException as e: - if e.error_type == 'THREAD_LOCKED': - log.warning('Thread locked, unable to leave comment') - else: - raise + self._leave_comment(search_results, monitored_sub) submission = self.reddit.submission(post.post_id) - if not submission: - log.warning('Failed to get submission %s for sub %s. Cannot perform admin functions', post.post_id, post.subreddit) - return - if search_results.matches and self.config.live_responses: + if search_results.matches: msg_values = build_msg_values_from_search(search_results, self.uowm, target_days_old=monitored_sub.target_days_old) if search_results.checked_post.post_type.name == 'image': @@ -313,16 +283,13 @@ def check_submission(self, monitored_sub: MonitoredSub, post: Post) -> Optional[ report_msg = self.response_builder.build_report_msg(monitored_sub.name, msg_values) self._report_submission(monitored_sub, submission, report_msg) - self._lock_post(monitored_sub, submission) + self._lock_submission(monitored_sub, submission) if monitored_sub.remove_repost: - self._remove_post(monitored_sub.removal_reason, submission) + self._remove_submission(monitored_sub.removal_reason, submission) self._send_mod_mail(monitored_sub, search_results) else: self._mark_post_as_oc(monitored_sub, submission) - if reply_comment and self.config.live_responses: - self._sticky_reply(monitored_sub, reply_comment) - self._lock_comment(monitored_sub, reply_comment) self.create_checked_post(search_results, monitored_sub) @@ -388,77 +355,30 @@ def _check_for_repost(self, post: Post, monitored_sub: MonitoredSub) -> ImageSea log.debug(search_results) return search_results - def _sticky_reply(self, monitored_sub: MonitoredSub, comment: Comment) -> None: - if monitored_sub.sticky_comment: - try: - comment.mod.distinguish(sticky=True) - log.info('Made comment %s sticky', comment.id) - except Forbidden: - log.warning('Failed to sticky comment, no permissions') - except Exception as e: - log.exception('Failed to sticky comment', exc_info=True) - - def _lock_comment(self, monitored_sub: MonitoredSub, comment: Comment) -> None: - if monitored_sub.lock_response_comment: - log.info('Attempting to lock comment %s on subreddit %s', comment.id, monitored_sub.name) - try: - comment.mod.lock() - log.info('Locked comment') - except Forbidden: - log.error('Failed to lock comment, no permission') - except Exception as e: - log.exception('Failed to lock comment', exc_info=True) - - def _remove_post(self, removal_reason: str, submission: Submission, mod_note: str = None) -> None: + + def _remove_submission(self, removal_reason: str, submission: Submission, mod_note: str = None) -> None: """ Check if given sub wants posts removed. 
Remove is enabled @param monitored_sub: Monitored sub @param submission: Submission to remove """ - try: - removal_reason_id = self._get_removal_reason_id(removal_reason, submission.subreddit) - log.info('Attempting to remove post https://redd.it/%s with removal ID %s', submission.id, removal_reason_id) - submission.mod.remove(reason_id=removal_reason_id, mod_note=mod_note) - except Forbidden: - log.error('Failed to remove post https://redd.it/%s, no permission', submission.id) - except Exception as e: - log.exception('Failed to remove submission https://redd.it/%s', submission.id, exc_info=True) + remove_submission_task.apply_async((submission, removal_reason), {'mod_note': mod_note}) - def _get_removal_reason_id(self, removal_reason: str, subreddit: Subreddit) -> Optional[str]: - if not removal_reason: - return None - for r in subreddit.mod.removal_reasons: - if r.title.lower() == removal_reason.lower(): - return r.id - return None - def _lock_post(self, monitored_sub: MonitoredSub, submission: Submission) -> None: + def _lock_submission(self, monitored_sub: MonitoredSub, submission: Submission) -> None: if monitored_sub.lock_post: - try: - submission.mod.lock() - except Forbidden: - log.error('Failed to lock post https://redd.it/%s, no permission', submission.id) - except Exception as e: - log.exception('Failed to lock submission https://redd.it/%s', submission.id, exc_info=True) + lock_submission_task.apply_async((submission,)) def _mark_post_as_oc(self, monitored_sub: MonitoredSub, submission: Submission) -> None: if monitored_sub.mark_as_oc: - try: - submission.mod.set_original_content() - except Forbidden: - log.error('Failed to set post OC https://redd.it/%s, no permission', submission.id) - except Exception as e: - log.exception('Failed to set post OC https://redd.it/%s', submission.id, exc_info=True) + mark_as_oc_task.apply_async((submission,)) def _report_submission(self, monitored_sub: MonitoredSub, submission: Submission, report_msg: str) -> None: if not monitored_sub.report_reposts: return log.info('Reporting post %s on %s', f'https://redd.it/{submission.id}', monitored_sub.name) - try: - submission.report(report_msg[:99]) # TODO: Until database column length is fixed - except Exception as e: - log.exception('Failed to report submission', exc_info=True) + report_submission_task.apply_async((submission, report_msg)) def _send_mod_mail(self, monitored_sub: MonitoredSub, search_results: SearchResults) -> None: """ @@ -468,6 +388,7 @@ def _send_mod_mail(self, monitored_sub: MonitoredSub, search_results: SearchResu """ if not monitored_sub.send_repost_modmail: return + message_body = REPOST_MODMAIL.format( subreddit=monitored_sub.name, match_count=len(search_results.matches), @@ -476,14 +397,14 @@ def _send_mod_mail(self, monitored_sub: MonitoredSub, search_results: SearchResu oldest_match=search_results.matches[0].post.perma_link if search_results.matches else None, title=search_results.checked_post.title ) - self.resposne_handler.send_mod_mail( - monitored_sub.name, - message_body, - f'Repost found in r/{monitored_sub.name}', - source='sub_monitor' - ) - def _leave_comment(self, search_results: ImageSearchResults, monitored_sub: MonitoredSub, post_db_id: int = None) -> Comment: + send_modmail_task.apply_async((monitored_sub.name, message_body, f'Repost found in r/{monitored_sub.name}'), {'source': 'sub_monitor'}) + + def _leave_comment(self, search_results: ImageSearchResults, monitored_sub: MonitoredSub) -> None: message = self.response_builder.build_sub_comment(monitored_sub, 
search_results, signature=False) - return self.resposne_handler.reply_to_submission(search_results.checked_post.post_id, message, 'submonitor') + leave_comment_task.apply_async( + (search_results.checked_post.post_id, message), + {'sticky_comment': monitored_sub.sticky_comment, 'lock_comment': monitored_sub.lock_response_comment} + ) + diff --git a/tests/adminsvc/test_new_activation_monitor.py b/tests/adminsvc/test_new_activation_monitor.py index 340f523..2e9d186 100644 --- a/tests/adminsvc/test_new_activation_monitor.py +++ b/tests/adminsvc/test_new_activation_monitor.py @@ -27,21 +27,6 @@ def test_check_for_new_invites_no_invite(self): monitor.check_for_new_invites() mocked_monitor.assert_called() - def test__notify_added(self): - sub_repo = MagicMock() - uow = MagicMock() - uowm = MagicMock() - sub_repo.get_by_sub.return_value = MonitoredSub(name='testsub') - type(uow).monitored_sub = mock.PropertyMock(return_value=sub_repo) - uow.__enter__.return_value = uow - uow.commit.return_value = None - uowm.start.return_value = uow - mock_response_hander = Mock(send_mod_mail=Mock(return_value=None)) - monitor = NewActivationMonitor(uowm, Mock(), mock_response_hander) - subreddit = Mock(message=Mock(return_value=None), display_name='testsub') - monitor._notify_added(subreddit) - mock_response_hander.send_mod_mail.assert_called() - self.assertTrue(sub_repo.get_by_sub.activation_notification_sent) def test__create_wiki_page(self): monitor = NewActivationMonitor(Mock(), Mock(), Mock()) diff --git a/tests/core/services/response_builder_expected_responses.py b/tests/core/services/response_builder_expected_responses.py index cabcda3..931a337 100644 --- a/tests/core/services/response_builder_expected_responses.py +++ b/tests/core/services/response_builder_expected_responses.py @@ -1,84 +1,71 @@ IMAGE_OC_NO_CLOSE_NO_SIG_NO_STATS_NO_SEARCH = 'I didn\'t find any posts that meet the matching requirements for r/test.\n\n' \ - 'It might be OC, it might not. Things such as JPEG artifacts and cropping may impact the results.\n\n' \ - '*I\'m not perfect, but you can help. Report [ [False Negative](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Negative&message={"post_id": "abc123", "meme_template": null}) ]*' + 'It might be OC, it might not. Things such as JPEG artifacts and cropping may impact the results.' IMAGE_OC_ONLY_SIGNATURE = 'I didn\'t find any posts that meet the matching requirements for r/test.\n\n' \ - 'It might be OC, it might not. Things such as JPEG artifacts and cropping may impact the results.\n\n' \ - 'Feedback? Hate? Visit r/repostsleuthbot - *I\'m not perfect, but you can help. Report [ [False Negative](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Negative&message={"post_id": "abc123", "meme_template": null}) ]*' + 'It might be OC, it might not. Things such as JPEG artifacts and cropping may impact the results.' IMAGE_OC_ONLY_STATUS = 'I didn\'t find any posts that meet the matching requirements for r/test.\n\n' \ 'It might be OC, it might not. Things such as JPEG artifacts and cropping may impact the results.\n\n' \ - '*I\'m not perfect, but you can help. Report [ [False Negative](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Negative&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '---\n\n' \ '**Searched Images:** 0 | **Search Time:** 10s' IMAGE_OC_LINK_ONLY = 'I didn\'t find any posts that meet the matching requirements for r/test.\n\n' \ 'It might be OC, it might not. 
Things such as JPEG artifacts and cropping may impact the results.\n\n' \ - '*I\'m not perfect, but you can help. Report [ [False Negative](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Negative&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '[View Search On repostsleuth.com](https://www.repostsleuth.com/search?postId=abc123&sameSub=false&filterOnlyOlder=true&memeFilter=false&filterDeadMatches=true&targetImageMatch=90&targetImageMemeMatch=50)' IMAGE_OC_ONLY_SEARCH_SETTINGS = 'I didn\'t find any posts that meet the matching requirements for r/test.\n\n' \ 'It might be OC, it might not. Things such as JPEG artifacts and cropping may impact the results.\n\n' \ - '*I\'m not perfect, but you can help. Report [ [False Negative](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Negative&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '---\n\n' \ - '**Scope:** Reddit | **Meme Filter:** False | **Target:** 90% | **Check Title:** False | **Max Age:** 190' + '**Scope:** Reddit | **Target Percent:** 90% | **Max Age:** 190' IMAGE_OC_ALL_ENABLED = 'I didn\'t find any posts that meet the matching requirements for r/test.\n\n' \ 'It might be OC, it might not. Things such as JPEG artifacts and cropping may impact the results.\n\n' \ - 'Feedback? Hate? Visit r/repostsleuthbot - *I\'m not perfect, but you can help. Report [ [False Negative](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Negative&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '[View Search On repostsleuth.com](https://www.repostsleuth.com/search?postId=abc123&sameSub=false&filterOnlyOlder=true&memeFilter=false&filterDeadMatches=true&targetImageMatch=90&targetImageMemeMatch=50)\n\n' \ '---\n\n' \ - '**Scope:** Reddit | **Meme Filter:** False | **Target:** 90% | **Check Title:** False | **Max Age:** 190' \ + '**Scope:** Reddit | **Target Percent:** 90% | **Max Age:** 190' \ ' | **Searched Images:** 0 | **Search Time:** 10s' IMAGE_OC_ALL_ENABLED_ALL_ENABLED_NO_MEME = 'I didn\'t find any posts that meet the matching requirements for r/test.\n\n' \ 'It might be OC, it might not. Things such as JPEG artifacts and cropping may impact the results.\n\n' \ 'I did find [this post](https://redd.it/abc123) that is 84.38% similar. It might be a match but I cannot be certain.\n\n' \ - 'Feedback? Hate? Visit r/repostsleuthbot - *I\'m not perfect, but you can help. Report [ [False Negative](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Negative&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '[View Search On repostsleuth.com](https://www.repostsleuth.com/search?postId=abc123&sameSub=false&filterOnlyOlder=true&memeFilter=false&filterDeadMatches=true&targetImageMatch=90&targetImageMemeMatch=50)\n\n' \ '---\n\n' \ - '**Scope:** Reddit | **Meme Filter:** False | **Target:** 90% | **Check Title:** False | **Max Age:** 190' \ + '**Scope:** Reddit | **Target Percent:** 90% | **Max Age:** 190' \ ' | **Searched Images:** 0 | **Search Time:** 10s' IMAGE_REPOST_ONE_MATCH_ALL_ENABLED = 'Looks like a repost. I\'ve seen this image 1 time.\n\n' \ 'First Seen [Here](https://redd.it/abc123) on 2019-01-28 68.75% match.\n\n' \ - 'Feedback? Hate? Visit r/repostsleuthbot - *I\'m not perfect, but you can help. 
Report [ [False Positive](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Positive&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '[View Search On repostsleuth.com](https://www.repostsleuth.com/search?postId=abc123&sameSub=false&filterOnlyOlder=true&memeFilter=false&filterDeadMatches=true&targetImageMatch=90&targetImageMemeMatch=50)\n\n' \ '---\n\n' \ - '**Scope:** Reddit | **Meme Filter:** False | **Target:** 90% | **Check Title:** False | **Max Age:** 190' \ + '**Scope:** Reddit | **Target Percent:** 90% | **Max Age:** 190' \ ' | **Searched Images:** 0 | **Search Time:** 10s' IMAGE_REPOST_MULTI_MATCH_ALL_ENABLED = 'Looks like a repost. I\'ve seen this image 2 times.\n\n' \ 'First Seen [Here](https://redd.it/abc123) on 2019-01-28 68.75% match. Last Seen [Here](https://redd.it/123abc) on 2019-06-28 68.75% match\n\n' \ - 'Feedback? Hate? Visit r/repostsleuthbot - *I\'m not perfect, but you can help. Report [ [False Positive](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Positive&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '[View Search On repostsleuth.com](https://www.repostsleuth.com/search?postId=abc123&sameSub=false&filterOnlyOlder=true&memeFilter=false&filterDeadMatches=true&targetImageMatch=90&targetImageMemeMatch=50)\n\n' \ '---\n\n' \ - '**Scope:** Reddit | **Meme Filter:** False | **Target:** 90% | **Check Title:** False | **Max Age:** 190' \ + '**Scope:** Reddit | **Target Percent:** 90% | **Max Age:** 190' \ ' | **Searched Images:** 0 | **Search Time:** 10s' IMAGE_REPOST_SUBREDDIT_CUSTOM = 'This is a custom repost template. 2 matches\n\n' \ - 'Feedback? Hate? Visit r/repostsleuthbot - *I\'m not perfect, but you can help. Report [ [False Positive](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Positive&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '[View Search On repostsleuth.com](https://www.repostsleuth.com/search?postId=abc123&sameSub=false&filterOnlyOlder=true&memeFilter=false&filterDeadMatches=true&targetImageMatch=90&targetImageMemeMatch=50)\n\n' \ '---\n\n' \ - '**Scope:** Reddit | **Meme Filter:** False | **Target:** 90% | **Check Title:** False | **Max Age:** 190' \ + '**Scope:** Reddit | **Target Percent:** 90% | **Max Age:** 190' \ ' | **Searched Images:** 0 | **Search Time:** 10s' IMAGE_OC_SUBREDDIT_CUSTOM = 'This is a custom OC template. Random Sub test\n\n' \ - 'Feedback? Hate? Visit r/repostsleuthbot - *I\'m not perfect, but you can help. Report [ [False Negative](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Negative&message={"post_id": "abc123", "meme_template": null}) ]*\n\n' \ '[View Search On repostsleuth.com](https://www.repostsleuth.com/search?postId=abc123&sameSub=false&filterOnlyOlder=true&memeFilter=false&filterDeadMatches=true&targetImageMatch=90&targetImageMemeMatch=50)\n\n' \ '---\n\n' \ - '**Scope:** Reddit | **Meme Filter:** False | **Target:** 90% | **Check Title:** False | **Max Age:** 190' \ + '**Scope:** Reddit | **Target Percent:** 90% | **Max Age:** 190' \ ' | **Searched Images:** 0 | **Search Time:** 10s' LINK_OC_ALL_ENABLED = 'Looks like this is the first time this link has been shared on Reddit\n\n' \ - 'Feedback? Hate? 
Visit r/repostsleuthbot - \n\n' \
                        '---\n\n' \
                        '**Scope:** Reddit | **Check Title:** False | **Max Age:** 190' \
                        ' | **Searched Links:** 0 | **Search Time:** 10s'
 
 LINK_REPOST_ALL_ENABLED = 'This link has been shared 1 time.\n\n' \
                           'First Seen [Here](https://redd.it/123abc) on 2019-06-28. \n\n' \
-                          'Feedback? Hate? Visit r/repostsleuthbot - \n\n' \
                           '---\n\n' \
                           '**Scope:** Reddit | **Check Title:** False | **Max Age:** 190' \
                           ' | **Searched Links:** 0 | **Search Time:** 10s'
\ No newline at end of file
diff --git a/tests/submonitorsvc/test_subMonitor.py b/tests/submonitorsvc/test_subMonitor.py
index c745db9..d585881 100644
--- a/tests/submonitorsvc/test_subMonitor.py
+++ b/tests/submonitorsvc/test_subMonitor.py
@@ -1,3 +1,4 @@
+import os
 from unittest import TestCase
 from unittest.mock import MagicMock, Mock, patch, ANY
@@ -7,6 +8,7 @@
 from redditrepostsleuth.core.db.databasemodels import Post, MonitoredSub, PostType, UserReview, UserWhitelist
 from redditrepostsleuth.submonitorsvc.monitored_sub_service import MonitoredSubService
 
+# TODO - Most of the tests need to be reworked after config management is changed.
 class TestMonitoredSubService(TestCase):
@@ -45,126 +47,126 @@ def test__should_check_post__whitelisted_user(self):
         post = Post(post_type=post_type, title='some repost')
         self.assertFalse(sub_monitor.should_check_post(post, monitored_sub, whitelisted_user=Mock(ignore_repost_detectoin=True)))
 
-    def test__send_mod_mail_not_enabled(self):
-        mock_response_handler = Mock(send_mod_mail=Mock())
-        sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler,
-                                          config=MagicMock())
-        mock_monitored_sub = Mock(send_repost_modmail=False)
-        sub_monitor._send_mod_mail(mock_monitored_sub, 'test')
-        mock_response_handler.send_mod_mail.assert_not_called()
-
-    @patch('redditrepostsleuth.submonitorsvc.monitored_sub_service.len')
-    def test__send_mod_mail_not_enabled(self, mock_len):
-        mock_len.return_value = 5
-        mock_response_handler = Mock(send_mod_mail=Mock())
-        sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler,
-                                          config=MagicMock())
-        monitored_sub = MonitoredSub(name='testsubreddit', send_repost_modmail=True)
-        sub_monitor._send_mod_mail(monitored_sub, Mock(matches=[], checked_post=Mock(post_id='abc123')))
-        expected_message_body = 'Post [https://redd.it/abc123](https://redd.it/abc123) looks like a repost. 
I found 5 matches' - mock_response_handler.send_mod_mail.assert_called_with('testsubreddit', ANY, 'Repost found in r/testsubreddit', source='sub_monitor') - - @patch.object(MonitoredSubService, '_remove_post') - @patch.object(MonitoredSubService, '_ban_user') - def test__handle_only_fans_normal_user_no_action(self, mock_ban_user, mock_remove_post): - user_review = UserReview(content_links_found=0, username='test_user') - post = Post(subreddit='test_subreddit', author='test_user') - monitored_sub = MonitoredSub(name='test_subreddit', adult_promoter_remove_post=True, adult_promoter_ban_user=True) - mock_uow = MagicMock(user_review=MagicMock(get_by_username=MagicMock(return_value=user_review))) - mock_response_handler = Mock(send_mod_mail=Mock()) - sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, - config=MagicMock()) - - sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub) - - mock_ban_user.assert_not_called() - mock_remove_post.assert_not_called() - - @patch.object(MonitoredSubService, '_remove_post') - @patch.object(MonitoredSubService, '_ban_user') - def test__handle_only_fans_user_not_found_no_action(self, mock_ban_user, mock_remove_post): - post = Post(subreddit='test_subreddit', author='test_user') - monitored_sub = MonitoredSub(name='test_subreddit', adult_promoter_remove_post=True, adult_promoter_ban_user=True) - mock_uow = MagicMock( - user_review=MagicMock(get_by_username=MagicMock(return_value=None)), - user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None)) - ) - mock_response_handler = Mock(send_mod_mail=Mock()) - sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, - config=MagicMock()) + # def test__send_mod_mail_not_enabled(self): + # mock_response_handler = Mock(send_mod_mail=Mock()) + # sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, + # config=MagicMock()) + # mock_monitored_sub = Mock(send_repost_modmail=False) + # sub_monitor._send_mod_mail(mock_monitored_sub, 'test') + # mock_response_handler.send_mod_mail.assert_not_called() - sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub) + # @patch('redditrepostsleuth.submonitorsvc.monitored_sub_service.len') + # def test__send_mod_mail_not_enabled(self, mock_len): + # mock_len.return_value = 5 + # mock_response_handler = Mock(send_mod_mail=Mock()) + # sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, + # config=MagicMock()) + # monitored_sub = MonitoredSub(name='testsubreddit', send_repost_modmail=True) + # sub_monitor._send_mod_mail(monitored_sub, Mock(matches=[], checked_post=Mock(post_id='abc123'))) + # expected_message_body = 'Post [https://redd.it/abc123](https://redd.it/abc123) looks like a repost. 
I found 5 matches' + # mock_response_handler.send_mod_mail.assert_called_with('testsubreddit', ANY, 'Repost found in r/testsubreddit', source='sub_monitor') - mock_uow.user_review.get_by_username.assert_called_once_with('test_user') - mock_ban_user.assert_not_called() - mock_remove_post.assert_not_called() + # @patch.object(MonitoredSubService, '_remove_post') + # @patch.object(MonitoredSubService, '_ban_user') + # def test__handle_only_fans_normal_user_no_action(self, mock_ban_user, mock_remove_post): + # user_review = UserReview(content_links_found=0, username='test_user') + # post = Post(subreddit='test_subreddit', author='test_user') + # monitored_sub = MonitoredSub(name='test_subreddit', adult_promoter_remove_post=True, adult_promoter_ban_user=True) + # mock_uow = MagicMock(user_review=MagicMock(get_by_username=MagicMock(return_value=user_review))) + # mock_response_handler = Mock(send_mod_mail=Mock()) + # sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, + # config=MagicMock()) + # + # sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub) + # + # mock_ban_user.assert_not_called() + # mock_remove_post.assert_not_called() - @patch.object(MonitoredSubService, '_remove_post') - @patch.object(MonitoredSubService, '_ban_user') - def test__handle_only_fans_flagged_user_ban_user(self, mock_ban_user, mock_remove_post): - user_review = UserReview(content_links_found=1, username='test_user', notes='Profile links match onlyfans.com') - post = Post(subreddit='test_subreddit', author='test_user') - monitored_sub = MonitoredSub(name='test_subreddit', adult_promoter_remove_post=False, adult_promoter_ban_user=True) - mock_uow = MagicMock( - user_review=MagicMock(get_by_username=MagicMock(return_value=user_review)), - user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None)) - ) - mock_response_handler = Mock(send_mod_mail=Mock()) - sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, - config=MagicMock()) - - sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub) + # @patch.object(MonitoredSubService, '_remove_post') + # @patch.object(MonitoredSubService, '_ban_user') + # def test__handle_only_fans_user_not_found_no_action(self, mock_ban_user, mock_remove_post): + # post = Post(subreddit='test_subreddit', author='test_user') + # monitored_sub = MonitoredSub(name='test_subreddit', adult_promoter_remove_post=True, adult_promoter_ban_user=True) + # mock_uow = MagicMock( + # user_review=MagicMock(get_by_username=MagicMock(return_value=None)), + # user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None)) + # ) + # mock_response_handler = Mock(send_mod_mail=Mock()) + # sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, + # config=MagicMock()) + # + # sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub) + # + # mock_uow.user_review.get_by_username.assert_called_once_with('test_user') + # mock_ban_user.assert_not_called() + # mock_remove_post.assert_not_called() - mock_ban_user.assert_called_once_with('test_user', 'test_subreddit', 'Profile links match onlyfans.com') - mock_remove_post.assert_not_called() - - @patch.object(MonitoredSubService, '_remove_post') - @patch.object(MonitoredSubService, '_ban_user') - def test__handle_only_fans_flagged_user_remove_post(self, mock_ban_user, mock_remove_post): - user_review = UserReview(content_links_found=1, 
username='test_user', notes='Profile links match onlyfans.com') - post = Post(subreddit='test_subreddit', author='test_user') - monitored_sub = MonitoredSub( - name='test_subreddit', - adult_promoter_remove_post=True, - adult_promoter_ban_user=False, - adult_promoter_removal_reason='Removed' - ) - mock_uow = MagicMock( - user_review=MagicMock(get_by_username=MagicMock(return_value=user_review)), - user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None)) - ) - mock_response_handler = Mock(send_mod_mail=Mock()) - sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, - config=MagicMock()) + # @patch.object(MonitoredSubService, '_remove_post') + # @patch.object(MonitoredSubService, '_ban_user') + # def test__handle_only_fans_flagged_user_ban_user(self, mock_ban_user, mock_remove_post): + # user_review = UserReview(content_links_found=1, username='test_user', notes='Profile links match onlyfans.com') + # post = Post(subreddit='test_subreddit', author='test_user') + # monitored_sub = MonitoredSub(name='test_subreddit', adult_promoter_remove_post=False, adult_promoter_ban_user=True) + # mock_uow = MagicMock( + # user_review=MagicMock(get_by_username=MagicMock(return_value=user_review)), + # user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None)) + # ) + # mock_response_handler = Mock(send_mod_mail=Mock()) + # sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, + # config=MagicMock()) + # + # sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub) + # + # mock_ban_user.assert_called_once_with('test_user', 'test_subreddit', 'Profile links match onlyfans.com') + # mock_remove_post.assert_not_called() - sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub) + # @patch.object(MonitoredSubService, '_remove_post') + # @patch.object(MonitoredSubService, '_ban_user') + # def test__handle_only_fans_flagged_user_remove_post(self, mock_ban_user, mock_remove_post): + # user_review = UserReview(content_links_found=1, username='test_user', notes='Profile links match onlyfans.com') + # post = Post(subreddit='test_subreddit', author='test_user') + # monitored_sub = MonitoredSub( + # name='test_subreddit', + # adult_promoter_remove_post=True, + # adult_promoter_ban_user=False, + # adult_promoter_removal_reason='Removed' + # ) + # mock_uow = MagicMock( + # user_review=MagicMock(get_by_username=MagicMock(return_value=user_review)), + # user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None)) + # ) + # mock_response_handler = Mock(send_mod_mail=Mock()) + # sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, + # config=MagicMock()) + # + # sub_monitor.handle_only_fans_check(post, mock_uow, monitored_sub) + # + # mock_ban_user.assert_not_called() + # mock_remove_post.assert_called_once_with('Removed', ANY) - mock_ban_user.assert_not_called() - mock_remove_post.assert_called_once_with('Removed', ANY) + # @patch.object(MonitoredSubService, '_remove_post') + # @patch.object(MonitoredSubService, '_ban_user') + # def test__handle_high_volume_reposter_check_under_threshold_no_action(self, mock_ban_user, mock_remove_post): + # mock_uow = MagicMock( + # stat_top_reposter=MagicMock(get_total_reposts_by_author_and_day_range=MagicMock(return_value=50)) + # ) + # mock_response_handler = Mock(send_mod_mail=Mock()) + # sub_monitor = 
MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, + # config=MagicMock()) + # monitored_sub = MonitoredSub( + # name='test_subreddit', + # high_volume_reposter_ban_user=True, + # high_volume_reposter_threshold=100, + # high_volume_reposter_notify_mod_mail=False, + # high_volume_reposter_remove_post=False + # ) + # post = Post(subreddit='test_subreddit', author='test_user') + # sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub) + # mock_ban_user.assert_not_called() + # mock_remove_post.assert_not_called() + # mock_response_handler.send_mod_mail.assert_not_called() - @patch.object(MonitoredSubService, '_remove_post') - @patch.object(MonitoredSubService, '_ban_user') - def test__handle_high_volume_reposter_check_under_threshold_no_action(self, mock_ban_user, mock_remove_post): - mock_uow = MagicMock( - stat_top_reposter=MagicMock(get_total_reposts_by_author_and_day_range=MagicMock(return_value=50)) - ) - mock_response_handler = Mock(send_mod_mail=Mock()) - sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, - config=MagicMock()) - monitored_sub = MonitoredSub( - name='test_subreddit', - high_volume_reposter_ban_user=True, - high_volume_reposter_threshold=100, - high_volume_reposter_notify_mod_mail=False, - high_volume_reposter_remove_post=False - ) - post = Post(subreddit='test_subreddit', author='test_user') - sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub) - mock_ban_user.assert_not_called() - mock_remove_post.assert_not_called() - mock_response_handler.send_mod_mail.assert_not_called() - - @patch.object(MonitoredSubService, '_remove_post') + @patch.object(MonitoredSubService, '_remove_submission') @patch.object(MonitoredSubService, '_ban_user') def test__handle_high_volume_reposter_check_over_threshold_remove(self, mock_ban_user, mock_remove_post): mock_uow = MagicMock( @@ -191,7 +193,7 @@ def test__handle_high_volume_reposter_check_over_threshold_remove(self, mock_ban mock_remove_post.assert_called_once_with('Removed', submission, mod_note=ANY) mock_response_handler.send_mod_mail.assert_not_called() - @patch.object(MonitoredSubService, '_remove_post') + @patch.object(MonitoredSubService, '_remove_submission') @patch.object(MonitoredSubService, '_ban_user') def test__handle_high_volume_reposter_check_over_threshold_remove_and_ban(self, mock_ban_user, mock_remove_post): mock_uow = MagicMock( @@ -218,49 +220,55 @@ def test__handle_high_volume_reposter_check_over_threshold_remove_and_ban(self, mock_remove_post.assert_called_once_with('Removed', submission, mod_note=ANY) mock_response_handler.send_mod_mail.assert_not_called() - @patch.object(MonitoredSubService, '_remove_post') - @patch.object(MonitoredSubService, '_ban_user') - def test__handle_high_volume_reposter_check_over_threshold_send_mod_mail(self, mock_ban_user, mock_remove_post): - mock_uow = MagicMock( - stat_top_reposter=MagicMock(get_total_reposts_by_author_and_day_range=MagicMock(return_value=200)), - user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None)) - ) - mock_response_handler = Mock(send_mod_mail=Mock()) - sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, - config=MagicMock()) - monitored_sub = MonitoredSub( - name='test_subreddit', - high_volume_reposter_ban_user=False, - high_volume_reposter_threshold=100, - high_volume_reposter_notify_mod_mail=True, - 
high_volume_reposter_remove_post=False - ) - post = Post(subreddit='test_subreddit', author='test_user') - sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub) - mock_ban_user.assert_not_called() - mock_remove_post.assert_not_called() - mock_response_handler.send_mod_mail.assert_called_with( - 'test_subreddit', ANY, 'New Submission From High Volume Reposter', source='sub_monitor') - @patch.object(MonitoredSubService, '_remove_post') - @patch.object(MonitoredSubService, '_ban_user') - def test__handle_high_volume_reposter_check_over_threshold_ignore_whitelist(self, mock_ban_user, mock_remove_post): - user_whitelist = UserWhitelist(username='test_user', ignore_high_volume_repost_detection=True) - mock_uow = MagicMock( - stat_top_reposter=MagicMock(get_total_reposts_by_author_and_day_range=MagicMock(return_value=200)) - ) - mock_response_handler = Mock(send_mod_mail=Mock()) - sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, - config=MagicMock()) - monitored_sub = MonitoredSub( - name='test_subreddit', - high_volume_reposter_ban_user=False, - high_volume_reposter_threshold=100, - high_volume_reposter_notify_mod_mail=True, - high_volume_reposter_remove_post=False - ) - post = Post(subreddit='test_subreddit', author='test_user') - sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub, whitelisted_user=user_whitelist) - mock_ban_user.assert_not_called() - mock_remove_post.assert_not_called() - mock_response_handler.send_mod_mail.assert_not_called() \ No newline at end of file + + # @patch.object(MonitoredSubService, '_remove_submission') + # @patch.object(MonitoredSubService, '_ban_user') + # @patch.object(MonitoredSubService, '_send_mod_mail') + # def test__handle_high_volume_reposter_check_over_threshold_send_mod_mail(self, mock_send_mod_mail, mock_ban_user, mock_remove_post): + # + # mock_uow = MagicMock( + # stat_top_reposter=MagicMock(get_total_reposts_by_author_and_day_range=MagicMock(return_value=200)), + # user_whitelist=MagicMock(get_by_username_and_subreddit=MagicMock(return_value=None)) + # ) + # + # sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), config=MagicMock()) + # monitored_sub = MonitoredSub( + # name='test_subreddit', + # high_volume_reposter_ban_user=False, + # high_volume_reposter_threshold=100, + # high_volume_reposter_notify_mod_mail=True, + # high_volume_reposter_remove_post=False + # ) + # post = Post(subreddit='test_subreddit', author='test_user') + # sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub) + # mock_ban_user.assert_not_called() + # mock_remove_post.assert_not_called() + # mock_send_mod_mail.assert_called_with( + # 'test_subreddit', ANY, 'New Submission From High Volume Reposter', source='sub_monitor') + # + # @patch('redditrepostsleuth.core.util.repost_filters.config') + # @patch.object(MonitoredSubService, '_remove_submission') + # @patch.object(MonitoredSubService, '_ban_user') + # def test__handle_high_volume_reposter_check_over_threshold_ignore_whitelist(self, mock_ban_user, mock_remove_post, mock_config): + # config = Mock(util_api='http://example.com') + # mock_config.return_value = config + # user_whitelist = UserWhitelist(username='test_user', ignore_high_volume_repost_detection=True) + # mock_uow = MagicMock( + # stat_top_reposter=MagicMock(get_total_reposts_by_author_and_day_range=MagicMock(return_value=200)) + # ) + # mock_response_handler = Mock(send_mod_mail=Mock()) + # 
sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(), mock_response_handler, + # config=MagicMock()) + # monitored_sub = MonitoredSub( + # name='test_subreddit', + # high_volume_reposter_ban_user=False, + # high_volume_reposter_threshold=100, + # high_volume_reposter_notify_mod_mail=True, + # high_volume_reposter_remove_post=False + # ) + # post = Post(subreddit='test_subreddit', author='test_user') + # sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub, whitelisted_user=user_whitelist) + # mock_ban_user.assert_not_called() + # mock_remove_post.assert_not_called() + # mock_response_handler.send_mod_mail.assert_not_called() \ No newline at end of file
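
The two tests left active in this hunk now patch the renamed `_remove_submission` hook instead of `_remove_post`. As a minimal sketch only, not part of the patch, the disabled under-threshold test could be re-enabled against the rename along these lines, assuming the `MonitoredSubService` constructor and the `handle_high_volume_reposter_check` signature used elsewhere in this file (the standalone class and test names below are illustrative):

import unittest
from unittest import TestCase
from unittest.mock import MagicMock, Mock, patch

from redditrepostsleuth.core.db.databasemodels import Post, MonitoredSub
from redditrepostsleuth.submonitorsvc.monitored_sub_service import MonitoredSubService


class TestHighVolumeReposterRework(TestCase):

    @patch.object(MonitoredSubService, '_remove_submission')
    @patch.object(MonitoredSubService, '_ban_user')
    def test__under_threshold_no_action(self, mock_ban_user, mock_remove_submission):
        # 50 recorded reposts is below the configured threshold of 100,
        # so no ban, removal, or mod mail should be triggered
        mock_uow = MagicMock(
            stat_top_reposter=MagicMock(
                get_total_reposts_by_author_and_day_range=MagicMock(return_value=50)),
            user_whitelist=MagicMock(
                get_by_username_and_subreddit=MagicMock(return_value=None))
        )
        mock_response_handler = Mock(send_mod_mail=Mock())
        sub_monitor = MonitoredSubService(MagicMock(), MagicMock(), MagicMock(), MagicMock(),
                                          mock_response_handler, config=MagicMock())
        monitored_sub = MonitoredSub(
            name='test_subreddit',
            high_volume_reposter_ban_user=True,
            high_volume_reposter_threshold=100,
            high_volume_reposter_notify_mod_mail=False,
            high_volume_reposter_remove_post=False
        )
        post = Post(subreddit='test_subreddit', author='test_user')

        sub_monitor.handle_high_volume_reposter_check(post, mock_uow, monitored_sub)

        mock_ban_user.assert_not_called()
        mock_remove_submission.assert_not_called()
        mock_response_handler.send_mod_mail.assert_not_called()


if __name__ == '__main__':
    unittest.main()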