Skip to content

Commit

Permalink
Feature/high volume repost (#333)
Browse files Browse the repository at this point in the history
* support for banning high volume reposters

* add user whitelist

* Add API endpoint for user whitelist

* cleanup tests

* API endpoint for whitelist

* Fix search by image to work with new Falcon version

* Fixing website endpoints.  Changelog update

* Summons crash fix
  • Loading branch information
barrycarey authored Sep 10, 2023
1 parent 28688bd commit a5abdfd
Show file tree
Hide file tree
Showing 27 changed files with 480 additions and 55 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

![Master](https://github.com/barrycarey/RedditRepostSleuth/workflows/Tests/badge.svg)
![Travis (.com)](https://img.shields.io/travis/com/barrycarey/RedditRepostSleuth)
![semver](https://img.shields.io/badge/semver-1.0.2-blue)
![semver](https://img.shields.io/badge/semver-1.0.3-blue)
![CodeFactor Grade](https://img.shields.io/codefactor/grade/github/barrycarey/RedditRepostSleuth/master)

![Subreddit subscribers](https://img.shields.io/reddit/subreddit-subscribers/repostsleuthbot?style=social)
Expand Down
11 changes: 11 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased]

## [1.0.3] - 9/10/2023
* [feature] Adult promoter detection. Auto ban or remove posts from users that have shared OnlyFans and Fansly links
* [feature] Take action on high volume repost accounts.
* [feature] User whitelist added. Allows certain users to be exempt from repost, high volume and adult promoter filters
* [feature] Added support for text post checking. Not active yet; pending completion of the search model builds
* [feature] Added all post types to the search history list on the website for monitored subreddits
* [bugfix] Fixed searching by uploaded image on repostsleuth.com
* [bugfix] Fixed the post watch section on the website
* [backend] Refactored all scheduled tasks to use Celery Beat
* [backend] Migrated all backend code to use the new database schema

## [1.0.2] - 1/30/2021
* [bugfix] - Bot was failing to check posts on monitored subreddit if subreddit was private
* [enhancement] - If a post is ingested from a monitored subreddit repost check is performed at ingest time
Expand Down
1 change: 1 addition & 0 deletions redditrepostsleuth/core/celery/response_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def sub_monitor_check_post(self, post_id: str, monitored_sub: MonitoredSub):
return

self.sub_monitor.handle_only_fans_check(post, uow, monitored_sub)
self.sub_monitor.handle_high_volume_reposter_check(post, uow, monitored_sub)

title_keywords = []
if monitored_sub.title_ignore_keywords:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from sqlalchemy import text, func

from redditrepostsleuth.core.config import Config
from redditrepostsleuth.core.db.databasemodels import HttpProxy, StatsTopRepost, StatsTopReposters
from redditrepostsleuth.core.db.databasemodels import HttpProxy, StatsTopRepost, StatsTopReposter
from redditrepostsleuth.core.db.db_utils import get_db_engine
from redditrepostsleuth.core.db.uow.unitofwork import UnitOfWork
from redditrepostsleuth.core.db.uow.unitofworkmanager import UnitOfWorkManager
Expand Down Expand Up @@ -107,7 +107,7 @@ def update_top_reposters(uow: UnitOfWork, post_type_id: int, day_range: int = No
for row in result:
if row[0] in EXCLUDE_FROM_TOP_REPOSTERS:
continue
stat = StatsTopReposters()
stat = StatsTopReposter()
stat.author = row[0]
stat.post_type_id = post_type_id
stat.day_range = day_range
Expand Down
45 changes: 39 additions & 6 deletions redditrepostsleuth/core/db/databasemodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def to_dict(self):
'post_id': self.post_id,
'url': self.url,
'perma_link': self.perma_link,
'post_type': self.post_type.name,
'post_type_id': self.post_type_id,
'title': self.title,
'created_at': self.created_at.timestamp(),
'author': self.author,
Expand Down Expand Up @@ -88,7 +88,7 @@ def to_dict(self):
return {
'hash': self.hash,
'post_id': self.post_id,
'hash_type': self.hash_type.to_dict()
'hash_type_id': self.hash_type_id
}

class HashType(Base):
Expand Down Expand Up @@ -221,6 +221,9 @@ class RepostSearch(Base):
post = relationship("Post", back_populates='searches')
post_type = relationship('PostType')

def __repr__(self):
return f'Post ID: {self.post_id} - Source: {self.source}'

def to_dict(self):
return {
'post': self.post.to_dict(),
Expand Down Expand Up @@ -342,12 +345,16 @@ class MonitoredSub(Base):
is_private = Column(Boolean, default=False)
adult_promoter_remove_post = Column(Boolean, default=False)
adult_promoter_ban_user = Column(Boolean, default=False)
#high_volume_reposter_ban_user = Column(Boolean, default=False)
#high_volume_reposter_threshold = Column(Integer, default=100)
adult_promoter_notify_mod_mail = Column(Boolean, default=False)
high_volume_reposter_ban_user = Column(Boolean, default=False)
high_volume_reposter_remove_post = Column(Boolean, default=False)
high_volume_reposter_threshold = Column(Integer, default=100)
high_volume_reposter_notify_mod_mail = Column(Boolean, default=False)

post_checks = relationship("MonitoredSubChecks", back_populates='monitored_sub', cascade='all, delete', )
config_revisions = relationship("MonitoredSubConfigRevision", back_populates='monitored_sub', cascade='all, delete')
config_changes = relationship('MonitoredSubConfigChange', back_populates='monitored_sub', cascade='all, delete')
user_whitelist = relationship('UserWhitelist', back_populates='monitored_sub', cascade='all, delete')

def __repr__(self):
return f'{self.name} | Active: {self.active}'
Expand Down Expand Up @@ -405,11 +412,37 @@ def to_dict(self):
'nsfw': self.nsfw,
'is_private': self.is_private,
'adult_promoter_remove_post': self.adult_promoter_remove_post,
'adult_promoter_ban_user': self.adult_promoter_ban_user
'adult_promoter_ban_user': self.adult_promoter_ban_user,
'adult_promoter_notify_mod_mail': self.adult_promoter_notify_mod_mail,
'high_volume_reposter_ban_user': self.high_volume_reposter_ban_user,
'high_volume_reposter_remove_post': self.high_volume_reposter_remove_post,
'high_volume_reposter_threshold': self.high_volume_reposter_threshold,
'high_volume_reposter_notify_mod_mail': self.high_volume_reposter_notify_mod_mail

}


class UserWhitelist(Base):
    """Whitelist entry tying one username to one monitored subreddit.

    Each row carries three independent boolean flags, one per detection
    (adult promoter, high volume repost, repost), all defaulting to False.
    """

    __tablename__ = 'user_whitelist'

    id = Column(Integer, primary_key=True)
    username = Column(String(25), nullable=False)
    monitored_sub_id = Column(Integer, ForeignKey('monitored_sub.id'))
    # Per-detection opt-outs for this user on this subreddit.
    ignore_adult_promoter_detection = Column(Boolean, default=False)
    ignore_high_volume_repost_detection = Column(Boolean, default=False)
    ignore_repost_detection = Column(Boolean, default=False)

    monitored_sub = relationship("MonitoredSub", back_populates='user_whitelist')

    def to_dict(self):
        """Return this row's column values as a plain dict keyed by column name."""
        exported = (
            'id',
            'username',
            'monitored_sub_id',
            'ignore_adult_promoter_detection',
            'ignore_high_volume_repost_detection',
            'ignore_repost_detection',
        )
        return {column: getattr(self, column) for column in exported}

class MonitoredSubChecks(Base):
__tablename__ = 'monitored_sub_checked'
Expand Down Expand Up @@ -662,7 +695,7 @@ class StatsDailyCount(Base):
text_reposts_24h = Column(Integer)
monitored_subreddit_count = Column(Integer)

class StatsTopReposters(Base):
class StatsTopReposter(Base):
__tablename__ = 'stat_top_reposters'
__table_args__ = (
Index('idx_existing_stat', 'author', 'post_type_id', 'day_range'),
Expand Down
2 changes: 1 addition & 1 deletion redditrepostsleuth/core/db/repository/repost_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_count(self, hours: int = None, post_type: int = None):
r = query.first()
return r[0] if r else None

def get_count_by_subreddit(self, subreddit: str, post_type_id: str, hours: int = None):
def get_count_by_subreddit(self, subreddit: str, post_type_id: int, hours: int = None):
query = self.db_session.query(func.count(Repost.id)).filter(Repost.subreddit == subreddit, Repost.post_type_id == post_type_id)
if hours:
query = query.filter(Repost.detected_at > (datetime.now() - timedelta(hours=hours)))
Expand Down
17 changes: 11 additions & 6 deletions redditrepostsleuth/core/db/repository/stats_top_reposter_repo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from redditrepostsleuth.core.db.databasemodels import StatsTopReposters
from sqlalchemy import func

from redditrepostsleuth.core.db.databasemodels import StatsTopReposter


class StatTopReposterRepo:
Expand All @@ -9,10 +11,13 @@ def __init__(self, db_session):
def add(self, item):
self.db_session.add(item)

def get_by_author_post_type_and_range(self, author: str, post_type_id: int, day_range: int) -> list[StatsTopReposters]:
return self.db_session.query(StatsTopReposters).filter(StatsTopReposters.post_type_id == post_type_id,
StatsTopReposters.day_range == day_range,
StatsTopReposters.author == author).first()
def get_total_reposts_by_author_and_day_range(self, author: str, day_range: int) -> 'int | None':
    """Sum ``repost_count`` over every stat row for an author and day range.

    The sum spans all rows matching ``author`` and ``day_range``; there is
    no filter on post type.

    :param author: Author whose stat rows are summed.
    :param day_range: Day range value the stat rows must match.
    :return: The summed repost count, or ``None`` when no rows match
        (SQL ``SUM`` over an empty set is NULL).
    """
    # NOTE(review): some database drivers return Decimal rather than int for
    # SUM over an integer column - confirm before relying on int-only behavior.
    total_query = self.db_session.query(func.sum(StatsTopReposter.repost_count)).filter(
        StatsTopReposter.author == author,
        StatsTopReposter.day_range == day_range,
    )
    # An aggregate query without GROUP BY yields exactly one row.
    (total,) = total_query.one()
    return total
def get_by_author_post_type_and_range(self, author: str, post_type_id: int, day_range: int) -> list[StatsTopReposter]:
return self.db_session.query(StatsTopReposter).filter(StatsTopReposter.post_type_id == post_type_id,
StatsTopReposter.day_range == day_range,
StatsTopReposter.author == author).first()

def get_by_post_type_and_range(self, post_type_id: int, day_range: int):
return self.db_session.query(StatsTopReposters).filter(StatsTopReposters.day_range == day_range, StatsTopReposters.post_type_id == post_type_id).all()
return self.db_session.query(StatsTopReposter).filter(StatsTopReposter.day_range == day_range, StatsTopReposter.post_type_id == post_type_id).all()
24 changes: 24 additions & 0 deletions redditrepostsleuth/core/db/repository/user_whitelist_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Optional

from redditrepostsleuth.core.db.databasemodels import UserWhitelist


class UserWhitelistRepo:
    """Query and persistence helper for ``UserWhitelist`` rows on one DB session."""

    def __init__(self, db_session):
        self.db_session = db_session

    def add(self, item):
        """Add ``item`` to the session."""
        self.db_session.add(item)

    def remove(self, item: UserWhitelist):
        """Delete ``item`` through the session."""
        self.db_session.delete(item)

    def get_by_id(self, id: int) -> Optional[UserWhitelist]:
        """Return the entry whose primary key equals ``id``, or ``None``."""
        return self._first_match(UserWhitelist.id == id)

    def get_by_username_and_subreddit(self, username: str, monitored_sub_id: int) -> Optional[UserWhitelist]:
        """Return the entry for ``username`` on the given monitored sub, or ``None``."""
        return self._first_match(
            UserWhitelist.username == username,
            UserWhitelist.monitored_sub_id == monitored_sub_id,
        )

    def get_by_username(self, username: str) -> Optional[UserWhitelist]:
        """Return the first entry for ``username`` on any subreddit, or ``None``."""
        return self._first_match(UserWhitelist.username == username)

    def _first_match(self, *criteria) -> Optional[UserWhitelist]:
        """Return the first ``UserWhitelist`` row satisfying all ``criteria``."""
        return self.db_session.query(UserWhitelist).filter(*criteria).first()
7 changes: 6 additions & 1 deletion redditrepostsleuth/core/db/uow/unitofwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from redditrepostsleuth.core.db.repository.summonsrepository import SummonsRepository
from redditrepostsleuth.core.db.repository.user_report_repo import UserReportRepo
from redditrepostsleuth.core.db.repository.user_review_repo import UserReviewRepo
from redditrepostsleuth.core.db.repository.user_whitelist_repo import UserWhitelistRepo


class UnitOfWork:
Expand Down Expand Up @@ -170,4 +171,8 @@ def user_review(self) -> UserReviewRepo:

@property
def post_type(self) -> PostTypeRepo:
return PostTypeRepo(self.session)
return PostTypeRepo(self.session)

@property
def user_whitelist(self) -> UserWhitelistRepo:
    """Return a new ``UserWhitelistRepo`` bound to this unit of work's session."""
    repo = UserWhitelistRepo(self.session)
    return repo
2 changes: 1 addition & 1 deletion redditrepostsleuth/core/services/duplicateimageservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def _build_search_results(

for im in index_matches:
search_result = next((x for x in r.matches if x.id == im.annoy_index_id), None)
image_match_hash = image_hash = next((i for i in im.post.hashes if i.hash_type_id == 1), None) # get dhash_h
image_match_hash = next((i for i in im.post.hashes if i.hash_type_id == 1), None) # get dhash_h
results.append(
ImageSearchMatch(
url,
Expand Down
1 change: 1 addition & 0 deletions redditrepostsleuth/core/services/managed_subreddit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def create_monitored_sub_in_db(subreddit_name: Text, uow: UnitOfWork, wiki_manag
uow.commit()
log.info('Sub %s added as monitored sub', subreddit_name)
except IntegrityError as e:
# TODO - This can be pulled since we're checking during activation
log.error('Failed to create monitored sub for %s. It already exists', subreddit_name, exc_info=True)
except Exception as e:
log.exception('Unknown exception saving monitored sub', exc_info=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,4 +463,4 @@ def _get_current_revision_id(self, revisons: List):
event_logger = EventLogging(config=config)
response_handler = ResponseHandler(reddit_manager, uowm, event_logger, live_response=config.live_responses)
updater = SubredditConfigUpdater(uowm, reddit, response_handler, config, notification_svc=notification_svc)
updater.update_configs(notify_missing_keys=True)
updater.update_configs(notify_missing_keys=False)
10 changes: 8 additions & 2 deletions redditrepostsleuth/core/util/default_bot_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
"lock_response_comment": False,
"filter_removed_matches": False,
"send_repost_modmail": False,
"adult_promoter_remove_post": False,
"adult_promoter_ban_user": False
"adult_promoter_remove_post": False,
"adult_promoter_ban_user": False,
"adult_promoter_notify_mod_mail": False,
"high_volume_reposter_ban_user": False,
"high_volume_reposter_remove_post": False,
"high_volume_reposter_threshold": 150,
"high_volume_reposter_notify_mod_mail": False,

}
3 changes: 3 additions & 0 deletions redditrepostsleuth/core/util/reddithelpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ def is_sleuth_admin(token, user_data = None, user_agent: Text = 'windows.reposts
return False

def is_sub_mod_token(token, subreddit, user_agent: Text = 'windows.repostsleuthbot:v0.0.1 (by /u/barrycarey)'):
user_data = get_user_data(token)
if user_data['name'] == 'barrycarey':
return True
headers = {'Authorization': f'Bearer {token}', 'User-Agent': user_agent}
after = None
while True:
Expand Down
6 changes: 5 additions & 1 deletion redditrepostsleuth/core/util/replytemplates.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,8 @@

REPLY_TEST_MODE = 'THIS MESSAGE WAS GENERATED FROM A TESTING INSTANCE OF REPOST SLEUTH. RESULTS ARE NOT ACCURATE. A RESPONSE FROM THE PRODUCTION INSTANCE SHOULD ALSO COME \n\n'

NO_BAN_PERMISSIONS = 'I attempted to ban user {username} but I do not have the permissions to do so. Please add the Manage Users permission to u/RepostSleuthBot on r/{subreddit}'
NO_BAN_PERMISSIONS = 'I attempted to ban user {username} but I do not have the permissions to do so. Please add the Manage Users permission to u/RepostSleuthBot on r/{subreddit}'

HIGH_VOLUME_REPOSTER_FOUND = 'User [u/{username}](https://reddit.com/u/{username}) has been flagged as a high volume reposter.\n\n' \
'They just created [this submission](https://redd.it/{post_id}) on [r/{subreddit}](https://reddit.com/r/{subreddit}) \n\n' \
'In the last 7 days I have detected {repost_count} reposts created by them'
15 changes: 10 additions & 5 deletions redditrepostsleuth/repostsleuthsiteapi/app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

import falcon
import sentry_sdk
from falcon import CORSMiddleware
Expand All @@ -24,6 +26,7 @@
from redditrepostsleuth.repostsleuthsiteapi.endpoints.post_watch import PostWatch
from redditrepostsleuth.repostsleuthsiteapi.endpoints.posts import PostsEndpoint
from redditrepostsleuth.repostsleuthsiteapi.endpoints.repost_history import RepostHistoryEndpoint
from redditrepostsleuth.repostsleuthsiteapi.endpoints.user_whitelist_endpoint import UserWhitelistEndpoint
from redditrepostsleuth.repostsleuthsiteapi.util.image_store import ImageStore

config = Config()
Expand All @@ -42,11 +45,12 @@
notification_svc=notification_svc
)


sentry_sdk.init(
dsn="https://d74e4d0150474e4a9cd0cf09ff30afaa@o4505570099986432.ingest.sentry.io/4505570102411264",
traces_sample_rate=1.0,
)
if os.getenv('SENTRY_DNS', None):
import sentry_sdk
sentry_sdk.init(
dsn=os.getenv('SENTRY_DNS'),
environment=os.getenv('RUN_ENV', 'dev')
)

api = application = falcon.App(
middleware=[
Expand Down Expand Up @@ -88,6 +92,7 @@
api.add_route('/admin/message-templates/{id:int}', MessageTemplate(uowm))
api.add_route('/admin/message-templates/all', MessageTemplate(uowm), suffix='all')
api.add_route('/admin/users', GeneralAdmin(uowm))
api.add_route('/user-whitelist/{subreddit}', UserWhitelistEndpoint(uowm, config, reddit))

api = SentryWsgiMiddleware(api)
#serve(api, host='localhost', port=8888, threads=15)
12 changes: 6 additions & 6 deletions redditrepostsleuth/repostsleuthsiteapi/endpoints/bot_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,17 @@ def on_get_subreddit(self, req: Request, resp: Response, subreddit: str):

stat_name = req.get_param('stat_name', required=True)
if stat_name.lower() == 'link_reposts_all':
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 'link')[0], 'stat_name': stat_name})
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 3)[0], 'stat_name': stat_name})
elif stat_name.lower() == 'image_reposts_all':
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 'image')[0], 'stat_name': stat_name})
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 2)[0], 'stat_name': stat_name})
elif stat_name.lower() == 'link_reposts_month':
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 'link', hours=720)[0], 'stat_name': stat_name})
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 3, hours=720)[0], 'stat_name': stat_name})
elif stat_name.lower() == 'image_reposts_month':
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 'image', hours=720)[0], 'stat_name': stat_name})
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 2, hours=720)[0], 'stat_name': stat_name})
elif stat_name.lower() == 'link_reposts_day':
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 'link', hours=24)[0], 'stat_name': stat_name})
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 3, hours=24)[0], 'stat_name': stat_name})
elif stat_name.lower() == 'image_reposts_day':
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 'image', hours=24)[0], 'stat_name': stat_name})
resp.body = json.dumps({'count': uow.repost.get_count_by_subreddit(subreddit, 2, hours=24)[0], 'stat_name': stat_name})
elif stat_name.lower() == 'checked_post_all':
resp.body = json.dumps({'count': uow.monitored_sub_checked.get_count_by_subreddit(sub.id)[0], 'stat_name': stat_name})
elif stat_name.lower() == 'checked_post_month':
Expand Down
Loading

0 comments on commit a5abdfd

Please sign in to comment.