From 20e03dd999f20630eff65db9b172ceb1f4bbc8a8 Mon Sep 17 00:00:00 2001 From: Sudipto Chandra Date: Sat, 16 Oct 2021 06:57:10 +0600 Subject: [PATCH 1/2] Update discord bot - rate limiting - vip users - reduce handler cache timeout --- .env.example | 12 +---- app.json | 6 +-- lncrawl/bots/console/resume_download.py | 2 + lncrawl/bots/discord/config.py | 37 +++++++++++--- lncrawl/bots/discord/discord_bot.py | 59 +++++++++++----------- lncrawl/bots/discord/message_handler.py | 65 ++++++++----------------- 6 files changed, 87 insertions(+), 94 deletions(-) diff --git a/.env.example b/.env.example index bb518e805..96c33694d 100644 --- a/.env.example +++ b/.env.example @@ -12,16 +12,8 @@ DISCORD_TOKEN= DISCORD_DISABLE_SEARCH=false DISCORD_SIGNAL_CHAR=! -# Publicly available data folder -PUBLIC_DATA_PATH= -PUBLIC_ADDRESS=http://18.218.187.242/ - -# Cloud drives for upload | Options: [GOFILE, GOOGLE_DRIVE] | Default: GOFILE -CLOUD_DRIVE=GOFILE - -# Gofile config. Token is optional. -GOFILE_TOKEN=kRgxIJe0D724Sdq0U12Hy0KwGhY10b1z -GOFILE_FOLDER_ID=1247451e-2730-46b3-8dd8-8cff8cb18a5c +# Cloud drives for upload | Options: [ANONFILES, GOFILE, GOOGLE_DRIVE] | Default: ANONFILES +CLOUD_DRIVE=ANONFILES # Google Drive Config GOOGLE_DRIVE_CREDENTIAL_FILE=mycreds.txt diff --git a/app.json b/app.json index 7e2d5d6f2..2574ea729 100644 --- a/app.json +++ b/app.json @@ -40,9 +40,9 @@ "value": "false" }, "CLOUD_DRIVE": { - "description": "Available: GOFILE, GOOGLE_DRIVE", + "description": "Available: GOFILE, GOOGLE_DRIVE, ANONFILES", "required": false, - "value": "GOFILE" + "value": "ANONFILES" }, "GOOGLE_DRIVE_CREDENTIAL_FILE": { "description": "Google Drive service credentials to use", @@ -65,4 +65,4 @@ "url": "https://github.com/NNTin/heroku-buildpack-calibre" } ] -} +} \ No newline at end of file diff --git a/lncrawl/bots/console/resume_download.py b/lncrawl/bots/console/resume_download.py index 2f6392306..0c8b49a82 100644 --- a/lncrawl/bots/console/resume_download.py +++ b/lncrawl/bots/console/resume_download.py @@ -52,6 +52,8 @@ def resume_session(): # end if app = load_session_from_metadata(metadata) + assert isinstance(app.crawler, Crawler) + print('Resuming', app.crawler.novel_title) print('Output path:', app.output_path) diff --git a/lncrawl/bots/discord/config.py b/lncrawl/bots/discord/config.py index e32b0a36e..e4ad980cf 100644 --- a/lncrawl/bots/discord/config.py +++ b/lncrawl/bots/discord/config.py @@ -1,17 +1,32 @@ # -*- coding: utf-8 -*- -import os import logging import logging.config +import os + from colorama import Fore -from ...core.arguments import get_args -# The special signal character for crawler commands +from lncrawl.core.arguments import get_args + +shard_id = get_args().shard_id +shard_count = get_args().shard_count signal = os.getenv('DISCORD_SIGNAL_CHAR') or '!' -max_workers = int(os.getenv('DISCORD_MAX_WORKERS', 10)) +discord_token = os.getenv('DISCORD_TOKEN') +disable_search = os.getenv('DISCORD_DISABLE_SEARCH') == 'true' +session_retain_time_in_seconds = 4 * 3600 +max_active_handles = 5 -# The public ip and path of the server to put files in -public_ip = os.getenv('PUBLIC_ADDRESS', None) -public_path = os.getenv('PUBLIC_DATA_PATH', None) +vip_users_ids = set([ + '1822', +]) + +available_formats = [ + 'epub', + 'text', + 'web', + 'mobi', + #'pdf', + #'fb2', +] os.makedirs('logs', exist_ok=True) logging.config.dictConfig({ @@ -41,7 +56,7 @@ 'file': { 'formatter': 'file', 'class': 'logging.handlers.RotatingFileHandler', - 'filename': 'logs/discord-bot_%s.log' % (get_args().shard_id), + 'filename': f'logs/discord-bot_{shard_id}.log', 'maxBytes': 10 * 1024 * 1024, # 10 MB 'backupCount': 5, 'encoding': 'utf8', @@ -54,3 +69,9 @@ }, }, }) + +logger = logging.getLogger(f'discord-{shard_id}') + +if not discord_token: + raise Exception('Discord token is not found') + diff --git a/lncrawl/bots/discord/discord_bot.py b/lncrawl/bots/discord/discord_bot.py index 3cc54b34a..38a47ac0e 100644 --- a/lncrawl/bots/discord/discord_bot.py +++ b/lncrawl/bots/discord/discord_bot.py @@ -1,19 +1,15 @@ # -*- coding: utf-8 -*- -import logging -import logging.config import os -import random import subprocess from datetime import datetime +from typing import Dict import discord -from ...core.arguments import get_args -from .config import signal +from . import config as C +from .config import logger from .message_handler import MessageHandler -logger = logging.getLogger(__name__) - def get_bot_version(): try: @@ -28,18 +24,19 @@ class DiscordBot(discord.Client): bot_version = get_bot_version() def __init__(self, *args, loop=None, **options): - options['shard_id'] = get_args().shard_id - options['shard_count'] = get_args().shard_count + options['shard_id'] = C.shard_id + options['shard_count'] = C.shard_count options['heartbeat_timeout'] = 300 options['guild_subscriptions'] = False options['fetch_offline_members'] = False + self.handlers: Dict[str, MessageHandler] = {} super().__init__(*args, loop=loop, **options) # end def def start_bot(self): self.bot_is_ready = False os.environ['debug_mode'] = 'yes' - self.run(os.getenv('DISCORD_TOKEN')) + self.run(C.discord_token) # end def async def on_ready(self): @@ -47,7 +44,7 @@ async def on_ready(self): self.handlers = {} print('Discord bot in online!') - activity = discord.Activity(name='for 🔥%s🔥 (%s)' % (signal, self.bot_version), + activity = discord.Activity(name='for 🔥%s🔥 (%s)' % (C.signal, self.bot_version), type=discord.ActivityType.watching) await self.change_presence(activity=activity, status=discord.Status.online) @@ -69,14 +66,13 @@ async def on_message(self, message): text = message.content if isinstance(message.channel, discord.abc.PrivateChannel): await self.handle_message(message) - elif text.startswith(signal) and len(text.split(signal)) == 2: - uid = message.author.id + elif text.startswith(C.signal) and len(text.split(C.signal)) == 2: + uid = str(message.author.id) if uid in self.handlers: self.handlers[uid].destroy() # end if - await self.send_public_text(message, random.choice([ - "Sending you a private message", - ])) + with message.channel.typing(): + await message.channel.send(f"Sending you a private message <@{uid}>") await self.handle_message(message) # end if except IndexError as ex: @@ -86,29 +82,34 @@ async def on_message(self, message): # end try # end def - async def send_public_text(self, message, text): - async with message.channel.typing(): - await message.channel.send(text + (" <@%s>" % str(message.author.id))) - # end def - async def handle_message(self, message): if self.is_closed(): return # end if try: uid = str(message.author.id) - logger.info("Processing message from %s", message.author.name) - if uid not in self.handlers: - self.handlers[uid] = MessageHandler(self) + discriminator = message.author.discriminator + logger.info("Processing message from %s#%s", message.author.name, discriminator) + if uid in self.handlers: + self.handlers[uid].process(message) + elif len(self.handlers) > C.max_active_handles or discriminator not in C.vip_users_ids: + await message.author.trigger_typing() + await message.author.send( + "Sorry! I am too busy processing requests of other users.\n" + "Please knock me here later!" + ) + else: + self.handlers[uid] = MessageHandler(uid, self) + logger.info("New handler for %s#%s [%s]", message.author.name, discriminator, uid) + await message.author.trigger_typing() await message.author.send( '-' * 25 + '\n' + - ('Hello %s\n' % message.author.name) + + f'Hello <@{uid}>\n' + '-' * 25 + '\n' ) - logger.info("New handler for %s", message.author.name) + self.handlers[uid].process(message) # end if - self.handlers[uid].process(message) - except Exception as err: + except Exception: logger.exception('While handling this message: %s', message) # end try # end def @@ -118,7 +119,7 @@ def cleanup_handlers(self): cur_time = datetime.now() for handler in self.handlers.values(): last_time = getattr(handler, 'last_activity', cur_time) - if (cur_time - last_time).days > 1: + if (cur_time - last_time).seconds > C.session_retain_time_in_seconds: handler.destroy() # end if # end for diff --git a/lncrawl/bots/discord/message_handler.py b/lncrawl/bots/discord/message_handler.py index 478cf9f01..e763e661e 100644 --- a/lncrawl/bots/discord/message_handler.py +++ b/lncrawl/bots/discord/message_handler.py @@ -1,43 +1,33 @@ # -*- coding: utf-8 -*- import asyncio -import logging import os import random import re import shutil from concurrent.futures import ThreadPoolExecutor from datetime import datetime -from urllib.parse import quote +from typing import Optional import discord -from ...core.app import App -from ...utils.uploader import upload -from .config import max_workers, public_ip, public_path +from lncrawl.core.app import App +from lncrawl.core.crawler import Crawler +from lncrawl.utils.uploader import upload -logger = logging.getLogger(__name__) - -available_formats = [ - 'epub', - 'text', - 'web', - 'mobi', - 'pdf', - 'fb2', -] - -disable_search = os.getenv('DISCORD_DISABLE_SEARCH') == 'true' +from .config import available_formats, disable_search, logger class MessageHandler: - def __init__(self, client): + def __init__(self, uid, client): self.app = App() + self.uid = uid self.client = client self.state = None - self.executor = ThreadPoolExecutor(max_workers) + self.executor = ThreadPoolExecutor(2) self.last_activity = datetime.now() self.closed = False self.get_current_status = None + self.selected_novel: Optional[dict] = None # end def def process(self, message): @@ -48,7 +38,7 @@ def process(self, message): def destroy(self): try: self.get_current_status = None - self.client.handlers.pop(str(self.user.id)) + self.client.handlers.pop(str(self.uid)) self.send_sync('Closing current session...') self.executor.shutdown(wait=False) self.app.destroy() @@ -61,7 +51,7 @@ def destroy(self): # end try # end def - def handle_message(self, message): + def handle_message(self, message: discord.Message): self.message = message self.user = message.author if not self.state: @@ -263,6 +253,7 @@ def handle_novel_selection(self): # end def def display_sources_selection(self): + assert isinstance(self.selected_novel, dict) novel_list = self.selected_novel['novels'] self.send_sync('**%s** is found in %d sources:\n' % (self.selected_novel['title'], len(novel_list))) @@ -288,6 +279,7 @@ def display_sources_selection(self): def handle_sources_to_search(self): self.state = self.busy_state + assert isinstance(self.selected_novel, dict) if len(self.selected_novel['novels']) == 1: novel = self.selected_novel['novels'][0] return self.handle_search_result(novel) @@ -350,9 +342,6 @@ def download_novel_info(self): # Setup output path root = os.path.abspath('.discord_bot_output') - if public_path and os.path.exists(public_path): - root = os.path.abspath(public_path) - # end if good_name = os.path.basename(self.app.output_path) output_path = os.path.join(root, str(self.user.id), good_name) shutil.rmtree(output_path, ignore_errors=True) @@ -373,6 +362,7 @@ def display_range_selection(self): '- Send `volume 2 5` to download download volume 2 and 5. Pass as many numbers you need.', '- Send `chapter 110 120` to download chapter 110 to 120. Only two numbers are accepted.', ])) + assert isinstance(self.app.crawler, Crawler) self.send_sync( '**It has `%d` volumes and `%d` chapters.**' % ( len(self.app.crawler.volumes), @@ -390,6 +380,7 @@ def handle_range_selection(self): return # end if + assert isinstance(self.app.crawler, Crawler) if text == 'all': self.app.chapters = self.app.crawler.chapters[:] elif re.match(r'^first(\s\d+)?$', text): @@ -421,7 +412,7 @@ def resolve_chapter(name): cid = 0 if name.isdigit(): cid = int(name) - else: + elif isinstance(self.app.crawler, Crawler): cid = self.app.crawler.get_chapter_index_of(name) # end if return cid - 1 @@ -517,6 +508,7 @@ def start_download(self): self.app.pack_by_volume = False try: + assert isinstance(self.app.crawler, Crawler) self.send_sync( '**%s**' % self.app.crawler.novel_title, 'Downloading %d chapters...' % len(self.app.chapters), @@ -539,14 +531,10 @@ def start_download(self): if self.closed: return - if public_ip and public_path and os.path.exists(public_path): - self.send_sync('Publishing files...') - self.publish_files() - else: - for archive in self.app.archived_outputs: - self.upload_file(archive) - # end for - # end if + assert isinstance(self.app.archived_outputs, list) + for archive in self.app.archived_outputs: + self.upload_file(archive) + # end for except Exception as ex: logger.exception('Failed to download') self.send_sync('Download failed!\n`%s`' % str(ex)) @@ -555,17 +543,6 @@ def start_download(self): # end try # end def - def publish_files(self): - try: - download_url = '%s/%s/%s' % (public_ip.strip('/'), - quote(str(self.user.id)), - quote(os.path.basename(self.app.output_path))) - self.send_sync('Download files from:\n' + download_url) - except Exception: - logger.exception('Fail to publish') - # end try - # end def - def upload_file(self, archive): # Check file size filename = os.path.basename(archive) From 7c6bf845d76e307aae55b6ce12c3440207262f21 Mon Sep 17 00:00:00 2001 From: Sudipto Chandra Date: Sat, 16 Oct 2021 07:04:23 +0600 Subject: [PATCH 2/2] Add anonfiles as alternative cloud storage --- lncrawl/bots/discord/message_handler.py | 2 +- lncrawl/utils/uploader/__init__.py | 8 +++++-- lncrawl/utils/uploader/anonfiles.py | 14 +++++++++++++ lncrawl/utils/uploader/gofile.py | 28 +++++++++---------------- 4 files changed, 31 insertions(+), 21 deletions(-) create mode 100644 lncrawl/utils/uploader/anonfiles.py diff --git a/lncrawl/bots/discord/message_handler.py b/lncrawl/bots/discord/message_handler.py index e763e661e..dc11719df 100644 --- a/lncrawl/bots/discord/message_handler.py +++ b/lncrawl/bots/discord/message_handler.py @@ -548,7 +548,7 @@ def upload_file(self, archive): filename = os.path.basename(archive) file_size = os.stat(archive).st_size if file_size > 7.99 * 1024 * 1024: - self.send_sync(f'File {filename} exceeds 8MB. Using alternative cloud storage.') + self.send_sync(f'File exceeds 8MB. Using alternative cloud storage.') try: description = 'Generated By : Lightnovel Crawler Discord Bot' direct_link = upload(archive, description) diff --git a/lncrawl/utils/uploader/__init__.py b/lncrawl/utils/uploader/__init__.py index 6598e8b76..ebcce027a 100644 --- a/lncrawl/utils/uploader/__init__.py +++ b/lncrawl/utils/uploader/__init__.py @@ -1,12 +1,16 @@ import os +cloud_drive = os.getenv('CLOUD_DRIVE', 'ANONFILES') def upload(file_path, description=None): - if os.getenv('CLOUD_DRIVE', 'GOFILE') == 'GOOGLE_DRIVE': + if cloud_drive == 'GOOGLE_DRIVE': from .google_drive import upload return upload(file_path, description) - else: + elif cloud_drive == 'GOFILE': from .gofile import upload return upload(file_path, description) + else: + from .anonfiles import upload + return upload(file_path, description) # end if # end def diff --git a/lncrawl/utils/uploader/anonfiles.py b/lncrawl/utils/uploader/anonfiles.py new file mode 100644 index 000000000..80bbc9ef7 --- /dev/null +++ b/lncrawl/utils/uploader/anonfiles.py @@ -0,0 +1,14 @@ +from requests import Session + + +# API Docs: https://anonfiles.com/docs/api +def upload(file_path, description): + with Session() as sess: + with open(file_path, "rb") as fp: + response = sess.post( + 'https://api.anonfiles.com/upload', + files={ 'file': fp }, + stream=True, + ) + response.raise_for_status() + return response.json()['data']['file']['url']['full'] diff --git a/lncrawl/utils/uploader/gofile.py b/lncrawl/utils/uploader/gofile.py index a4316cd54..ed8e756c3 100644 --- a/lncrawl/utils/uploader/gofile.py +++ b/lncrawl/utils/uploader/gofile.py @@ -1,5 +1,3 @@ -import os - from requests import Session @@ -10,19 +8,13 @@ def upload(file_path, description): response.raise_for_status() server_name = response.json()['data']['server'] - with open(file_path, "rb") as fp: - upload_url = f'https://{server_name}.gofile.io/uploadFile' - response = sess.post( - upload_url, - data={ - 'description': description, - #'token': os.getenv('GOFILE_TOKEN'), - #'folderId': os.getenv('GOFILE_FOLDER_ID'), - }, - files={ - 'upload_file': fp, - }, - stream=True, - ) - response.raise_for_status() - return response.json()['data']['directLink'] + with open(file_path, "rb") as fp: + upload_url = f'https://{server_name}.gofile.io/uploadFile' + response = sess.post( + upload_url, + data={'description': description}, + files={ 'upload_file': fp }, + stream=True, + ) + response.raise_for_status() + return response.json()['data']['directLink']