From 20e03dd999f20630eff65db9b172ceb1f4bbc8a8 Mon Sep 17 00:00:00 2001
From: Sudipto Chandra <dipu.sudipta@gmail.com>
Date: Sat, 16 Oct 2021 06:57:10 +0600
Subject: [PATCH 1/2] Update discord bot

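- Add rate limiting: cap the number of active message handlers
- Add a VIP user list (matched by Discord discriminator)
- Reduce handler cache timeout from over a day to 4 hours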
---
 .env.example                            | 12 +----
 app.json                                |  6 +--
 lncrawl/bots/console/resume_download.py |  2 +
 lncrawl/bots/discord/config.py          | 37 +++++++++++---
 lncrawl/bots/discord/discord_bot.py     | 59 +++++++++++-----------
 lncrawl/bots/discord/message_handler.py | 65 ++++++++-----------------
 6 files changed, 87 insertions(+), 94 deletions(-)

diff --git a/.env.example b/.env.example
index bb518e805..96c33694d 100644
--- a/.env.example
+++ b/.env.example
@@ -12,16 +12,8 @@ DISCORD_TOKEN=
 DISCORD_DISABLE_SEARCH=false
 DISCORD_SIGNAL_CHAR=!
 
-# Publicly available data folder
-PUBLIC_DATA_PATH=
-PUBLIC_ADDRESS=http://18.218.187.242/
-
-# Cloud drives for upload | Options: [GOFILE, GOOGLE_DRIVE] | Default: GOFILE
-CLOUD_DRIVE=GOFILE
-
-# Gofile config. Token is optional.
-GOFILE_TOKEN=kRgxIJe0D724Sdq0U12Hy0KwGhY10b1z
-GOFILE_FOLDER_ID=1247451e-2730-46b3-8dd8-8cff8cb18a5c
+# Cloud drives for upload | Options: [ANONFILES, GOFILE, GOOGLE_DRIVE] | Default: ANONFILES
+CLOUD_DRIVE=ANONFILES
 
 # Google Drive Config
 GOOGLE_DRIVE_CREDENTIAL_FILE=mycreds.txt
diff --git a/app.json b/app.json
index 7e2d5d6f2..2574ea729 100644
--- a/app.json
+++ b/app.json
@@ -40,9 +40,9 @@
       "value": "false"
     },
     "CLOUD_DRIVE": {
-      "description": "Available: GOFILE, GOOGLE_DRIVE",
+      "description": "Available: GOFILE, GOOGLE_DRIVE, ANONFILES",
       "required": false,
-      "value": "GOFILE"
+      "value": "ANONFILES"
     },
     "GOOGLE_DRIVE_CREDENTIAL_FILE": {
       "description": "Google Drive service credentials to use",
@@ -65,4 +65,4 @@
       "url": "https://github.com/NNTin/heroku-buildpack-calibre"
     }
   ]
-}
+}
\ No newline at end of file
diff --git a/lncrawl/bots/console/resume_download.py b/lncrawl/bots/console/resume_download.py
index 2f6392306..0c8b49a82 100644
--- a/lncrawl/bots/console/resume_download.py
+++ b/lncrawl/bots/console/resume_download.py
@@ -52,6 +52,8 @@ def resume_session():
     # end if
 
     app = load_session_from_metadata(metadata)
+    assert isinstance(app.crawler, Crawler)
+
     print('Resuming', app.crawler.novel_title)
     print('Output path:', app.output_path)
 
diff --git a/lncrawl/bots/discord/config.py b/lncrawl/bots/discord/config.py
index e32b0a36e..e4ad980cf 100644
--- a/lncrawl/bots/discord/config.py
+++ b/lncrawl/bots/discord/config.py
@@ -1,17 +1,32 @@
 # -*- coding: utf-8 -*-
-import os
 import logging
 import logging.config
+import os
+
 from colorama import Fore
-from ...core.arguments import get_args
 
-# The special signal character for crawler commands
+from lncrawl.core.arguments import get_args
+
+shard_id = get_args().shard_id
+shard_count = get_args().shard_count
 signal = os.getenv('DISCORD_SIGNAL_CHAR') or '!'
-max_workers = int(os.getenv('DISCORD_MAX_WORKERS', 10))
+discord_token = os.getenv('DISCORD_TOKEN')
+disable_search = os.getenv('DISCORD_DISABLE_SEARCH') == 'true'
+session_retain_time_in_seconds = 4 * 3600
+max_active_handles = 5
 
-# The public ip and path of the server to put files in
-public_ip = os.getenv('PUBLIC_ADDRESS', None)
-public_path = os.getenv('PUBLIC_DATA_PATH', None)
+vip_users_ids = set([
+    '1822',
+])
+
+available_formats = [
+    'epub',
+    'text',
+    'web',
+    'mobi',
+    #'pdf',
+    #'fb2',
+]
 
 os.makedirs('logs', exist_ok=True)
 logging.config.dictConfig({
@@ -41,7 +56,7 @@
         'file': {
             'formatter': 'file',
             'class': 'logging.handlers.RotatingFileHandler',
-            'filename': 'logs/discord-bot_%s.log' % (get_args().shard_id),
+            'filename': f'logs/discord-bot_{shard_id}.log',
             'maxBytes': 10 * 1024 * 1024,  # 10 MB
             'backupCount': 5,
             'encoding': 'utf8',
@@ -54,3 +69,9 @@
         },
     },
 })
+
+logger = logging.getLogger(f'discord-{shard_id}')
+
+if not discord_token:
+    raise Exception('Discord token is not found')
+
diff --git a/lncrawl/bots/discord/discord_bot.py b/lncrawl/bots/discord/discord_bot.py
index 3cc54b34a..38a47ac0e 100644
--- a/lncrawl/bots/discord/discord_bot.py
+++ b/lncrawl/bots/discord/discord_bot.py
@@ -1,19 +1,15 @@
 # -*- coding: utf-8 -*-
-import logging
-import logging.config
 import os
-import random
 import subprocess
 from datetime import datetime
+from typing import Dict
 
 import discord
 
-from ...core.arguments import get_args
-from .config import signal
+from . import config as C
+from .config import logger
 from .message_handler import MessageHandler
 
-logger = logging.getLogger(__name__)
-
 
 def get_bot_version():
     try:
@@ -28,18 +24,19 @@ class DiscordBot(discord.Client):
     bot_version = get_bot_version()
 
     def __init__(self, *args, loop=None, **options):
-        options['shard_id'] = get_args().shard_id
-        options['shard_count'] = get_args().shard_count
+        options['shard_id'] = C.shard_id
+        options['shard_count'] = C.shard_count
         options['heartbeat_timeout'] = 300
         options['guild_subscriptions'] = False
         options['fetch_offline_members'] = False
+        self.handlers: Dict[str, MessageHandler] = {}
         super().__init__(*args, loop=loop, **options)
     # end def
 
     def start_bot(self):
         self.bot_is_ready = False
         os.environ['debug_mode'] = 'yes'
-        self.run(os.getenv('DISCORD_TOKEN'))
+        self.run(C.discord_token)
     # end def
 
     async def on_ready(self):
@@ -47,7 +44,7 @@ async def on_ready(self):
         self.handlers = {}
 
         print('Discord bot in online!')
-        activity = discord.Activity(name='for 🔥%s🔥 (%s)' % (signal, self.bot_version),
+        activity = discord.Activity(name='for 🔥%s🔥 (%s)' % (C.signal, self.bot_version),
                                     type=discord.ActivityType.watching)
         await self.change_presence(activity=activity,
                                    status=discord.Status.online)
@@ -69,14 +66,13 @@ async def on_message(self, message):
             text = message.content
             if isinstance(message.channel, discord.abc.PrivateChannel):
                 await self.handle_message(message)
-            elif text.startswith(signal) and len(text.split(signal)) == 2:
-                uid = message.author.id
+            elif text.startswith(C.signal) and len(text.split(C.signal)) == 2:
+                uid = str(message.author.id)
                 if uid in self.handlers:
                     self.handlers[uid].destroy()
                 # end if
-                await self.send_public_text(message, random.choice([
-                    "Sending you a private message",
-                ]))
+                with message.channel.typing():
+                    await message.channel.send(f"Sending you a private message <@{uid}>")
                 await self.handle_message(message)
             # end if
         except IndexError as ex:
@@ -86,29 +82,34 @@ async def on_message(self, message):
         # end try
     # end def
 
-    async def send_public_text(self, message, text):
-        async with message.channel.typing():
-            await message.channel.send(text + (" <@%s>" % str(message.author.id)))
-    # end def
-
     async def handle_message(self, message):
         if self.is_closed():
             return
         # end if
         try:
             uid = str(message.author.id)
-            logger.info("Processing message from %s", message.author.name)
-            if uid not in self.handlers:
-                self.handlers[uid] = MessageHandler(self)
+            discriminator = message.author.discriminator
+            logger.info("Processing message from %s#%s", message.author.name, discriminator)
+            if uid in self.handlers:
+                self.handlers[uid].process(message)
+            elif len(self.handlers) > C.max_active_handles or discriminator not in C.vip_users_ids:
+                await message.author.trigger_typing()
+                await message.author.send(
+                    "Sorry! I am too busy processing requests of other users.\n"
+                    "Please knock me here later!"
+                )
+            else:
+                self.handlers[uid] = MessageHandler(uid, self)
+                logger.info("New handler for %s#%s [%s]", message.author.name, discriminator, uid)
+                await message.author.trigger_typing()
                 await message.author.send(
                     '-' * 25 + '\n' +
-                    ('Hello %s\n' % message.author.name) +
+                    f'Hello <@{uid}>\n' +
                     '-' * 25 + '\n'
                 )
-                logger.info("New handler for %s", message.author.name)
+                self.handlers[uid].process(message)
             # end if
-            self.handlers[uid].process(message)
-        except Exception as err:
+        except Exception:
             logger.exception('While handling this message: %s', message)
         # end try
     # end def
@@ -118,7 +119,7 @@ def cleanup_handlers(self):
             cur_time = datetime.now()
             for handler in self.handlers.values():
                 last_time = getattr(handler, 'last_activity', cur_time)
-                if (cur_time - last_time).days > 1:
+                if (cur_time - last_time).seconds > C.session_retain_time_in_seconds:
                     handler.destroy()
                 # end if
             # end for
diff --git a/lncrawl/bots/discord/message_handler.py b/lncrawl/bots/discord/message_handler.py
index 478cf9f01..e763e661e 100644
--- a/lncrawl/bots/discord/message_handler.py
+++ b/lncrawl/bots/discord/message_handler.py
@@ -1,43 +1,33 @@
 # -*- coding: utf-8 -*-
 import asyncio
-import logging
 import os
 import random
 import re
 import shutil
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
-from urllib.parse import quote
+from typing import Optional
 
 import discord
 
-from ...core.app import App
-from ...utils.uploader import upload
-from .config import max_workers, public_ip, public_path
+from lncrawl.core.app import App
+from lncrawl.core.crawler import Crawler
+from lncrawl.utils.uploader import upload
 
-logger = logging.getLogger(__name__)
-
-available_formats = [
-    'epub',
-    'text',
-    'web',
-    'mobi',
-    'pdf',
-    'fb2',
-]
-
-disable_search = os.getenv('DISCORD_DISABLE_SEARCH') == 'true'
+from .config import available_formats, disable_search, logger
 
 
 class MessageHandler:
-    def __init__(self, client):
+    def __init__(self, uid, client):
         self.app = App()
+        self.uid = uid
         self.client = client
         self.state = None
-        self.executor = ThreadPoolExecutor(max_workers)
+        self.executor = ThreadPoolExecutor(2)
         self.last_activity = datetime.now()
         self.closed = False
         self.get_current_status = None
+        self.selected_novel: Optional[dict] = None
     # end def
 
     def process(self, message):
@@ -48,7 +38,7 @@ def process(self, message):
     def destroy(self):
         try:
             self.get_current_status = None
-            self.client.handlers.pop(str(self.user.id))
+            self.client.handlers.pop(str(self.uid))
             self.send_sync('Closing current session...')
             self.executor.shutdown(wait=False)
             self.app.destroy()
@@ -61,7 +51,7 @@ def destroy(self):
         # end try
     # end def
 
-    def handle_message(self, message):
+    def handle_message(self, message: discord.Message):
         self.message = message
         self.user = message.author
         if not self.state:
@@ -263,6 +253,7 @@ def handle_novel_selection(self):
     # end def
 
     def display_sources_selection(self):
+        assert isinstance(self.selected_novel, dict)
         novel_list = self.selected_novel['novels']
         self.send_sync('**%s** is found in %d sources:\n' %
                        (self.selected_novel['title'], len(novel_list)))
@@ -288,6 +279,7 @@ def display_sources_selection(self):
     def handle_sources_to_search(self):
         self.state = self.busy_state
 
+        assert isinstance(self.selected_novel, dict)
         if len(self.selected_novel['novels']) == 1:
             novel = self.selected_novel['novels'][0]
             return self.handle_search_result(novel)
@@ -350,9 +342,6 @@ def download_novel_info(self):
 
         # Setup output path
         root = os.path.abspath('.discord_bot_output')
-        if public_path and os.path.exists(public_path):
-            root = os.path.abspath(public_path)
-        # end if
         good_name = os.path.basename(self.app.output_path)
         output_path = os.path.join(root, str(self.user.id), good_name)
         shutil.rmtree(output_path, ignore_errors=True)
@@ -373,6 +362,7 @@ def display_range_selection(self):
             '- Send `volume 2 5` to download download volume 2 and 5. Pass as many numbers you need.',
             '- Send `chapter 110 120` to download chapter 110 to 120. Only two numbers are accepted.',
         ]))
+        assert isinstance(self.app.crawler, Crawler)
         self.send_sync(
             '**It has `%d` volumes and `%d` chapters.**' % (
                 len(self.app.crawler.volumes),
@@ -390,6 +380,7 @@ def handle_range_selection(self):
             return
         # end if
 
+        assert isinstance(self.app.crawler, Crawler)
         if text == 'all':
             self.app.chapters = self.app.crawler.chapters[:]
         elif re.match(r'^first(\s\d+)?$', text):
@@ -421,7 +412,7 @@ def resolve_chapter(name):
                     cid = 0
                     if name.isdigit():
                         cid = int(name)
-                    else:
+                    elif isinstance(self.app.crawler, Crawler):
                         cid = self.app.crawler.get_chapter_index_of(name)
                     # end if
                     return cid - 1
@@ -517,6 +508,7 @@ def start_download(self):
         self.app.pack_by_volume = False
 
         try:
+            assert isinstance(self.app.crawler, Crawler)
             self.send_sync(
                 '**%s**' % self.app.crawler.novel_title,
                 'Downloading %d chapters...' % len(self.app.chapters),
@@ -539,14 +531,10 @@ def start_download(self):
             if self.closed:
                 return
 
-            if public_ip and public_path and os.path.exists(public_path):
-                self.send_sync('Publishing files...')
-                self.publish_files()
-            else:
-                for archive in self.app.archived_outputs:
-                    self.upload_file(archive)
-                # end for
-            # end if
+            assert isinstance(self.app.archived_outputs, list)
+            for archive in self.app.archived_outputs:
+                self.upload_file(archive)
+            # end for
         except Exception as ex:
             logger.exception('Failed to download')
             self.send_sync('Download failed!\n`%s`' % str(ex))
@@ -555,17 +543,6 @@ def start_download(self):
         # end try
     # end def
 
-    def publish_files(self):
-        try:
-            download_url = '%s/%s/%s' % (public_ip.strip('/'),
-                                         quote(str(self.user.id)),
-                                         quote(os.path.basename(self.app.output_path)))
-            self.send_sync('Download files from:\n' + download_url)
-        except Exception:
-            logger.exception('Fail to publish')
-        # end try
-    # end def
-
     def upload_file(self, archive):
         # Check file size
         filename = os.path.basename(archive)

From 7c6bf845d76e307aae55b6ce12c3440207262f21 Mon Sep 17 00:00:00 2001
From: Sudipto Chandra <dipu.sudipta@gmail.com>
Date: Sat, 16 Oct 2021 07:04:23 +0600
Subject: [PATCH 2/2] Add anonfiles as alternative cloud storage

---
 lncrawl/bots/discord/message_handler.py |  2 +-
 lncrawl/utils/uploader/__init__.py      |  8 +++++--
 lncrawl/utils/uploader/anonfiles.py     | 14 +++++++++++++
 lncrawl/utils/uploader/gofile.py        | 28 +++++++++----------------
 4 files changed, 31 insertions(+), 21 deletions(-)
 create mode 100644 lncrawl/utils/uploader/anonfiles.py

diff --git a/lncrawl/bots/discord/message_handler.py b/lncrawl/bots/discord/message_handler.py
index e763e661e..dc11719df 100644
--- a/lncrawl/bots/discord/message_handler.py
+++ b/lncrawl/bots/discord/message_handler.py
@@ -548,7 +548,7 @@ def upload_file(self, archive):
         filename = os.path.basename(archive)
         file_size = os.stat(archive).st_size
         if file_size > 7.99 * 1024 * 1024:
-            self.send_sync(f'File {filename} exceeds 8MB. Using alternative cloud storage.')
+            self.send_sync(f'File exceeds 8MB. Using alternative cloud storage.')
             try:
                 description = 'Generated By : Lightnovel Crawler Discord Bot'
                 direct_link = upload(archive, description)
diff --git a/lncrawl/utils/uploader/__init__.py b/lncrawl/utils/uploader/__init__.py
index 6598e8b76..ebcce027a 100644
--- a/lncrawl/utils/uploader/__init__.py
+++ b/lncrawl/utils/uploader/__init__.py
@@ -1,12 +1,16 @@
 import os
 
+cloud_drive = os.getenv('CLOUD_DRIVE', 'ANONFILES')
 
 def upload(file_path, description=None):
-    if os.getenv('CLOUD_DRIVE', 'GOFILE') == 'GOOGLE_DRIVE':
+    if cloud_drive == 'GOOGLE_DRIVE':
         from .google_drive import upload
         return upload(file_path, description)
-    else:
+    elif cloud_drive == 'GOFILE':
         from .gofile import upload
         return upload(file_path, description)
+    else:
+        from .anonfiles import upload
+        return upload(file_path, description)
     # end if
 # end def
diff --git a/lncrawl/utils/uploader/anonfiles.py b/lncrawl/utils/uploader/anonfiles.py
new file mode 100644
index 000000000..80bbc9ef7
--- /dev/null
+++ b/lncrawl/utils/uploader/anonfiles.py
@@ -0,0 +1,14 @@
+from requests import Session
+
+
+# Upload file_path to anonfiles and return data.file.url.full from the JSON reply; `description` is unused. API docs: https://anonfiles.com/docs/api
+def upload(file_path, description):
+    with Session() as sess:
+        with open(file_path, "rb") as fp:
+            response = sess.post(
+                'https://api.anonfiles.com/upload',
+                files={ 'file': fp },
+                stream=True,
+            )
+            response.raise_for_status()
+            return response.json()['data']['file']['url']['full']
diff --git a/lncrawl/utils/uploader/gofile.py b/lncrawl/utils/uploader/gofile.py
index a4316cd54..ed8e756c3 100644
--- a/lncrawl/utils/uploader/gofile.py
+++ b/lncrawl/utils/uploader/gofile.py
@@ -1,5 +1,3 @@
-import os
-
 from requests import Session
 
 
@@ -10,19 +8,13 @@ def upload(file_path, description):
         response.raise_for_status()
         server_name = response.json()['data']['server']
 
-    with open(file_path, "rb") as fp:
-        upload_url = f'https://{server_name}.gofile.io/uploadFile'
-        response = sess.post(
-            upload_url,
-            data={
-                'description': description,
-                #'token': os.getenv('GOFILE_TOKEN'),
-                #'folderId': os.getenv('GOFILE_FOLDER_ID'),
-            },
-            files={
-                'upload_file': fp,
-            },
-            stream=True,
-        )
-        response.raise_for_status()
-        return response.json()['data']['directLink']
+        with open(file_path, "rb") as fp:
+            upload_url = f'https://{server_name}.gofile.io/uploadFile'
+            response = sess.post(
+                upload_url,
+                data={'description': description},
+                files={ 'upload_file': fp },
+                stream=True,
+            )
+            response.raise_for_status()
+            return response.json()['data']['directLink']