From bfbf060caae169a950e5774c9e7a0fac4238128e Mon Sep 17 00:00:00 2001 From: kuwoyuki Date: Fri, 17 Feb 2023 02:17:00 +0600 Subject: [PATCH 1/8] chore: wip --- compose.yml | 35 +++-- lncrawl/binders/__init__.py | 6 + lncrawl/binders/calibre.py | 33 ++++- lncrawl/bots/discord/cogs/__init__.py | 0 lncrawl/bots/discord/cogs/novels.py | 182 ++++++++++++++++++++++++++ lncrawl/bots/discord/config.py | 1 + lncrawl/bots/discord/discord_bot.py | 132 ++----------------- lncrawl/bots/discord/utils.py | 20 +++ lncrawl/core/app.py | 17 ++- lncrawl/core/crawler.py | 2 +- lncrawl/core/taskman.py | 4 + requirements.txt | 3 +- 12 files changed, 292 insertions(+), 143 deletions(-) create mode 100644 lncrawl/bots/discord/cogs/__init__.py create mode 100644 lncrawl/bots/discord/cogs/novels.py create mode 100644 lncrawl/bots/discord/utils.py diff --git a/compose.yml b/compose.yml index 570144207..21e707ace 100644 --- a/compose.yml +++ b/compose.yml @@ -1,23 +1,24 @@ -version: '3' +version: "3" services: chrome: image: selenium/standalone-chrome:latest + privileged: true shm_size: 6gb restart: unless-stopped ports: - "7900:7900" - "4444:4444" environment: - SE_VNC_VIEW_ONLY: '1' - SE_EVENT_BUS_PUBLISH_PORT: '4442' - SE_EVENT_BUS_SUBSCRIBE_PORT: '4443' - NODE_MAX_INSTANCE: '8' - NODE_MAX_SESSION: '8' - SE_NO_VNC_PORT: '7900' - SE_SCREEN_WIDTH: '1920' - SE_SCREEN_HEIGHT: '1080' - SE_NODE_GRID_URL: 'false' + SE_VNC_VIEW_ONLY: "1" + SE_EVENT_BUS_PUBLISH_PORT: "4442" + SE_EVENT_BUS_SUBSCRIBE_PORT: "4443" + NODE_MAX_INSTANCE: "8" + NODE_MAX_SESSION: "8" + SE_NO_VNC_PORT: "7900" + SE_SCREEN_WIDTH: "1920" + SE_SCREEN_HEIGHT: "1080" + SE_NODE_GRID_URL: "false" # telegram-bot: # image: lncrawl @@ -29,6 +30,14 @@ services: # environment: # CLOUD_DRIVE: "GOFILE" # TELEGRAM_TOKEN: "${TELEGRAM_TOKEN}" + redis: + image: redis:alpine + restart: always + ports: + - "6379:6379" + command: redis-server --save 20 1 --loglevel warning + volumes: + - redis_data:/data discord-bot: image: lncrawl @@ -36,7 +45,7 @@ services: context: . dockerfile: ./scripts/Dockerfile restart: unless-stopped - command: python -m lncrawl --suppress --bot discord --shard-id 0 --shard-count 1 --selenium-grid "http://chrome:4444" + command: python -m lncrawl --suppress --bot discord --selenium-grid "http://chrome:4444" depends_on: - chrome environment: @@ -44,3 +53,7 @@ services: DISCORD_TOKEN: "${DISCORD_TOKEN}" DISCORD_SIGNAL_CHAR: "${DISCORD_SIGNAL_CHAR}" DISCORD_DISABLE_SEARCH: "${DISCORD_DISABLE_SEARCH}" + +volumes: + redis_data: + driver: local diff --git a/lncrawl/binders/__init__.py b/lncrawl/binders/__init__.py index 89928f711..9a63ce752 100644 --- a/lncrawl/binders/__init__.py +++ b/lncrawl/binders/__init__.py @@ -57,15 +57,21 @@ def generate_books(app, data): for fmt in formats_to_generate: try: if fmt == "text": + print("text") + logger.info("creating text") outputs[fmt] = make_texts(app, data) elif fmt == "web": outputs[fmt] = make_webs(app, data) elif fmt == "epub": + logger.info("creating epub") outputs[fmt] = make_epubs(app, data) elif fmt in depends_on_epub: + logger.info("creating %s", fmt) + print(fmt) outputs[fmt] = make_calibres(app, outputs["epub"], fmt) except Exception as err: + print(err) logger.exception('Failed to generate "%s": %s' % (fmt, err)) finally: progress += 1 diff --git a/lncrawl/binders/calibre.py b/lncrawl/binders/calibre.py index 1cbfb5fce..8b22ffa4f 100644 --- a/lncrawl/binders/calibre.py +++ b/lncrawl/binders/calibre.py @@ -7,6 +7,23 @@ EBOOK_CONVERT = "ebook-convert" CALIBRE_LINK = "https://calibre-ebook.com/download" +# ebook-convert + [ +# '/home/mira/Projects/misc/lightnovel-crawler/.discord_bot_output/novelfull-com/Birth Of The Demonic Sword/epub/Birth Of The Demonic Sword c1-5.epub', +# '/home/mira/Projects/misc/lightnovel-crawler/.discord_bot_output/novelfull-com/Birth Of The Demonic Sword/mobi/Birth Of The Demonic Sword c1-5.mobi', +# '--unsmarten-punctuation', +# '--no-chapters-in-toc', +# '--title', 'Birth Of The Demonic Sword c1-5', +# '--authors', 'Eveofchaos', +# '--comments', '', +# '--language', 'en', +# '--tags', [], +# '--series', 'Birth Of The Demonic Sword', +# '--publisher', 'https://novelfull.com/', +# '--book-producer', 'Lightnovel Crawler', +# '--enable-heuristics', +# '--disable-renumber-headings', +# '--cover', '/home/mira/Projects/misc/lightnovel-crawler/.discord_bot_output/novelfull-com/Birth Of The Demonic Sword/cover.jpg'] + def run_ebook_convert(*args): """ @@ -14,10 +31,11 @@ def run_ebook_convert(*args): Visit https://manual.calibre-ebook.com/generated/en/ebook-convert.html for argument list. """ try: + # print(f"{EBOOK_CONVERT} {' '.join(list(args))}") isdebug = os.getenv("debug_mode") with open(os.devnull, "w", encoding="utf8") as dumper: subprocess.call( - [EBOOK_CONVERT] + list(args), + args=[EBOOK_CONVERT] + list(args), stdout=None if isdebug else dumper, stderr=None if isdebug else dumper, ) @@ -56,12 +74,12 @@ def epub_to_calibre(app, epub_file, out_fmt): file_name_without_ext, "--authors", app.crawler.novel_author, - '--comments', - app.crawler.synopsis, - '--language', - app.crawler.language, - '--tags', - app.crawler.novel_tags, + "--comments", + app.crawler.novel_synopsis, + "--language", + app.crawler.novel_language, + "--tags", + ",".join(app.crawler.novel_tags), "--series", app.crawler.novel_title, "--publisher", @@ -89,6 +107,7 @@ def epub_to_calibre(app, epub_file, out_fmt): print("Created: %s" % out_file_name) return out_file else: + print("conversion failed") logger.error("[%s] conversion failed: %s", out_fmt, epub_file_name) return None diff --git a/lncrawl/bots/discord/cogs/__init__.py b/lncrawl/bots/discord/cogs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lncrawl/bots/discord/cogs/novels.py b/lncrawl/bots/discord/cogs/novels.py new file mode 100644 index 000000000..58fba6d95 --- /dev/null +++ b/lncrawl/bots/discord/cogs/novels.py @@ -0,0 +1,182 @@ +import asyncio +import io +import math +import os +import shutil +import discord +import logging +import redis +from discord.ext import commands +from typing import Callable + +from lncrawl.core.app import App +from lncrawl.core.crawler import Crawler +from lncrawl.utils.uploader import upload +from ..utils import to_thread, validate_formats +from ..config import available_formats +from ....core.sources import prepare_crawler + +logger = logging.getLogger(__name__) + + +@to_thread +def download_novel(app: App) -> list: + try: + app.pack_by_volume = False + app.start_download() + app.bind_books() + app.compress_books() + assert isinstance(app.archived_outputs, list) + return app.archived_outputs + except Exception as ex: + logger.exception(ex) + + +@to_thread +def novel_by_url(url) -> discord.ApplicationContext: + app = App() + app.user_input = url + app.crawler = prepare_crawler(app.user_input) + app.get_novel_info() + assert isinstance(app.crawler, Crawler) + return app + + +@to_thread +def upload_file(archive) -> str | io.BufferedIOBase: + # Check file size + filename = os.path.basename(archive) + file_size = os.stat(archive).st_size + if file_size >= 8388608: + try: + description = "Generated by: lncrawl Discord bot" + return filename, upload(archive, description) + except Exception as e: + logger.error("Failed to upload file: %s", archive, e) + return filename, None + + return filename, open(archive, "rb") + + +def get_filename(archive) -> str: + return os.path.basename(archive) + + +async def update_progress(app: App, editFollowup: Callable[[str], None]): + chapterCount = len(app.chapters) + lastProgress = 0 + while app.crawler.future_progress < chapterCount: + # this is shit, but it ensures we won't be stuck if we miss the done window + if app.crawler.future_progress < lastProgress: + break + lastProgress = app.crawler.future_progress + await editFollowup(f"Download in progress: {lastProgress}/{chapterCount}") + await asyncio.sleep(1) + # not cool, but we're risking this property to be reset by further downloads + await editFollowup(f"Done: {chapterCount}/{chapterCount}. Uploading your file.") + + +def build_novel_key(app: App, start: float, end: float) -> str: + start_str = str(int(start)) + end_str = "" if math.isinf(end) else str(int(end)) + return ":".join([app.good_source_name, app.good_file_name, start_str, end_str]) + + +class Novels(commands.Cog): + def __init__(self, bot): + self.bot: discord.Bot = bot + self.redis: redis.Redis = self.bot.get_redis() + + @discord.slash_command(name="download", description="Download a novel by URL") + @discord.option("url", description="Novel URL") + @discord.option("start", description="Start chapter", default=0) + @discord.option("end", description="End chapter", default=math.inf) + @discord.option( + "formats", description="Comma separated target formats", default="epub" + ) + async def download( + self, + ctx: discord.ApplicationContext, + url: str, + start: float, + end: float, + formats: str, + ): + if not url.startswith("http"): + await ctx.respond("You specified an invalid URL") + return + formats_list = list(map(str.strip, formats.split(","))) + logger.info(formats_list) + if not validate_formats(formats_list): + fs = ", ".join(available_formats) + await ctx.respond( + f"The format you specified is invalid, the available formats are: {fs}" + ) + # start thinking + await ctx.defer() + + app = await novel_by_url(url) + embed = discord.Embed( + title=app.crawler.novel_title, + url=app.crawler.novel_url, + description=app.crawler.novel_synopsis, + ) + embed.set_thumbnail(url=app.crawler.novel_cover) + embed.add_field(name="Author", value=app.crawler.novel_author, inline=False) + embed.add_field(name="Volumes", value=len(app.crawler.volumes)) + embed.add_field(name="Chapters", value=len(app.crawler.chapters)) + await ctx.respond(embed=embed) + + # set chapters + if math.isinf(end): + app.chapters = app.crawler.chapters[int(start) :] + else: + app.chapters = app.crawler.chapters[int(start) : int(end)] + + # set formats + app.output_formats = {x: (x in formats_list) for x in available_formats} + + # set output path + root = os.path.abspath(".discord_bot_output") + app.output_path = os.path.join(root, app.good_source_name, app.good_file_name) + shutil.rmtree(app.output_path, ignore_errors=True) + os.makedirs(app.output_path, exist_ok=True) + + followUp = await ctx.respond( + f"I don't have this file, downloading {len(app.chapters)} chapters, this will take a while." + ) + progress_report = update_progress(app, followUp.edit) + asyncio.create_task(progress_report) + + archive_list = await download_novel(app) + + for archive in archive_list: + filename, result = await upload_file(archive) + if isinstance(result, str): + await ctx.respond(f"Download URL: {result}") + elif isinstance(result, io.BufferedReader): + fileResponse = await ctx.respond( + file=discord.File(filename=filename, fp=result) + ) + attachment, *_ = fileResponse.attachments + logger.info(attachment.url) + # files:example.com:1_12329:fb2 + await self.redis.set( + name=build_novel_key(app, start, end), value=attachment.url + ) + # attachment.url + # logger.info(fileResponse.content) + else: + await ctx.respond(f"Failed to upload {filename}") + + # @discord.slash_command(name="hello", description="Say hello to the bot") + # async def hello(self, ctx): + # await ctx.respond("Hey!") + + @discord.slash_command(name="goodbye", description="Say goodbye to the bot") + async def goodbye(self, ctx): + await ctx.respond("Goodbye!") + + +def setup(bot): # this is called by Pycord to setup the cog + bot.add_cog(Novels(bot)) # add the cog to the bot diff --git a/lncrawl/bots/discord/config.py b/lncrawl/bots/discord/config.py index 7d6a411d1..4e095f61e 100644 --- a/lncrawl/bots/discord/config.py +++ b/lncrawl/bots/discord/config.py @@ -11,6 +11,7 @@ signal = os.getenv("DISCORD_SIGNAL_CHAR") or "!" discord_token = os.getenv("DISCORD_TOKEN") disable_search = os.getenv("DISCORD_DISABLE_SEARCH") == "true" +redis_uri = os.getenv("REDIS_CONNECTION_URI") session_retain_time_in_seconds = 4 * 60 * 60 max_active_handles = 150 diff --git a/lncrawl/bots/discord/discord_bot.py b/lncrawl/bots/discord/discord_bot.py index fa708bef6..fb7283ffa 100644 --- a/lncrawl/bots/discord/discord_bot.py +++ b/lncrawl/bots/discord/discord_bot.py @@ -1,130 +1,24 @@ -import os -import subprocess -from datetime import datetime -from typing import Dict - import discord -from . import config as C -from .config import logger -from .message_handler import MessageHandler - - -def get_bot_version(): - try: - result = subprocess.check_output(["git", "rev-list", "--count", "HEAD"]) - return result.decode("utf-8") - except Exception: - from lncrawl.assets import version - - return version.get_version() +import redis.asyncio as redis +from . import config as C -class DiscordBot(discord.Client): - bot_version = get_bot_version() - def __init__(self, *args, loop=None, **options): - options["shard_id"] = C.shard_id - options["shard_count"] = C.shard_count - options["heartbeat_timeout"] = 300 - options["guild_subscriptions"] = False - options["fetch_offline_members"] = False - self.handlers: Dict[str, MessageHandler] = {} - super().__init__(*args, loop=loop, **options) +class Bot(discord.Bot): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) - def start_bot(self): - self.bot_is_ready = False - os.environ["debug_mode"] = "yes" - self.run(C.discord_token) + self.redis = redis.from_url(url=C.redis_uri) + self.load_extension("lncrawl.bots.discord.cogs.novels") async def on_ready(self): - # Reset handler cache - self.handlers = {} - - print("Discord bot in online!") - activity = discord.Activity( - name="for 🔥%s🔥 (%s)" % (C.signal, self.bot_version), - type=discord.ActivityType.watching, - ) - await self.change_presence(activity=activity, status=discord.Status.online) - - self.bot_is_ready = True - - async def on_message(self, message): - if not self.bot_is_ready: - return # Not ready yet - if message.author == self.user: - return # I am not crazy to talk with myself - if message.author.bot: - return # Other bots are not edible - try: - # Cleanup unused handlers - self.cleanup_handlers() - - text = message.content - if isinstance(message.channel, discord.abc.PrivateChannel): - await self.handle_message(message) - elif text.startswith(C.signal) and len(text.split(C.signal)) == 2: - uid = str(message.author.id) - async with message.channel.typing(): - await message.channel.send( - f"Sending you a private message <@{uid}>" - ) - if uid in self.handlers: - self.handlers[uid].destroy() - - await self.handle_message(message) - - except IndexError as ex: - logger.exception("Index error reported", ex) - except Exception: - logger.exception("Something went wrong processing message") - - async def handle_message(self, message): - if self.is_closed(): - return - - try: - uid = str(message.author.id) - discriminator = message.author.discriminator - logger.info( - "Processing message from %s#%s", message.author.name, discriminator - ) - if uid in self.handlers: - self.handlers[uid].process(message) - # elif len(self.handlers) > C.max_active_handles or discriminator not in C.vip_users_ids: - # async with message.author.typing(): - # await message.author.send( - # "Sorry! I am too busy processing requests of other users.\n" - # "Please knock again in a few hours." - # ) - else: - logger.info( - "New handler for %s#%s [%s]", - message.author.name, - discriminator, - uid, - ) - self.handlers[uid] = MessageHandler(uid, self) - async with message.author.typing(): - await message.author.send( - "-" * 25 + "\n" + f"Hello <@{uid}>\n" + "-" * 25 + "\n" - ) - self.handlers[uid].process(message) - - except Exception: - logger.exception("While handling this message: %s", message) + print(f"{self.user} is ready and online!") + print(f"Redis ping successful: {await self.redis.ping()}") - def cleanup_handlers(self): - try: - cur_time = datetime.now() - for handler in self.handlers.values(): - if handler.is_busy(): - continue + def get_redis(self): + return self.redis - last_time = getattr(handler, "last_activity", cur_time) - if (cur_time - last_time).seconds > C.session_retain_time_in_seconds: - handler.destroy() - except Exception: - logger.exception("Failed to cleanup handlers") +client = Bot() +client.run(C.discord_token) diff --git a/lncrawl/bots/discord/utils.py b/lncrawl/bots/discord/utils.py new file mode 100644 index 000000000..312184419 --- /dev/null +++ b/lncrawl/bots/discord/utils.py @@ -0,0 +1,20 @@ +import asyncio +import functools +from typing import List +import typing +from .config import available_formats + + +def validate_formats(xs: List[str]): + for x in xs: + if not x in available_formats: + return False + return True + + +def to_thread(func: typing.Callable) -> typing.Coroutine: + @functools.wraps(func) + async def wrapper(*args, **kwargs): + return await asyncio.to_thread(func, *args, **kwargs) + + return wrapper diff --git a/lncrawl/core/app.py b/lncrawl/core/app.py index 9ef2fa321..f51c8ebb1 100644 --- a/lncrawl/core/app.py +++ b/lncrawl/core/app.py @@ -41,6 +41,7 @@ def __init__(self): self.book_cover: Optional[str] = None self.output_formats: Dict[OutputFormat, bool] = {} self.archived_outputs = None + self.good_source_name: str = "" self.good_file_name: str = "" self.no_suffix_after_filename = False atexit.register(self.destroy) @@ -147,9 +148,9 @@ def get_novel_info(self): word_boundary=True, ) - source_name = slugify(urlparse(self.crawler.home_url).netloc) + self.good_source_name = slugify(urlparse(self.crawler.home_url).netloc) self.output_path = os.path.join( - C.DEFAULT_OUTPUT_PATH, source_name, self.good_file_name + C.DEFAULT_OUTPUT_PATH, self.good_source_name, self.good_file_name ) # ----------------------------------------------------------------------- # @@ -209,17 +210,21 @@ def compress_books(self, archive_singles=False): # Get which paths to be archived with their base names path_to_process = [] - for fmt in available_formats: + + for fmt in list({k: v for k, v in self.output_formats.items() if v == True}): root_dir = os.path.join(self.output_path, fmt) if os.path.isdir(root_dir): path_to_process.append( [root_dir, self.good_file_name + " (" + fmt + ")"] ) + logger.info("path_to_process: %s", path_to_process) + # Archive files self.archived_outputs = [] for root_dir, output_name in path_to_process: file_list = os.listdir(root_dir) + logger.info("file_list: %s", file_list) if len(file_list) == 0: logger.info("It has no files: %s", root_dir) continue @@ -232,6 +237,7 @@ def compress_books(self, archive_singles=False): ): logger.info("Not archiving single file inside %s" % root_dir) archived_file = os.path.join(root_dir, file_list[0]) + logger.info("archived_file: %s", archived_file) else: base_path = os.path.join(self.output_path, output_name) logger.info("Compressing %s to %s" % (root_dir, base_path)) @@ -240,7 +246,10 @@ def compress_books(self, archive_singles=False): format="zip", root_dir=root_dir, ) - logger.info("Compressed:", os.path.basename(archived_file)) + logger.info(f"Compressed: {os.path.basename(archived_file)}") if archived_file: + logger.info( + "appending archived file to archived_outputs: %s", archived_file + ) self.archived_outputs.append(archived_file) diff --git a/lncrawl/core/crawler.py b/lncrawl/core/crawler.py index a5ae6b911..2cd95da1c 100644 --- a/lncrawl/core/crawler.py +++ b/lncrawl/core/crawler.py @@ -148,7 +148,7 @@ def download_chapters( unit="item", fail_fast=fail_fast, ) - for (index, future) in futures.items(): + for index, future in futures.items(): try: chapter = chapters[index] chapter.body = future.result() diff --git a/lncrawl/core/taskman.py b/lncrawl/core/taskman.py index ed48342a9..494c780c6 100644 --- a/lncrawl/core/taskman.py +++ b/lncrawl/core/taskman.py @@ -28,6 +28,7 @@ def __init__(self, workers: int = MAX_WORKER_COUNT) -> None: """ self._futures: List[Future] = [] self.init_executor(workers) + self.future_progress = 0 def __del__(self) -> None: if hasattr(self, "_executor"): @@ -181,9 +182,11 @@ def resolve_futures( ) try: + self.future_progress = 0 for future in futures: if fail_fast: future.result(timeout) + self.future_progress += 1 bar.update() continue try: @@ -197,6 +200,7 @@ def resolve_futures( bar.clear() logger.warning(f"{type(e).__name__}: {e}") finally: + self.future_progress += 1 bar.update() finally: Thread(target=lambda: self.cancel_futures(futures)).start() diff --git a/requirements.txt b/requirements.txt index a79d9b2af..c92e7068a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,6 +37,7 @@ setuptools<=60.0.0 # win_unicode_console~=0.5 # bot requirements -discord.py>=1.0.0,<2.0.0 +py-cord>=2.4.0 python-telegram-bot<12 +redis[hiredis] # pydrive>=1.3.1,<2.0.0 From 4bf4011318f13531d58bea156adef8b2a3bf68fd Mon Sep 17 00:00:00 2001 From: kuwoyuki Date: Fri, 17 Feb 2023 15:43:54 +0600 Subject: [PATCH 2/8] chore: redis caching for /download cmd --- lncrawl/bots/discord/cogs/novels.py | 79 ++++++++++++++++++++--------- lncrawl/bots/discord/discord_bot.py | 2 +- lncrawl/core/app.py | 22 ++++---- 3 files changed, 66 insertions(+), 37 deletions(-) diff --git a/lncrawl/bots/discord/cogs/novels.py b/lncrawl/bots/discord/cogs/novels.py index 58fba6d95..92e7f1331 100644 --- a/lncrawl/bots/discord/cogs/novels.py +++ b/lncrawl/bots/discord/cogs/novels.py @@ -7,7 +7,7 @@ import logging import redis from discord.ext import commands -from typing import Callable +from typing import Callable, Tuple from lncrawl.core.app import App from lncrawl.core.crawler import Crawler @@ -33,7 +33,7 @@ def download_novel(app: App) -> list: @to_thread -def novel_by_url(url) -> discord.ApplicationContext: +def novel_by_url(url: str) -> App: app = App() app.user_input = url app.crawler = prepare_crawler(app.user_input) @@ -43,23 +43,22 @@ def novel_by_url(url) -> discord.ApplicationContext: @to_thread -def upload_file(archive) -> str | io.BufferedIOBase: +def upload_file(filename: str, archive: str) -> str | io.BufferedIOBase | None: # Check file size - filename = os.path.basename(archive) file_size = os.stat(archive).st_size if file_size >= 8388608: try: description = "Generated by: lncrawl Discord bot" - return filename, upload(archive, description) + return upload(archive, description) except Exception as e: logger.error("Failed to upload file: %s", archive, e) - return filename, None + return None - return filename, open(archive, "rb") + return open(archive, "rb") -def get_filename(archive) -> str: - return os.path.basename(archive) +def archive_metadata(archive) -> Tuple[str, str]: + return os.path.basename(os.path.dirname(archive)), os.path.basename(archive) async def update_progress(app: App, editFollowup: Callable[[str], None]): @@ -76,10 +75,22 @@ async def update_progress(app: App, editFollowup: Callable[[str], None]): await editFollowup(f"Done: {chapterCount}/{chapterCount}. Uploading your file.") -def build_novel_key(app: App, start: float, end: float) -> str: - start_str = str(int(start)) - end_str = "" if math.isinf(end) else str(int(end)) - return ":".join([app.good_source_name, app.good_file_name, start_str, end_str]) +def build_hash_novel_key(app: App, start: float, end: float, format: str) -> str: + return ":".join( + [ + "files", + app.good_file_name, + f"{int(start)}_{'' if math.isinf(end) else str(int(end))}", + format, + ] + ) + + +async def get_hash_value(redis: redis.Redis, hash: str, source: str) -> str | None: + return await redis.hget( + name=hash, + key=source, + ) class Novels(commands.Cog): @@ -106,7 +117,6 @@ async def download( await ctx.respond("You specified an invalid URL") return formats_list = list(map(str.strip, formats.split(","))) - logger.info(formats_list) if not validate_formats(formats_list): fs = ", ".join(available_formats) await ctx.respond( @@ -115,7 +125,7 @@ async def download( # start thinking await ctx.defer() - app = await novel_by_url(url) + app: App = await novel_by_url(url) embed = discord.Embed( title=app.crawler.novel_title, url=app.crawler.novel_url, @@ -127,6 +137,27 @@ async def download( embed.add_field(name="Chapters", value=len(app.crawler.chapters)) await ctx.respond(embed=embed) + # check if we have this cached + # todo: use HKEYS and check if there are other sources, propose those to the user + existingFiles = { + k: await get_hash_value( + redis=self.redis, + hash=build_hash_novel_key(app, start, end, k), + source=app.good_source_name, + ) + for k in formats_list + } + for fmt, cachedUrl in existingFiles.items(): + if not cachedUrl: + continue + logger.debug("format %s exists: %s", fmt, cachedUrl) + formats_list.remove(fmt) + await ctx.respond(f"**{fmt}**: {cachedUrl}") + + if not formats_list: + logger.debug("no formats left to dl, returning") + return + # set chapters if math.isinf(end): app.chapters = app.crawler.chapters[int(start) :] @@ -151,23 +182,23 @@ async def download( archive_list = await download_novel(app) for archive in archive_list: - filename, result = await upload_file(archive) + archive_format, archive_name = archive_metadata(archive) + result = await upload_file(archive_name, archive) if isinstance(result, str): await ctx.respond(f"Download URL: {result}") elif isinstance(result, io.BufferedReader): fileResponse = await ctx.respond( - file=discord.File(filename=filename, fp=result) + file=discord.File(filename=archive_name, fp=result) ) attachment, *_ = fileResponse.attachments - logger.info(attachment.url) - # files:example.com:1_12329:fb2 - await self.redis.set( - name=build_novel_key(app, start, end), value=attachment.url + # files:novel_name:1_12329:fb2 source_name https://source + await self.redis.hset( + name=build_hash_novel_key(app, start, end, archive_format), + key=app.good_source_name, + value=attachment.url, ) - # attachment.url - # logger.info(fileResponse.content) else: - await ctx.respond(f"Failed to upload {filename}") + await ctx.respond(f"Failed to upload {archive_name}") # @discord.slash_command(name="hello", description="Say hello to the bot") # async def hello(self, ctx): diff --git a/lncrawl/bots/discord/discord_bot.py b/lncrawl/bots/discord/discord_bot.py index fb7283ffa..a53da37c3 100644 --- a/lncrawl/bots/discord/discord_bot.py +++ b/lncrawl/bots/discord/discord_bot.py @@ -9,7 +9,7 @@ class Bot(discord.Bot): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.redis = redis.from_url(url=C.redis_uri) + self.redis = redis.from_url(url=C.redis_uri, decode_responses=True) self.load_extension("lncrawl.bots.discord.cogs.novels") async def on_ready(self): diff --git a/lncrawl/core/app.py b/lncrawl/core/app.py index f51c8ebb1..46993f2fb 100644 --- a/lncrawl/core/app.py +++ b/lncrawl/core/app.py @@ -178,7 +178,7 @@ def start_download(self): def bind_books(self): """Requires: crawler, chapters, output_path, pack_by_volume, book_cover, output_formats""" - logger.info("Processing data for binding") + logger.debug("Processing data for binding") assert self.crawler data = {} @@ -206,8 +206,7 @@ def bind_books(self): # ----------------------------------------------------------------------- # def compress_books(self, archive_singles=False): - logger.info("Compressing output...") - + logger.debug("Compressing output...") # Get which paths to be archived with their base names path_to_process = [] @@ -218,15 +217,15 @@ def compress_books(self, archive_singles=False): [root_dir, self.good_file_name + " (" + fmt + ")"] ) - logger.info("path_to_process: %s", path_to_process) + logger.debug("path_to_process: %s", path_to_process) # Archive files self.archived_outputs = [] for root_dir, output_name in path_to_process: file_list = os.listdir(root_dir) - logger.info("file_list: %s", file_list) + logger.debug("file_list: %s", file_list) if len(file_list) == 0: - logger.info("It has no files: %s", root_dir) + logger.debug("It has no files: %s", root_dir) continue archived_file = None @@ -235,21 +234,20 @@ def compress_books(self, archive_singles=False): and not archive_singles and not os.path.isdir(os.path.join(root_dir, file_list[0])) ): - logger.info("Not archiving single file inside %s" % root_dir) + logger.debug("Not archiving single file inside %s" % root_dir) archived_file = os.path.join(root_dir, file_list[0]) - logger.info("archived_file: %s", archived_file) else: - base_path = os.path.join(self.output_path, output_name) - logger.info("Compressing %s to %s" % (root_dir, base_path)) + base_path = os.path.join(root_dir, output_name) + logger.debug("Compressing %s to %s" % (root_dir, base_path)) archived_file = shutil.make_archive( base_path, format="zip", root_dir=root_dir, ) - logger.info(f"Compressed: {os.path.basename(archived_file)}") + logger.debug(f"Compressed: {os.path.basename(archived_file)}") if archived_file: - logger.info( + logger.debug( "appending archived file to archived_outputs: %s", archived_file ) self.archived_outputs.append(archived_file) From 3e0e5bbc72cb0e68f42a92ef337c9b3fbc0d296e Mon Sep 17 00:00:00 2001 From: kuwoyuki Date: Fri, 17 Feb 2023 15:48:04 +0600 Subject: [PATCH 3/8] chore: _ --- lncrawl/binders/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lncrawl/binders/__init__.py b/lncrawl/binders/__init__.py index 9a63ce752..89928f711 100644 --- a/lncrawl/binders/__init__.py +++ b/lncrawl/binders/__init__.py @@ -57,21 +57,15 @@ def generate_books(app, data): for fmt in formats_to_generate: try: if fmt == "text": - print("text") - logger.info("creating text") outputs[fmt] = make_texts(app, data) elif fmt == "web": outputs[fmt] = make_webs(app, data) elif fmt == "epub": - logger.info("creating epub") outputs[fmt] = make_epubs(app, data) elif fmt in depends_on_epub: - logger.info("creating %s", fmt) - print(fmt) outputs[fmt] = make_calibres(app, outputs["epub"], fmt) except Exception as err: - print(err) logger.exception('Failed to generate "%s": %s' % (fmt, err)) finally: progress += 1 From ea883f95d3d926e63ac27d9041ba6731b2e91332 Mon Sep 17 00:00:00 2001 From: kuwoyuki Date: Fri, 17 Feb 2023 16:11:37 +0600 Subject: [PATCH 4/8] chore: _ --- lncrawl/binders/calibre.py | 3 +-- lncrawl/binders/epub.py | 2 +- lncrawl/binders/text.py | 2 +- lncrawl/bots/discord/cogs/novels.py | 37 ++++++++++++++--------------- lncrawl/bots/discord/discord_bot.py | 8 +++++-- lncrawl/bots/discord/utils.py | 5 ++-- 6 files changed, 29 insertions(+), 28 deletions(-) diff --git a/lncrawl/binders/calibre.py b/lncrawl/binders/calibre.py index 8b22ffa4f..3740b2fdb 100644 --- a/lncrawl/binders/calibre.py +++ b/lncrawl/binders/calibre.py @@ -104,10 +104,9 @@ def epub_to_calibre(app, epub_file, out_fmt): run_ebook_convert(*args) if os.path.exists(out_file): - print("Created: %s" % out_file_name) + logger.info("Created: %s" % out_file_name) return out_file else: - print("conversion failed") logger.error("[%s] conversion failed: %s", out_fmt, epub_file_name) return None diff --git a/lncrawl/binders/epub.py b/lncrawl/binders/epub.py index f958fd722..64a0067c2 100644 --- a/lncrawl/binders/epub.py +++ b/lncrawl/binders/epub.py @@ -196,7 +196,7 @@ def bind_epub_book( os.makedirs(epub_path, exist_ok=True) epub.write_epub(file_path, book, {}) - print("Created: %s.epub" % file_name) + logger.info("Created: %s.epub" % file_name) return file_path diff --git a/lncrawl/binders/text.py b/lncrawl/binders/text.py index cfc3af904..bda8e3c7b 100644 --- a/lncrawl/binders/text.py +++ b/lncrawl/binders/text.py @@ -25,5 +25,5 @@ def make_texts(app, data): file.write(text) text_files.append(file_name) - print("Created: %d text files" % len(text_files)) + logger.info("Created: %d text files" % len(text_files)) return text_files diff --git a/lncrawl/bots/discord/cogs/novels.py b/lncrawl/bots/discord/cogs/novels.py index 92e7f1331..79f665af3 100644 --- a/lncrawl/bots/discord/cogs/novels.py +++ b/lncrawl/bots/discord/cogs/novels.py @@ -43,7 +43,7 @@ def novel_by_url(url: str) -> App: @to_thread -def upload_file(filename: str, archive: str) -> str | io.BufferedIOBase | None: +def upload_file(archive: str) -> str | io.BufferedIOBase | None: # Check file size file_size = os.stat(archive).st_size if file_size >= 8388608: @@ -93,6 +93,15 @@ async def get_hash_value(redis: redis.Redis, hash: str, source: str) -> str | No ) +def configure_output_path(app: App): + # set output path + root = os.path.abspath(".discord_bot_output") + output_path = os.path.join(root, app.good_source_name, app.good_file_name) + shutil.rmtree(output_path, ignore_errors=True) + os.makedirs(output_path, exist_ok=True) + return output_path + + class Novels(commands.Cog): def __init__(self, bot): self.bot: discord.Bot = bot @@ -164,26 +173,24 @@ async def download( else: app.chapters = app.crawler.chapters[int(start) : int(end)] - # set formats - app.output_formats = {x: (x in formats_list) for x in available_formats} - - # set output path - root = os.path.abspath(".discord_bot_output") - app.output_path = os.path.join(root, app.good_source_name, app.good_file_name) - shutil.rmtree(app.output_path, ignore_errors=True) - os.makedirs(app.output_path, exist_ok=True) - followUp = await ctx.respond( f"I don't have this file, downloading {len(app.chapters)} chapters, this will take a while." ) + + # set formats + app.output_formats = {x: (x in formats_list) for x in available_formats} + # set up directories + app.output_path = configure_output_path(app) + # update the user with dl progress progress_report = update_progress(app, followUp.edit) asyncio.create_task(progress_report) + # start the download archive_list = await download_novel(app) for archive in archive_list: archive_format, archive_name = archive_metadata(archive) - result = await upload_file(archive_name, archive) + result = await upload_file(archive) if isinstance(result, str): await ctx.respond(f"Download URL: {result}") elif isinstance(result, io.BufferedReader): @@ -200,14 +207,6 @@ async def download( else: await ctx.respond(f"Failed to upload {archive_name}") - # @discord.slash_command(name="hello", description="Say hello to the bot") - # async def hello(self, ctx): - # await ctx.respond("Hey!") - - @discord.slash_command(name="goodbye", description="Say goodbye to the bot") - async def goodbye(self, ctx): - await ctx.respond("Goodbye!") - def setup(bot): # this is called by Pycord to setup the cog bot.add_cog(Novels(bot)) # add the cog to the bot diff --git a/lncrawl/bots/discord/discord_bot.py b/lncrawl/bots/discord/discord_bot.py index a53da37c3..5d2e99c05 100644 --- a/lncrawl/bots/discord/discord_bot.py +++ b/lncrawl/bots/discord/discord_bot.py @@ -1,9 +1,12 @@ +import logging import discord import redis.asyncio as redis from . import config as C +logger = logging.getLogger(__name__) + class Bot(discord.Bot): def __init__(self, *args, **kwargs): @@ -13,8 +16,9 @@ def __init__(self, *args, **kwargs): self.load_extension("lncrawl.bots.discord.cogs.novels") async def on_ready(self): - print(f"{self.user} is ready and online!") - print(f"Redis ping successful: {await self.redis.ping()}") + # todo: activity and stuff + logger.debug(f"{self.user} is ready and online!") + logger.debug(f"Redis ping successful: {await self.redis.ping()}") def get_redis(self): return self.redis diff --git a/lncrawl/bots/discord/utils.py b/lncrawl/bots/discord/utils.py index 312184419..96d3398fa 100644 --- a/lncrawl/bots/discord/utils.py +++ b/lncrawl/bots/discord/utils.py @@ -1,7 +1,6 @@ import asyncio import functools -from typing import List -import typing +from typing import Callable, Coroutine, List from .config import available_formats @@ -12,7 +11,7 @@ def validate_formats(xs: List[str]): return True -def to_thread(func: typing.Callable) -> typing.Coroutine: +def to_thread(func: Callable) -> Coroutine: @functools.wraps(func) async def wrapper(*args, **kwargs): return await asyncio.to_thread(func, *args, **kwargs) From 15605d29f3ad0337275eb09d94887fc50a2290fa Mon Sep 17 00:00:00 2001 From: kuwoyuki Date: Fri, 17 Feb 2023 20:34:51 +0600 Subject: [PATCH 5/8] chore: slightly refactor, add /search cmd --- lncrawl/bots/discord/cogs/novels.py | 168 +++++++++---------------- lncrawl/bots/discord/components.py | 54 ++++++++ lncrawl/bots/discord/novel_handlers.py | 120 ++++++++++++++++++ 3 files changed, 234 insertions(+), 108 deletions(-) create mode 100644 lncrawl/bots/discord/components.py create mode 100644 lncrawl/bots/discord/novel_handlers.py diff --git a/lncrawl/bots/discord/cogs/novels.py b/lncrawl/bots/discord/cogs/novels.py index 79f665af3..50e8ee914 100644 --- a/lncrawl/bots/discord/cogs/novels.py +++ b/lncrawl/bots/discord/cogs/novels.py @@ -1,107 +1,32 @@ import asyncio import io import math -import os -import shutil import discord import logging import redis from discord.ext import commands -from typing import Callable, Tuple from lncrawl.core.app import App -from lncrawl.core.crawler import Crawler -from lncrawl.utils.uploader import upload -from ..utils import to_thread, validate_formats + +from ..components import NovelMenu +from ..utils import validate_formats from ..config import available_formats -from ....core.sources import prepare_crawler +from ..novel_handlers import ( + archive_metadata, + build_hash_novel_key, + configure_output_path, + get_hash_value, + destroy_app, + download_novel, + novel_by_title, + novel_by_url, + upload_file, + update_progress, +) logger = logging.getLogger(__name__) -@to_thread -def download_novel(app: App) -> list: - try: - app.pack_by_volume = False - app.start_download() - app.bind_books() - app.compress_books() - assert isinstance(app.archived_outputs, list) - return app.archived_outputs - except Exception as ex: - logger.exception(ex) - - -@to_thread -def novel_by_url(url: str) -> App: - app = App() - app.user_input = url - app.crawler = prepare_crawler(app.user_input) - app.get_novel_info() - assert isinstance(app.crawler, Crawler) - return app - - -@to_thread -def upload_file(archive: str) -> str | io.BufferedIOBase | None: - # Check file size - file_size = os.stat(archive).st_size - if file_size >= 8388608: - try: - description = "Generated by: lncrawl Discord bot" - return upload(archive, description) - except Exception as e: - logger.error("Failed to upload file: %s", archive, e) - return None - - return open(archive, "rb") - - -def archive_metadata(archive) -> Tuple[str, str]: - return os.path.basename(os.path.dirname(archive)), os.path.basename(archive) - - -async def update_progress(app: App, editFollowup: Callable[[str], None]): - chapterCount = len(app.chapters) - lastProgress = 0 - while app.crawler.future_progress < chapterCount: - # this is shit, but it ensures we won't be stuck if we miss the done window - if app.crawler.future_progress < lastProgress: - break - lastProgress = app.crawler.future_progress - await editFollowup(f"Download in progress: {lastProgress}/{chapterCount}") - await asyncio.sleep(1) - # not cool, but we're risking this property to be reset by further downloads - await editFollowup(f"Done: {chapterCount}/{chapterCount}. Uploading your file.") - - -def build_hash_novel_key(app: App, start: float, end: float, format: str) -> str: - return ":".join( - [ - "files", - app.good_file_name, - f"{int(start)}_{'' if math.isinf(end) else str(int(end))}", - format, - ] - ) - - -async def get_hash_value(redis: redis.Redis, hash: str, source: str) -> str | None: - return await redis.hget( - name=hash, - key=source, - ) - - -def configure_output_path(app: App): - # set output path - root = os.path.abspath(".discord_bot_output") - output_path = os.path.join(root, app.good_source_name, app.good_file_name) - shutil.rmtree(output_path, ignore_errors=True) - os.makedirs(output_path, exist_ok=True) - return output_path - - class Novels(commands.Cog): def __init__(self, bot): self.bot: discord.Bot = bot @@ -165,6 +90,7 @@ async def download( if not formats_list: logger.debug("no formats left to dl, returning") + await destroy_app(app) return # set chapters @@ -188,24 +114,50 @@ async def download( # start the download archive_list = await download_novel(app) - for archive in archive_list: - archive_format, archive_name = archive_metadata(archive) - result = await upload_file(archive) - if isinstance(result, str): - await ctx.respond(f"Download URL: {result}") - elif isinstance(result, io.BufferedReader): - fileResponse = await ctx.respond( - file=discord.File(filename=archive_name, fp=result) - ) - attachment, *_ = fileResponse.attachments - # files:novel_name:1_12329:fb2 source_name https://source - await self.redis.hset( - name=build_hash_novel_key(app, start, end, archive_format), - key=app.good_source_name, - value=attachment.url, - ) - else: - await ctx.respond(f"Failed to upload {archive_name}") + try: + for archive in archive_list: + archive_format, archive_name = archive_metadata(archive) + result = await upload_file(archive) + if isinstance(result, str): + await ctx.respond(f"Download URL: {result}") + elif isinstance(result, io.BufferedReader): + fileResponse = await ctx.respond( + file=discord.File(filename=archive_name, fp=result) + ) + attachment, *_ = fileResponse.attachments + # files:novel_name:1_12329:fb2 source_name https://source + await self.redis.hset( + name=build_hash_novel_key(app, start, end, archive_format), + key=app.good_source_name, + value=attachment.url, + ) + else: + await ctx.respond(f"Failed to upload {archive_name}") + finally: + await destroy_app(app) + + @discord.slash_command(name="search", description="Search a novel by name") + @discord.option("name", description="Lightnovel name") + @discord.option("pattern", description="Regex pattern", default="") + async def search( + self, + ctx: discord.ApplicationContext, + name: str, + pattern: str, + ): + if len(name) < 4: + await ctx.respond("Query string is too short") + return + # start thinking + await ctx.defer() + app: App = await novel_by_title(name, pattern) + # app.search_results + selectNovelView = NovelMenu() + selectNovelView.add_items(novelList=app.search_results[:24]) + await ctx.respond( + "Select a novel, use the returned link in the `/download` command", + view=selectNovelView, + ) def setup(bot): # this is called by Pycord to setup the cog diff --git a/lncrawl/bots/discord/components.py b/lncrawl/bots/discord/components.py new file mode 100644 index 000000000..621d05bfc --- /dev/null +++ b/lncrawl/bots/discord/components.py @@ -0,0 +1,54 @@ +from typing import List +import discord + +from lncrawl.models.search_result import CombinedSearchResult + + +class NovelSelectMenu(discord.ui.Select): + def __init__(self): + super().__init__( + placeholder="Select a novel...", + min_values=1, + max_values=1, + row=0, + ) + self.novelList = [] + + def fill_options(self, novelList: List[CombinedSearchResult]) -> None: + self.novelList = novelList + for i, item in enumerate(novelList): + nc = len(item.novels) + self.add_option( + label=item.title, + value=str(i), + description=f"{nc} source{'s'[:nc^1]}", + ) + + async def callback(self, interaction: discord.Interaction): + assert self.view is not None + value = self.values[0] + novel_list = [ + f"{i+1}. <{item.url}> {item.info or ''}".strip() + for i, item in enumerate(self.novelList[int(value)].novels) + ] + + message = "" + novel_count = len(novel_list) + # split into separate messages w/ length up to 2000 chars + for i, line in enumerate(novel_list): + message_len = len(line) + if message_len >= 2000: + await interaction.response.send_message(message.strip()) + message = "" + message += line + "\n" + if i == novel_count - 1: + await interaction.response.send_message(message.strip()) + + return + + +class NovelMenu(discord.ui.View): + def add_items(self, novelList: List[CombinedSearchResult]) -> None: + selectMenu = NovelSelectMenu() + selectMenu.fill_options(novelList) + self.add_item(selectMenu) diff --git a/lncrawl/bots/discord/novel_handlers.py b/lncrawl/bots/discord/novel_handlers.py new file mode 100644 index 000000000..f1e691abf --- /dev/null +++ b/lncrawl/bots/discord/novel_handlers.py @@ -0,0 +1,120 @@ +import asyncio +import io +import math +import os +import re +import shutil +import logging +import redis + +from typing import Callable, Tuple +from .utils import to_thread +from ...core.app import App +from ...core.sources import crawler_list, prepare_crawler +from ...core.crawler import Crawler +from ...utils.uploader import upload + +logger = logging.getLogger(__name__) + + +@to_thread +def download_novel(app: App) -> list: + try: + app.pack_by_volume = False + app.start_download() + app.bind_books() + app.compress_books() + assert isinstance(app.archived_outputs, list) + return app.archived_outputs + except Exception as ex: + logger.exception(ex) + + +@to_thread +def novel_by_url(url: str) -> App: + app = App() + app.user_input = url + app.crawler = prepare_crawler(app.user_input) + app.get_novel_info() + assert isinstance(app.crawler, Crawler) + return app + + +@to_thread +def novel_by_title(name: str, pattern: str) -> App: + app = App() + app.user_input = name.strip() + app.crawler_links = [ + str(link) + for link, crawler in crawler_list.items() + if crawler.search_novel != Crawler.search_novel + and (not pattern or re.search(pattern, link)) + ] + + app.search_novel() + return app + + +@to_thread +def upload_file(archive: str) -> str | io.BufferedIOBase | None: + # Check file size + file_size = os.stat(archive).st_size + if file_size >= 8388608: + try: + description = "Generated by: lncrawl Discord bot" + return upload(archive, description) + except Exception as e: + logger.error("Failed to upload file: %s", archive, e) + return None + + return open(archive, "rb") + + +@to_thread +def destroy_app(app: App): + app.destroy() + + +def archive_metadata(archive) -> Tuple[str, str]: + return os.path.basename(os.path.dirname(archive)), os.path.basename(archive) + + +async def update_progress(app: App, editFollowup: Callable[[str], None]): + chapterCount = len(app.chapters) + lastProgress = 0 + while app.crawler.future_progress < chapterCount: + # this is shit, but it ensures we won't be stuck if we miss the done window + if app.crawler.future_progress < lastProgress: + break + lastProgress = app.crawler.future_progress + await editFollowup(f"Download in progress: {lastProgress}/{chapterCount}") + await asyncio.sleep(1) + # not cool, but we're risking this property to be reset by further downloads + await editFollowup(f"Done: {chapterCount}/{chapterCount}. Uploading your file.") + + +def build_hash_novel_key(app: App, start: float, end: float, format: str) -> str: + return ":".join( + [ + "files", + app.good_file_name, + f"{int(start)}_{'' if math.isinf(end) else str(int(end))}", + format, + ] + ) + + +async def get_hash_value(redis: redis.Redis, hash: str, source: str) -> str | None: + return await redis.hget( + name=hash, + key=source, + ) + + +def configure_output_path(app: App): + # set output path + root = os.path.abspath(".discord_bot_output") + output_path = os.path.join(root, app.good_source_name, app.good_file_name) + shutil.rmtree(output_path, ignore_errors=True) + os.makedirs(output_path, exist_ok=True) + return output_path From c953862639d072c54361b0342003ac0ec81b4e56 Mon Sep 17 00:00:00 2001 From: kuwoyuki Date: Fri, 17 Feb 2023 21:02:55 +0600 Subject: [PATCH 6/8] fix: message splitting --- lncrawl/bots/discord/components.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lncrawl/bots/discord/components.py b/lncrawl/bots/discord/components.py index 621d05bfc..067b42f79 100644 --- a/lncrawl/bots/discord/components.py +++ b/lncrawl/bots/discord/components.py @@ -34,15 +34,22 @@ async def callback(self, interaction: discord.Interaction): message = "" novel_count = len(novel_list) + responded = False + reply = ( + lambda msg: interaction.response.send_message(msg.strip()) + if not responded + else interaction.followup.send(msg.strip()) + ) # split into separate messages w/ length up to 2000 chars for i, line in enumerate(novel_list): - message_len = len(line) - if message_len >= 2000: - await interaction.response.send_message(message.strip()) + message_len = len(message) + if (message_len + len(line) + 1) >= 2000: + await reply(message) + responded = True message = "" message += line + "\n" if i == novel_count - 1: - await interaction.response.send_message(message.strip()) + await reply(message) return From b7c30795cc60b1e26700b76afe4a4ad4ea8d708f Mon Sep 17 00:00:00 2001 From: kuwoyuki Date: Fri, 17 Feb 2023 21:11:13 +0600 Subject: [PATCH 7/8] chore: allow other file types --- lncrawl/bots/discord/config.py | 4 ++-- lncrawl/core/app.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lncrawl/bots/discord/config.py b/lncrawl/bots/discord/config.py index 4e095f61e..bb4d61797 100644 --- a/lncrawl/bots/discord/config.py +++ b/lncrawl/bots/discord/config.py @@ -26,8 +26,8 @@ "text", "web", "mobi", - #'pdf', - #'fb2', + "pdf", + "fb2", ] os.makedirs("logs", exist_ok=True) diff --git a/lncrawl/core/app.py b/lncrawl/core/app.py index 46993f2fb..4cf45c4a3 100644 --- a/lncrawl/core/app.py +++ b/lncrawl/core/app.py @@ -61,7 +61,7 @@ def destroy(self): if self.crawler: self.crawler.__del__() self.chapters.clear() - logger.info("App destroyed") + logger.debug("App destroyed") # ----------------------------------------------------------------------- # From 0e353f9bd71c98ad13a309f12d63fc4ce4732241 Mon Sep 17 00:00:00 2001 From: kuwoyuki Date: Sat, 18 Feb 2023 23:47:34 +0600 Subject: [PATCH 8/8] chore: remove redis caching from dev --- compose.yml | 22 +++++++-------- lncrawl/bots/discord/cogs/novels.py | 37 +++----------------------- lncrawl/bots/discord/components.py | 6 ++--- lncrawl/bots/discord/config.py | 1 - lncrawl/bots/discord/discord_bot.py | 7 ----- lncrawl/bots/discord/novel_handlers.py | 26 +++--------------- lncrawl/bots/discord/utils.py | 6 ++--- requirements.txt | 1 - 8 files changed, 23 insertions(+), 83 deletions(-) diff --git a/compose.yml b/compose.yml index 21e707ace..b3d94da2b 100644 --- a/compose.yml +++ b/compose.yml @@ -30,14 +30,14 @@ services: # environment: # CLOUD_DRIVE: "GOFILE" # TELEGRAM_TOKEN: "${TELEGRAM_TOKEN}" - redis: - image: redis:alpine - restart: always - ports: - - "6379:6379" - command: redis-server --save 20 1 --loglevel warning - volumes: - - redis_data:/data + # redis: + # image: redis:alpine + # restart: always + # ports: + # - "6379:6379" + # command: redis-server --save 20 1 --loglevel warning + # volumes: + # - redis_data:/data discord-bot: image: lncrawl @@ -54,6 +54,6 @@ services: DISCORD_SIGNAL_CHAR: "${DISCORD_SIGNAL_CHAR}" DISCORD_DISABLE_SEARCH: "${DISCORD_DISABLE_SEARCH}" -volumes: - redis_data: - driver: local +# volumes: +# redis_data: +# driver: local diff --git a/lncrawl/bots/discord/cogs/novels.py b/lncrawl/bots/discord/cogs/novels.py index 50e8ee914..35a3aa70e 100644 --- a/lncrawl/bots/discord/cogs/novels.py +++ b/lncrawl/bots/discord/cogs/novels.py @@ -3,7 +3,6 @@ import math import discord import logging -import redis from discord.ext import commands from lncrawl.core.app import App @@ -13,9 +12,7 @@ from ..config import available_formats from ..novel_handlers import ( archive_metadata, - build_hash_novel_key, configure_output_path, - get_hash_value, destroy_app, download_novel, novel_by_title, @@ -30,7 +27,6 @@ class Novels(commands.Cog): def __init__(self, bot): self.bot: discord.Bot = bot - self.redis: redis.Redis = self.bot.get_redis() @discord.slash_command(name="download", description="Download a novel by URL") @discord.option("url", description="Novel URL") @@ -71,28 +67,6 @@ async def download( embed.add_field(name="Chapters", value=len(app.crawler.chapters)) await ctx.respond(embed=embed) - # check if we have this cached - # todo: use HKEYS and check if there are other sources, propose those to the user - existingFiles = { - k: await get_hash_value( - redis=self.redis, - hash=build_hash_novel_key(app, start, end, k), - source=app.good_source_name, - ) - for k in formats_list - } - for fmt, cachedUrl in existingFiles.items(): - if not cachedUrl: - continue - logger.debug("format %s exists: %s", fmt, cachedUrl) - formats_list.remove(fmt) - await ctx.respond(f"**{fmt}**: {cachedUrl}") - - if not formats_list: - logger.debug("no formats left to dl, returning") - await destroy_app(app) - return - # set chapters if math.isinf(end): app.chapters = app.crawler.chapters[int(start) :] @@ -121,16 +95,11 @@ async def download( if isinstance(result, str): await ctx.respond(f"Download URL: {result}") elif isinstance(result, io.BufferedReader): - fileResponse = await ctx.respond( + await ctx.respond( file=discord.File(filename=archive_name, fp=result) ) - attachment, *_ = fileResponse.attachments - # files:novel_name:1_12329:fb2 source_name https://source - await self.redis.hset( - name=build_hash_novel_key(app, start, end, archive_format), - key=app.good_source_name, - value=attachment.url, - ) + # cache if needed + # attachment, *_ = fileResponse.attachments else: await ctx.respond(f"Failed to upload {archive_name}") finally: diff --git a/lncrawl/bots/discord/components.py b/lncrawl/bots/discord/components.py index 067b42f79..5ae037379 100644 --- a/lncrawl/bots/discord/components.py +++ b/lncrawl/bots/discord/components.py @@ -1,5 +1,5 @@ -from typing import List import discord +import typing as t from lncrawl.models.search_result import CombinedSearchResult @@ -14,7 +14,7 @@ def __init__(self): ) self.novelList = [] - def fill_options(self, novelList: List[CombinedSearchResult]) -> None: + def fill_options(self, novelList: t.List[CombinedSearchResult]) -> None: self.novelList = novelList for i, item in enumerate(novelList): nc = len(item.novels) @@ -55,7 +55,7 @@ async def callback(self, interaction: discord.Interaction): class NovelMenu(discord.ui.View): - def add_items(self, novelList: List[CombinedSearchResult]) -> None: + def add_items(self, novelList: t.List[CombinedSearchResult]) -> None: selectMenu = NovelSelectMenu() selectMenu.fill_options(novelList) self.add_item(selectMenu) diff --git a/lncrawl/bots/discord/config.py b/lncrawl/bots/discord/config.py index bb4d61797..c692b5c21 100644 --- a/lncrawl/bots/discord/config.py +++ b/lncrawl/bots/discord/config.py @@ -11,7 +11,6 @@ signal = os.getenv("DISCORD_SIGNAL_CHAR") or "!" discord_token = os.getenv("DISCORD_TOKEN") disable_search = os.getenv("DISCORD_DISABLE_SEARCH") == "true" -redis_uri = os.getenv("REDIS_CONNECTION_URI") session_retain_time_in_seconds = 4 * 60 * 60 max_active_handles = 150 diff --git a/lncrawl/bots/discord/discord_bot.py b/lncrawl/bots/discord/discord_bot.py index 5d2e99c05..7a74d1d03 100644 --- a/lncrawl/bots/discord/discord_bot.py +++ b/lncrawl/bots/discord/discord_bot.py @@ -1,8 +1,6 @@ import logging import discord -import redis.asyncio as redis - from . import config as C logger = logging.getLogger(__name__) @@ -12,16 +10,11 @@ class Bot(discord.Bot): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.redis = redis.from_url(url=C.redis_uri, decode_responses=True) self.load_extension("lncrawl.bots.discord.cogs.novels") async def on_ready(self): # todo: activity and stuff logger.debug(f"{self.user} is ready and online!") - logger.debug(f"Redis ping successful: {await self.redis.ping()}") - - def get_redis(self): - return self.redis client = Bot() diff --git a/lncrawl/bots/discord/novel_handlers.py b/lncrawl/bots/discord/novel_handlers.py index f1e691abf..737cae602 100644 --- a/lncrawl/bots/discord/novel_handlers.py +++ b/lncrawl/bots/discord/novel_handlers.py @@ -1,13 +1,11 @@ import asyncio import io -import math import os import re import shutil +import typing as t import logging -import redis -from typing import Callable, Tuple from .utils import to_thread from ...core.app import App from ...core.sources import crawler_list, prepare_crawler @@ -75,11 +73,11 @@ def destroy_app(app: App): app.destroy() -def archive_metadata(archive) -> Tuple[str, str]: +def archive_metadata(archive) -> t.Tuple[str, str]: return os.path.basename(os.path.dirname(archive)), os.path.basename(archive) -async def update_progress(app: App, editFollowup: Callable[[str], None]): +async def update_progress(app: App, editFollowup: t.Callable[[str], None]): chapterCount = len(app.chapters) lastProgress = 0 while app.crawler.future_progress < chapterCount: @@ -93,24 +91,6 @@ async def update_progress(app: App, editFollowup: Callable[[str], None]): await editFollowup(f"Done: {chapterCount}/{chapterCount}. Uploading your file.") -def build_hash_novel_key(app: App, start: float, end: float, format: str) -> str: - return ":".join( - [ - "files", - app.good_file_name, - f"{int(start)}_{'' if math.isinf(end) else str(int(end))}", - format, - ] - ) - - -async def get_hash_value(redis: redis.Redis, hash: str, source: str) -> str | None: - return await redis.hget( - name=hash, - key=source, - ) - - def configure_output_path(app: App): # set output path root = os.path.abspath(".discord_bot_output") diff --git a/lncrawl/bots/discord/utils.py b/lncrawl/bots/discord/utils.py index 96d3398fa..dd49a9964 100644 --- a/lncrawl/bots/discord/utils.py +++ b/lncrawl/bots/discord/utils.py @@ -1,17 +1,17 @@ import asyncio import functools -from typing import Callable, Coroutine, List +import typing as t from .config import available_formats -def validate_formats(xs: List[str]): +def validate_formats(xs: t.List[str]): for x in xs: if not x in available_formats: return False return True -def to_thread(func: Callable) -> Coroutine: +def to_thread(func: t.Callable) -> t.Coroutine: @functools.wraps(func) async def wrapper(*args, **kwargs): return await asyncio.to_thread(func, *args, **kwargs) diff --git a/requirements.txt b/requirements.txt index c92e7068a..99e2546a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,5 +39,4 @@ setuptools<=60.0.0 # bot requirements py-cord>=2.4.0 python-telegram-bot<12 -redis[hiredis] # pydrive>=1.3.1,<2.0.0