dipu-bd · kuwoyuki · Feb 16, 2023 · Feb 17, 2023 · Feb 17, 2023 · Feb 17, 2023
diff --git a/compose.yml b/compose.yml
@@ -1,23 +1,24 @@
-version: '3'
+version: "3"
 
 services:
   chrome:
     image: selenium/standalone-chrome:latest
+    privileged: true
     shm_size: 6gb
     restart: unless-stopped
     ports:
       - "7900:7900"
       - "4444:4444"
     environment:
-      SE_VNC_VIEW_ONLY: '1'
-      SE_EVENT_BUS_PUBLISH_PORT: '4442'
-      SE_EVENT_BUS_SUBSCRIBE_PORT: '4443'
-      NODE_MAX_INSTANCE: '8'
-      NODE_MAX_SESSION: '8'
-      SE_NO_VNC_PORT: '7900'
-      SE_SCREEN_WIDTH: '1920'
-      SE_SCREEN_HEIGHT: '1080'
-      SE_NODE_GRID_URL: 'false'
+      SE_VNC_VIEW_ONLY: "1"
+      SE_EVENT_BUS_PUBLISH_PORT: "4442"
+      SE_EVENT_BUS_SUBSCRIBE_PORT: "4443"
+      NODE_MAX_INSTANCE: "8"
+      NODE_MAX_SESSION: "8"
+      SE_NO_VNC_PORT: "7900"
+      SE_SCREEN_WIDTH: "1920"
+      SE_SCREEN_HEIGHT: "1080"
+      SE_NODE_GRID_URL: "false"
 
   # telegram-bot:
   #   image: lncrawl
@@ -29,18 +30,30 @@ services:
   #   environment:
   #     CLOUD_DRIVE: "GOFILE"
   #     TELEGRAM_TOKEN: "${TELEGRAM_TOKEN}"
+  # redis:
+  #   image: redis:alpine
+  #   restart: always
+  #   ports:
+  #     - "6379:6379"
+  #   command: redis-server --save 20 1 --loglevel warning
+  #   volumes:
+  #     - redis_data:/data
 
   discord-bot:
     image: lncrawl
     build:
       context: .
       dockerfile: ./scripts/Dockerfile
     restart: unless-stopped
-    command: python -m lncrawl --suppress --bot discord --shard-id 0 --shard-count 1 --selenium-grid "http://chrome:4444"
+    command: python -m lncrawl --suppress --bot discord --selenium-grid "http://chrome:4444"
     depends_on:
       - chrome
     environment:
       CLOUD_DRIVE: "GOFILE"
       DISCORD_TOKEN: "${DISCORD_TOKEN}"
       DISCORD_SIGNAL_CHAR: "${DISCORD_SIGNAL_CHAR}"
       DISCORD_DISABLE_SEARCH: "${DISCORD_DISABLE_SEARCH}"
+
+# volumes:
+#   redis_data:
+#     driver: local
diff --git a/lncrawl/binders/calibre.py b/lncrawl/binders/calibre.py
@@ -7,13 +7,31 @@
 EBOOK_CONVERT = "ebook-convert"
 CALIBRE_LINK = "https://calibre-ebook.com/download"
 
+# Example argument list passed to `ebook-convert` (kept for reference):
+# ebook-convert + [
+#     '/home/mira/Projects/misc/lightnovel-crawler/.discord_bot_output/novelfull-com/Birth Of The Demonic Sword/epub/Birth Of The Demonic Sword c1-5.epub',
+#     '/home/mira/Projects/misc/lightnovel-crawler/.discord_bot_output/novelfull-com/Birth Of The Demonic Sword/mobi/Birth Of The Demonic Sword c1-5.mobi',
+#     '--unsmarten-punctuation',
+#     '--no-chapters-in-toc',
+#     '--title', 'Birth Of The Demonic Sword c1-5',
+#     '--authors', 'Eveofchaos',
+#     '--comments', '',
+#     '--language', 'en',
+#     '--tags', [],
+#     '--series', 'Birth Of The Demonic Sword',
+#     '--publisher', 'https://novelfull.com/',
+#     '--book-producer', 'Lightnovel Crawler',
+#     '--enable-heuristics',
+#     '--disable-renumber-headings',
+#     '--cover', '/home/mira/Projects/misc/lightnovel-crawler/.discord_bot_output/novelfull-com/Birth Of The Demonic Sword/cover.jpg']
+
 
 def run_ebook_convert(*args):
     """
     Calls `ebook-convert` with given args
     Visit https://manual.calibre-ebook.com/generated/en/ebook-convert.html for argument list.
     """
     try:
+        # print(f"{EBOOK_CONVERT} {' '.join(list(args))}")
         isdebug = os.getenv("debug_mode")
         with open(os.devnull, "w", encoding="utf8") as dumper:
             subprocess.call(

diff --git a/lncrawl/binders/epub.py b/lncrawl/binders/epub.py
@@ -197,7 +197,7 @@ def bind_epub_book(
     os.makedirs(epub_path, exist_ok=True)
     epub.write_epub(file_path, book, {})
 
-    print("Created: %s.epub" % file_name)
+    logger.info("Created: %s.epub" % file_name)
     return file_path
 
 

diff --git a/lncrawl/binders/text.py b/lncrawl/binders/text.py
@@ -25,5 +25,5 @@ def make_texts(app, data):
                 file.write(text)
                 text_files.append(file_name)
 
-    print("Created: %d text files" % len(text_files))
+    logger.info("Created: %d text files" % len(text_files))
     return text_files
diff --git a/lncrawl/bots/discord/cogs/__init__.py b/lncrawl/bots/discord/cogs/__init__.py
diff --git a/lncrawl/bots/discord/cogs/novels.py b/lncrawl/bots/discord/cogs/novels.py
@@ -0,0 +1,133 @@
+import asyncio
+import io
+import math
+import discord
+import logging
+from discord.ext import commands
+
+from lncrawl.core.app import App
+
+from ..components import NovelMenu
+from ..utils import validate_formats
+from ..config import available_formats
+from ..novel_handlers import (
+    archive_metadata,
+    configure_output_path,
+    destroy_app,
+    download_novel,
+    novel_by_title,
+    novel_by_url,
+    upload_file,
+    update_progress,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class Novels(commands.Cog):
+    """Cog exposing the `/download` and `/search` slash commands."""
+
+    def __init__(self, bot):
+        self.bot: discord.Bot = bot
+
+    @discord.slash_command(name="download", description="Download a novel by URL")
+    @discord.option("url", description="Novel URL")
+    @discord.option("start", description="Start chapter", default=0)
+    @discord.option("end", description="End chapter", default=math.inf)
+    @discord.option(
+        "formats", description="Comma separated target formats", default="epub"
+    )
+    async def download(
+        self,
+        ctx: discord.ApplicationContext,
+        url: str,
+        start: float,
+        end: float,
+        formats: str,
+    ):
+        """
+        Download a novel from `url`, bind it and send the resulting archives.
+
+        Args:
+            ctx: The slash-command invocation context used for all replies.
+            url: Novel page URL; must start with "http".
+            start: Index of the first chapter to fetch (used as a slice start
+                on the crawler's chapter list).
+            end: Slice end index (exclusive); infinity means "to the last
+                chapter".
+            formats: Comma separated output formats; each entry is checked
+                with `validate_formats`.
+
+        The app created for the request is always passed to `destroy_app`,
+        whether the download succeeds or fails.
+        """
+        if not url.startswith("http"):
+            await ctx.respond("You specified an invalid URL")
+            return
+        formats_list = list(map(str.strip, formats.split(",")))
+        if not validate_formats(formats_list):
+            fs = ", ".join(available_formats)
+            await ctx.respond(
+                f"The format you specified is invalid, the available formats are: {fs}"
+            )
+            # Stop here: without this the download would continue with
+            # formats that were just reported as invalid.
+            return
+        # start thinking
+        await ctx.defer()
+
+        app: App = await novel_by_url(url)
+        progress_task = None
+        try:
+            embed = discord.Embed(
+                title=app.crawler.novel_title,
+                url=app.crawler.novel_url,
+                description=app.crawler.novel_synopsis,
+            )
+            embed.set_thumbnail(url=app.crawler.novel_cover)
+            embed.add_field(
+                name="Author", value=app.crawler.novel_author, inline=False
+            )
+            embed.add_field(name="Volumes", value=len(app.crawler.volumes))
+            embed.add_field(name="Chapters", value=len(app.crawler.chapters))
+            await ctx.respond(embed=embed)
+
+            # set chapters
+            if math.isinf(end):
+                app.chapters = app.crawler.chapters[int(start) :]
+            else:
+                app.chapters = app.crawler.chapters[int(start) : int(end)]
+
+            followUp = await ctx.respond(
+                f"I don't have this file, downloading {len(app.chapters)} chapters, this will take a while."
+            )
+
+            # set formats
+            app.output_formats = {x: (x in formats_list) for x in available_formats}
+            # set up directories
+            app.output_path = configure_output_path(app)
+            # update the user with dl progress; keep a reference so the task
+            # is not garbage collected while it is still running
+            progress_task = asyncio.create_task(update_progress(app, followUp.edit))
+
+            # start the download
+            archive_list = await download_novel(app)
+
+            for archive in archive_list:
+                archive_format, archive_name = archive_metadata(archive)
+                result = await upload_file(archive)
+                if isinstance(result, str):
+                    await ctx.respond(f"Download URL: {result}")
+                elif isinstance(result, io.BufferedReader):
+                    await ctx.respond(
+                        file=discord.File(filename=archive_name, fp=result)
+                    )
+                    # cache if needed
+                    # attachment, *_ = fileResponse.attachments
+                else:
+                    await ctx.respond(f"Failed to upload {archive_name}")
+        finally:
+            # Stop the progress reporter if it is still running (e.g. the
+            # download failed) before the app it reads from is destroyed.
+            if progress_task is not None and not progress_task.done():
+                progress_task.cancel()
+            await destroy_app(app)
+
+    @discord.slash_command(name="search", description="Search a novel by name")
+    @discord.option("name", description="Lightnovel name")
+    @discord.option("pattern", description="Regex pattern", default="")
+    async def search(
+        self,
+        ctx: discord.ApplicationContext,
+        name: str,
+        pattern: str,
+    ):
+        """
+        Search novels by `name` and reply with a select menu of the results.
+
+        Args:
+            ctx: The slash-command invocation context used for all replies.
+            name: Query string; rejected when shorter than 4 characters.
+            pattern: Regex pattern forwarded to `novel_by_title`.
+        """
+        if len(name) < 4:
+            await ctx.respond("Query string is too short")
+            return
+        # start thinking
+        await ctx.defer()
+        app: App = await novel_by_title(name, pattern)
+        if not app.search_results:
+            # A select menu needs at least one option, so answer in plain
+            # text instead of building an empty menu.
+            await ctx.respond("No results found")
+            return
+        selectNovelView = NovelMenu()
+        # Only the first 24 results are offered in the menu.
+        selectNovelView.add_items(novelList=app.search_results[:24])
+        await ctx.respond(
+            "Select a novel, use the returned link in the `/download` command",
+            view=selectNovelView,
+        )
+
+
+def setup(bot):
+    """Extension entry point called by Pycord; registers the `Novels` cog."""
+    cog = Novels(bot)
+    bot.add_cog(cog)
diff --git a/lncrawl/bots/discord/components.py b/lncrawl/bots/discord/components.py
@@ -0,0 +1,61 @@
+import discord
+import typing as t
+
+from lncrawl.models.search_result import CombinedSearchResult
+
+
+class NovelSelectMenu(discord.ui.Select):
+    """
+    Dropdown listing search results.
+
+    Selecting an entry replies with the numbered source URLs of that novel,
+    split over several messages when the text is too long for one.
+    """
+
+    # Limits imposed by Discord on option labels and message content.
+    _MAX_LABEL_LENGTH = 100
+    _MAX_MESSAGE_LENGTH = 2000
+
+    def __init__(self):
+        super().__init__(
+            placeholder="Select a novel...",
+            min_values=1,
+            max_values=1,
+            row=0,
+        )
+        # Search results backing the options; option values index into it.
+        self.novelList = []
+
+    def fill_options(self, novelList: t.List[CombinedSearchResult]) -> None:
+        """
+        Store `novelList` and add one option per result.
+
+        Each option's value is the result's index in `novelList`, and its
+        description shows how many sources the result has.
+        """
+        self.novelList = novelList
+        for i, item in enumerate(novelList):
+            nc = len(item.novels)
+            plural = "" if nc == 1 else "s"
+            self.add_option(
+                # Over-long titles are cut so the option is not rejected.
+                label=item.title[: self._MAX_LABEL_LENGTH],
+                value=str(i),
+                description=f"{nc} source{plural}",
+            )
+
+    async def callback(self, interaction: discord.Interaction):
+        """Reply with the source list of the novel the user selected."""
+        assert self.view is not None
+        selected = self.novelList[int(self.values[0])]
+        lines = [
+            f"{i+1}. <{item.url}> {item.info or ''}".strip()
+            for i, item in enumerate(selected.novels)
+        ]
+
+        # split into separate messages w/ length up to 2000 chars
+        chunks = []
+        message = ""
+        for line in lines:
+            # Only flush a non-empty buffer: an empty message cannot be sent.
+            if message and (len(message) + len(line) + 1) >= self._MAX_MESSAGE_LENGTH:
+                chunks.append(message.strip())
+                message = ""
+            message += line + "\n"
+        if message.strip():
+            chunks.append(message.strip())
+        if not chunks:
+            # Always answer the interaction, even when there is nothing to list.
+            chunks.append("No sources found for this novel")
+
+        # The first chunk answers the interaction; the rest are follow-ups.
+        for index, chunk in enumerate(chunks):
+            if index == 0:
+                await interaction.response.send_message(chunk)
+            else:
+                await interaction.followup.send(chunk)
+
+
+class NovelMenu(discord.ui.View):
+    """View that hosts a single `NovelSelectMenu` dropdown."""
+
+    def add_items(self, novelList: t.List[CombinedSearchResult]) -> None:
+        """Build a dropdown from `novelList` and attach it to this view."""
+        dropdown = NovelSelectMenu()
+        dropdown.fill_options(novelList)
+        self.add_item(dropdown)
diff --git a/lncrawl/bots/discord/config.py b/lncrawl/bots/discord/config.py
@@ -25,8 +25,8 @@
     "text",
     "web",
     "mobi",
-    #'pdf',
-    #'fb2',
+    "pdf",
+    "fb2",
 ]
 
 os.makedirs("logs", exist_ok=True)