Merge pull request #376 from Kav-K/better-indexing
Better indexing
Kav-K authored Nov 4, 2023
2 parents c1908c1 + 17a8b76 commit 6155574
Showing 8 changed files with 521 additions and 211 deletions.
9 changes: 7 additions & 2 deletions README.md
@@ -49,14 +49,19 @@ Internet-connected chat (Google + Wolfram + Link Crawling)<br>
Code Interpreter / Advanced Data Analysis <br>
<img src="https://i.imgur.com/Y2VvwHd.png"/><br>
Custom indexing and Document Q&A<br>
<img src="https://i.imgur.com/iPGuUt1.png"/><br>
<img src="https://i.imgur.com/1uKF1ye.png"/><br>
</p>

# Recent Notable Updates
- **Code Interpreter / Advanced Data Analysis** - Just like ChatGPT, GPTDiscord now has a fully-fledged code execution environment. You can work with GPT to execute your code in an isolated environment, with the ability to even install Python and system packages, and access the internet from the execution environment.


- **Multi-modality** - GPTDiscord now supports images sent to the bot during a conversation made with `/gpt converse`!


- **Drag And Drop Document Chat** - Chat with your documents by simply dragging and dropping files, or even links into discord chat! `/index chat`


- **Internet-connected Chat!** - Chat with an instance of GPT3.5 or GPT-4 that's connected to Google and Wolfram Alpha and can browse and access links that you send it!

# Features
27 changes: 1 addition & 26 deletions cogs/code_interpreter_service_cog.py
@@ -94,31 +94,6 @@ def __init__(
self.sessions = {}
# Make a mapping of all the country codes and their full country names:

async def paginate_chat_embed(self, response_text):
"""Given a response text make embed pages and return a list of the pages."""

response_text = [
response_text[i : i + 3500] for i in range(0, len(response_text), 7000)
]
pages = []
first = False
# Send each chunk as a message
for count, chunk in enumerate(response_text, start=1):
if not first:
page = discord.Embed(
title=f"{count}",
description=chunk,
)
first = True
else:
page = discord.Embed(
title=f"{count}",
description=chunk,
)
pages.append(page)

return pages

@discord.Cog.listener()
async def on_message(self, message):
# Check if the message is from a bot.
@@ -277,7 +252,7 @@ async def on_message(self, message):
artifacts_available = len(artifact_names) > 0

if len(response) > 2000:
embed_pages = await self.paginate_chat_embed(response)
embed_pages = await EmbedStatics.paginate_chat_embed(response)
paginator = pages.Paginator(
pages=embed_pages,
timeout=None,
16 changes: 1 addition & 15 deletions cogs/commands.py
@@ -739,18 +739,6 @@ async def load_index(
guild_ids=ALLOWED_GUILDS,
)
@discord.guild_only()
@discord.option(
name="user_index",
description="Which user file to load the index from",
required=False,
autocomplete=File_autocompleter.get_user_indexes,
)
@discord.option(
name="search_index",
description="Which search index file to load the index from",
required=False,
autocomplete=File_autocompleter.get_user_search_indexes,
)
@discord.option(
name="model",
description="The model to use for the conversation",
@@ -761,12 +749,10 @@ async def load_index(
async def talk(
self,
ctx: discord.ApplicationContext,
user_index: str,
search_index: str,
model: str,
):
await ctx.defer()
await self.index_cog.index_chat_command(ctx, user_index, search_index, model)
await self.index_cog.index_chat_command(ctx, model)

@add_to_group("index")
@discord.slash_command(
132 changes: 125 additions & 7 deletions cogs/index_service_cog.py
@@ -1,9 +1,12 @@
import datetime
import traceback

import aiofiles
import discord
import os

from discord.ext import pages

from models.embed_statics_model import EmbedStatics
from services.deletion_service import Deletion
from services.environment_service import EnvService
@@ -34,6 +37,59 @@ def __init__(
self.thread_awaiting_responses = []
self.deletion_queue = deletion_queue

async def process_indexing(self, message, index_type, content=None, link=None):
"""
Helper method to process indexing for both files and links.
- index_type: 'file' or 'link'
- content: The file content if index_type is 'file'
- link: The link if index_type is 'link'
"""
thinking_embed = discord.Embed(
title=f"🤖💬 Indexing {index_type} and saving to agent knowledge",
color=0x808080,
)
thinking_embed.set_footer(text="This may take a few seconds.")

try:
thinking_message = await message.reply(embed=thinking_embed)
except:
traceback.print_exc()

if index_type == "file":
indexing_result, summary = await self.index_handler.index_chat_file(
message, content
)
else:
indexing_result, summary = await self.index_handler.index_link(
link, summarize=True, index_chat_ctx=message
)
print("The summary is " + str(summary))

try:
await thinking_message.delete()
except:
pass

if not indexing_result:
failure_embed = discord.Embed(
title="Indexing Error",
description=f"Your {index_type} could not be indexed",
color=discord.Color.red(),
)
failure_embed.set_thumbnail(url="https://i.imgur.com/hbdBZfG.png")
await message.reply(embed=failure_embed)
self.thread_awaiting_responses.remove(message.channel.id)
return False

success_embed = discord.Embed(
title=f"{index_type.capitalize()} Interpreted",
description=f"The {index_type} you've uploaded has successfully been interpreted. The summary is below:\n`{summary}`",
color=discord.Color.green(),
)
success_embed.set_thumbnail(url="https://i.imgur.com/I5dIdg6.png")
await message.reply(embed=success_embed)
return True

@discord.Cog.listener()
async def on_message(self, message):
# Check for self
@@ -79,19 +135,81 @@ async def on_message(self, message):
except:
pass

# Handle file uploads
file = message.attachments[0] if len(message.attachments) > 0 else None

# File operations, allow for user file upload
if file:
indexing_result = await self.process_indexing(
message, "file", content=file
)

if not indexing_result:
self.thread_awaiting_responses.remove(message.channel.id)
return

prompt += (
"\n{System Message: the user has just uploaded the file "
+ str(file.filename)
+ "Unless the user asked a specific question, do not use your tools and instead just acknowledge the upload}"
)

# Link operations, allow for user link upload, we connect and download the content at the link.
if "http" in prompt:
# Extract the entire link
link = prompt[prompt.find("http") :]

indexing_result = await self.process_indexing(
message, "link", link=link
)

if not indexing_result:
self.thread_awaiting_responses.remove(message.channel.id)
return

prompt += (
"\n{System Message: you have just indexed the link "
+ str(link)
+ "}"
)

chat_result = await self.index_handler.execute_index_chat_message(
message, prompt
)

if chat_result:
await message.channel.send(chat_result)
if len(chat_result) > 2000:
embed_pages = await EmbedStatics.paginate_chat_embed(chat_result)
paginator = pages.Paginator(
pages=embed_pages,
timeout=None,
author_check=False,
)
try:
await paginator.respond(message)
except:
chat_result = [
chat_result[i : i + 1900]
for i in range(0, len(chat_result), 1900)
]
for count, chunk in enumerate(chat_result, start=1):
await message.channel.send(chunk)

else:
chat_result = chat_result.replace("\\n", "\n")
# Build a response embed
response_embed = discord.Embed(
title="",
description=chat_result,
color=0x808080,
)
await message.reply(
embed=response_embed,
)
self.thread_awaiting_responses.remove(message.channel.id)

async def index_chat_command(self, ctx, user_index, search_index, model):
if not user_index and not search_index:
await ctx.respond("Please provide a valid user index or search index")
return

await self.index_handler.start_index_chat(ctx, search_index, user_index, model)
async def index_chat_command(self, ctx, model):
await self.index_handler.start_index_chat(ctx, model)

pass

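Taken together, the listener changes in this file amount to the flow sketched below. This is a condensed, illustrative sketch rather than the cog itself: `process_indexing`, `index_handler`, and `thread_awaiting_responses` are the names from the diff, while the standalone function, its `cog` parameter, and the simplified system messages are placeholders.

```python
# Condensed sketch of the drag-and-drop indexing flow added in this file.
# Illustrative only: `cog` stands in for the index service cog instance above.
import discord


async def handle_index_chat_message(cog, message: discord.Message, prompt: str):
    # 1. Index an attached file, if any, and tell the model about the upload.
    file = message.attachments[0] if message.attachments else None
    if file:
        if not await cog.process_indexing(message, "file", content=file):
            return  # an error embed was already sent and the thread was unblocked
        prompt += "\n{System Message: the user has just uploaded the file " + str(file.filename) + "}"

    # 2. Index a link embedded in the prompt, if present.
    if "http" in prompt:
        link = prompt[prompt.find("http"):]
        if not await cog.process_indexing(message, "link", link=link):
            return
        prompt += "\n{System Message: you have just indexed the link " + str(link) + "}"

    # 3. Answer over the index (pagination of replies over 2000 characters,
    #    shown in the diff above, is omitted in this sketch).
    chat_result = await cog.index_handler.execute_index_chat_message(message, prompt)
    if chat_result:
        await message.reply(
            embed=discord.Embed(description=chat_result.replace("\\n", "\n"), color=0x808080)
        )
    cog.thread_awaiting_responses.remove(message.channel.id)
```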
27 changes: 1 addition & 26 deletions cogs/search_service_cog.py
@@ -311,31 +311,6 @@ async def paginate_embed(

return pages

async def paginate_chat_embed(self, response_text):
"""Given a response text make embed pages and return a list of the pages."""

response_text = [
response_text[i : i + 3500] for i in range(0, len(response_text), 7000)
]
pages = []
first = False
# Send each chunk as a message
for count, chunk in enumerate(response_text, start=1):
if not first:
page = discord.Embed(
title=f"{count}",
description=chunk,
)
first = True
else:
page = discord.Embed(
title=f"{count}",
description=chunk,
)
pages.append(page)

return pages

@discord.Cog.listener()
async def on_message(self, message):
# Check if the message is from a bot.
@@ -426,7 +401,7 @@ async def on_message(self, message):
return

if len(response) > 2000:
embed_pages = await self.paginate_chat_embed(response)
embed_pages = await EmbedStatics.paginate_chat_embed(response)
paginator = pages.Paginator(
pages=embed_pages,
timeout=None,
4 changes: 2 additions & 2 deletions gpt3discord.py
@@ -34,7 +34,7 @@
from models.openai_model import Model


__version__ = "11.9.9"
__version__ = "12.0.0"


PID_FILE = Path("bot.pid")
@@ -94,7 +94,7 @@
# Settings for the bot
#
activity = discord.Activity(
type=discord.ActivityType.watching, name="for /help /gpt, and more!"
type=discord.ActivityType.watching, name="for /help, /gpt, and more!"
)
bot = discord.Bot(intents=discord.Intents.all(), command_prefix="!", activity=activity)
usage_service = UsageService(Path(os.environ.get("DATA_DIR", os.getcwd())))
26 changes: 26 additions & 0 deletions models/embed_statics_model.py
@@ -9,6 +9,32 @@ class EmbedStatics:
def __init__(self):
pass

@staticmethod
async def paginate_chat_embed(response_text):
"""Given a response text make embed pages and return a list of the pages."""

response_text = [
response_text[i : i + 3500] for i in range(0, len(response_text), 7000)
]
pages = []
first = False
# Send each chunk as a message
for count, chunk in enumerate(response_text, start=1):
if not first:
page = discord.Embed(
title=f"{count}",
description=chunk,
)
first = True
else:
page = discord.Embed(
title=f"{count}",
description=chunk,
)
pages.append(page)

return pages

@staticmethod
def get_api_timeout_embed():
embed = discord.Embed(
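For reference, the cogs changed in this commit consume the relocated helper roughly as follows. This is a minimal sketch assuming the call-site pattern shown above, with py-cord's `discord.ext.pages` paginator and a plain-chunk fallback; the `send_long_response` wrapper and its arguments are illustrative and not part of the commit.

```python
# Minimal sketch of how the cogs above use EmbedStatics.paginate_chat_embed (illustrative).
import discord
from discord.ext import pages

from models.embed_statics_model import EmbedStatics


async def send_long_response(message: discord.Message, response: str) -> None:
    if len(response) > 2000:
        # Split the response into embed pages and serve them through a paginator.
        embed_pages = await EmbedStatics.paginate_chat_embed(response)
        paginator = pages.Paginator(
            pages=embed_pages,
            timeout=None,
            author_check=False,
        )
        try:
            await paginator.respond(message)
        except Exception:
            # Fallback used in the cogs: send raw 1900-character chunks instead.
            for chunk in [response[i : i + 1900] for i in range(0, len(response), 1900)]:
                await message.channel.send(chunk)
    else:
        await message.reply(embed=discord.Embed(description=response, color=0x808080))
```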