Fix conversation summarization

Kav-K · Oct 30, 2023 · 4ad4527 · 4ad4527
1 parent b23d76e
commit 4ad4527
Show file tree

Hide file tree

Showing 5 changed files with 37 additions and 22 deletions.
diff --git a/cogs/text_service_cog.py b/cogs/text_service_cog.py
@@ -608,7 +608,7 @@ async def check_conversation_limit(self, message):
     async def summarize_conversation(self, message, prompt):
         """Takes a conversation history filled prompt and summarizes it to then start a new history with it as the base"""
         response = await self.model.send_summary_request(prompt)
-        summarized_text = response["choices"][0]["text"]
+        summarized_text = response["choices"][0]["message"]["content"]
 
         new_conversation_history = []
         new_conversation_history.append(

diff --git a/conversation_starter_pretext.txt b/conversation_starter_pretext.txt
@@ -33,11 +33,11 @@ Human: I'm making a discord bot <|endofstatement|>
 
 There can be an arbitrary amount of newlines between chat entries. <username> can be any name, pay attention to who's talking. The text "<|endofstatement|>" is used to separate chat entries and make it easier for you to understand the context.
 
-Sometimes, users will upload images during a conversation, when that happens, you will already have an understanding of what that image is, you will know what the image is denoted by "Image Info-Caption". The piece of information starting with "Image Info-QA" contains an attempted direct answer to what the user originally asked about the image input. The results of Optical Character Recognition of the image will be provided, named "Image Info-OCR", image OCR data is usually more objective.
+Sometimes, users will upload images during a conversation, when that happens, you will already have an understanding of what that image is, you will know what the image is denoted by "Image Info-Caption". The piece of information starting with "Image Info-QA" contains an attempted direct answer to what the user originally asked about the image input. There is another version of Info-QA called "Revised Image Info-QA" which is a more important and accurate answer to the question based on multimodal understanding. You should prioritize using the information from Image Info-OCR and Revised Image Info-QA. The results of Optical Character Recognition of the image will be provided, named "Image Info-OCR", image OCR data is usually more objective.
 For example:
-Human: Image Info-Caption: a sign that says rya, ohio\nInfo-QA: rya, ohio\nImage Info-OCR: AYR,\nLONTARIO \nWhere is this? <|endofstatement|>
+Human: Image Info-Caption: a sign that says ayr, ohio\nInfo-QA: ayr, ohio\nRevised Image Info-QA: This is a town in Ayr, Ontario\nImage Info-OCR: AYR,\nLONTARIO \nWhere is this? <|endofstatement|>
 <yourname>: This is an image of the town Ayr, Ontario <|endofstatement|>
-Human: Image Info-Caption: a landscape with a river and trees\nImage Info-QA: yes\nImage Info-OCR: \nWhat is this image? Is it cartoony? <|endofstatement|>
+Human: Image Info-Caption: a landscape with a river and trees\nImage Info-QA: yes\nRevised Image Info-QA: This is a beautiful river and tree landscape, it is in a cartoony art style\nImage Info-OCR: \nWhat is this image? Is it cartoony? <|endofstatement|>
 <yourname>: This is a landscape with a river and trees, it is indeed cartoony! <|endofstatement|>
 ...
 

diff --git a/gpt3discord.py b/gpt3discord.py
@@ -34,7 +34,7 @@
 from models.openai_model import Model
 
 
-__version__ = "11.9.7"
+__version__ = "11.9.8"
 
 
 PID_FILE = Path("bot.pid")

diff --git a/models/openai_model.py b/models/openai_model.py
@@ -134,7 +134,7 @@ class ModelLimits:
     MIN_CONVERSATION_LENGTH = 1
     MAX_CONVERSATION_LENGTH = 100000
 
-    MIN_SUMMARIZE_THRESHOLD = 800
+    MIN_SUMMARIZE_THRESHOLD = 1500
     MAX_SUMMARIZE_THRESHOLD = 30000
 
     MIN_NUM_IMAGES = 1
@@ -798,35 +798,45 @@ async def send_summary_request(self, prompt, custom_api_key=None):
 
         summary_request_text = "".join(summary_request_text)
 
-        tokens = self.usage_service.count_tokens(summary_request_text)
+        messages = []
+        messages.append(
+            {
+                "role": "system",
+                "content": summary_request_text,
+            }
+        )
 
-        async with aiohttp.ClientSession(raise_for_status=False) as session:
+        async with aiohttp.ClientSession(
+            raise_for_status=False, timeout=aiohttp.ClientTimeout(total=300)
+        ) as session:
             payload = {
-                "model": Models.DAVINCI,
-                "prompt": summary_request_text,
-                "temperature": 0.5,
-                "top_p": 1,
-                "max_tokens": self.max_tokens - tokens,
+                "model": self.model if self.model is not None else Models.GPT4_32,
+                "messages": messages,
+                "temperature": self.temp,
+                "top_p": self.top_p,
                 "presence_penalty": self.presence_penalty,
                 "frequency_penalty": self.frequency_penalty,
-                "best_of": self.best_of,
             }
             headers = {
-                "Content-Type": "application/json",
-                "Authorization": f"Bearer {self.openai_key if not custom_api_key else custom_api_key}",
+                "Authorization": f"Bearer {self.openai_key if not custom_api_key else custom_api_key}"
             }
             self.use_org = True if "true" in str(self.use_org).lower() else False
             if self.use_org:
                 if self.openai_organization:
                     headers["OpenAI-Organization"] = self.openai_organization
+
             async with session.post(
-                "https://api.openai.com/v1/completions", json=payload, headers=headers
+                "https://api.openai.com/v1/chat/completions",
+                json=payload,
+                headers=headers,
             ) as resp:
                 response = await resp.json()
-
-                await self.valid_text_request(response)
-
-                # print(response["choices"][0]["text"])
+                # print(f"Payload -> {payload}")
+                # Parse the total tokens used for this request and response pair from the response
+                await self.valid_text_request(
+                    response, model=self.model if self.model is not None else Models.GPT4_32
+                )
+                print(f"Summary response -> {response}")
 
                 return response
 

diff --git a/services/text_service.py b/services/text_service.py
@@ -239,13 +239,18 @@ async def encapsulated_send(
                 # We don't need to worry about the differences between interactions and messages in this block,
                 # because if we are in this block, we can only be using a message object for ctx
                 if converser_cog.model.summarize_conversations:
-                    await ctx.reply(
+                    summarizing_message = await ctx.reply(
                         "I'm currently summarizing our current conversation so we can keep chatting, "
                         "give me one moment!"
                     )
 
                     await converser_cog.summarize_conversation(ctx, new_prompt)
 
+                    try:
+                        await summarizing_message.delete()
+                    except:
+                        pass
+
                     # Check again if the prompt is about to go past the token limit
                     new_prompt = (
                         "".join(