Skip to content

Commit

Permalink
Format Python code with psf/black push
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions authored and github-actions committed Apr 18, 2023
1 parent bf14580 commit 4d6471f
Showing 1 changed file with 21 additions and 7 deletions.
28 changes: 21 additions & 7 deletions cogs/search_service_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,23 @@
import discord
from bs4 import BeautifulSoup
from discord.ext import pages
from langchain import GoogleSearchAPIWrapper, WolframAlphaAPIWrapper, FAISS, InMemoryDocstore
from langchain import (
GoogleSearchAPIWrapper,
WolframAlphaAPIWrapper,
FAISS,
InMemoryDocstore,
)
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory, CombinedMemory
from langchain.requests import TextRequestsWrapper, Requests
from llama_index import GPTSimpleVectorIndex, Document, SimpleDirectoryReader, ServiceContext, OpenAIEmbedding
from llama_index import (
GPTSimpleVectorIndex,
Document,
SimpleDirectoryReader,
ServiceContext,
OpenAIEmbedding,
)
from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
from pydantic import Extra, BaseModel
from transformers import GPT2TokenizerFast
Expand All @@ -39,6 +50,7 @@

vector_stores = {}


class RedoSearchUser:
def __init__(self, ctx, query, search_scope, nodes, response_mode):
self.ctx = ctx
Expand Down Expand Up @@ -67,20 +79,17 @@ class Config:
def __init__(self, **data: Any):
super().__init__(**data)


@property
def requests(self) -> Requests:
return Requests(headers=self.headers, aiosession=self.aiosession)

def get(self, url: str, **kwargs: Any) -> str:

# the "url" field is actually some input from the LLM: a comma-separated string containing the url, a boolean value, and the original query
url, use_gpt4, original_query = url.split(",")
use_gpt4 = use_gpt4 == "True"
"""GET the URL and return the text."""
text = self.requests.get(url, **kwargs).text


# Load this text into BeautifulSoup, clean it up and only retain text content within <p> and <title> and <h1> type tags, get rid of all javascript and css too.
soup = BeautifulSoup(text, "html.parser")

Expand All @@ -94,7 +103,6 @@ def get(self, url: str, **kwargs: Any) -> str:
# Clean up white spaces
text = re.sub(r"\s+", " ", text)


# If not using GPT-4 and the text token amount is over 3500, truncate it to 3500 tokens
tokens = len(self.tokenizer(text)["input_ids"])
print("The scraped text content is: " + text)
Expand All @@ -110,11 +118,17 @@ def get(self, url: str, **kwargs: Any) -> str:
index = GPTSimpleVectorIndex.from_documents(
document, service_context=service_context, use_async=True
)
response_text = index.query(original_query, refine_template=CHAT_REFINE_PROMPT, similarity_top_k=4, response_mode="compact")
response_text = index.query(
original_query,
refine_template=CHAT_REFINE_PROMPT,
similarity_top_k=4,
response_mode="compact",
)
return response_text

return text


class SearchService(discord.Cog, name="SearchService"):
"""Cog containing translation commands and retrieval of translation services"""

Expand Down

0 comments on commit 4d6471f

Please sign in to comment.