diff --git a/requirements.txt b/requirements.txt
index 5130c8d..7b27d8e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,8 +22,7 @@
 langchain-community==0.2.1
 langtrace_python_sdk==2.1.26
 qdrant-client==1.9.2
-supabase==1.0.2
-pinecone-client==2.2.2
+
 sentence_transformers==2.2.2
 openai==1.30.5
 tavily-python==0.3.3
diff --git a/src/initialize.py b/src/initialize.py
index 5f1f865..e066998 100644
--- a/src/initialize.py
+++ b/src/initialize.py
@@ -2,7 +2,6 @@
 import logging as lg
 import os
 
-import pinecone
 import yaml
 from langchain.chains import RetrievalQA
 from langchain.chat_models import ChatOpenAI
@@ -12,16 +11,12 @@
     HumanMessagePromptTemplate,
     SystemMessagePromptTemplate,
 )
-from langchain.vectorstores.pinecone import Pinecone
 from langchain.vectorstores.qdrant import Qdrant
 from openai import AsyncOpenAI
 from qdrant_client import QdrantClient
 from qdrant_client.models import VectorParams
-from supabase.client import Client, create_client
 from tavily import TavilyClient
 
-from src.utils import StandardSupabaseVectorStore
-
 
 def initialize_logging():
     logger = lg.getLogger()
@@ -62,61 +57,13 @@ def _init_config():
 def _init_vector_store(config_loader):
     logger = lg.getLogger(_init_vector_store.__name__)
     logger.info("Initializing vector store")
-    if config_loader["vector_store"] == "pinecone":
-        vector_store = _init_vector_store_pinecone(config_loader)
-    elif config_loader["vector_store"] == "supabase":
-        vector_store = _init_vector_store_supabase(config_loader)
-    elif config_loader["vector_store"] == "qdrant":
+    if config_loader["vector_store"] == "qdrant":
         vector_store = _init_vector_stores_qdrant(config_loader)
     else:
         raise ValueError("Vector Database not configured")
     return vector_store
 
 
-def _init_vector_store_pinecone(config_loader):
-    logger = lg.getLogger(_init_vector_store_pinecone.__name__)
-    logger.info("Initializing vector store")
-    pinecone.init(
-        api_key=os.environ["PINECONE_API_KEY"],
-        environment=os.environ["PINECONE_ENV"],
-    )
-    index_name = config_loader["vector_store_index_name"]
-    index = pinecone.Index(index_name)
-    embeddings = HuggingFaceEmbeddings(
-        model_name=config_loader["embeddings_model_name"],
-        model_kwargs={"device": "cpu"},
-    )
-    vector_store = Pinecone(index, embeddings.embed_query, "text")
-    logger.info(pinecone.describe_index(index_name))
-    logger.info(index.describe_index_stats())
-    logger.info("Initialized vector store")
-    return vector_store
-
-
-def _init_vector_store_supabase(config_loader):
-    from supabase.lib.client_options import ClientOptions
-
-    logger = lg.getLogger(_init_vector_store_supabase.__name__)
-    logger.info("Initializing vector store")
-    supabase_client: Client = create_client(
-        supabase_url=os.environ.get("SUPABASE_API_URL"),
-        supabase_key=os.environ.get("SUPABASE_API_KEY"),
-        options=ClientOptions(postgrest_client_timeout=60),
-    )
-    embeddings = HuggingFaceEmbeddings(
-        model_name=config_loader["embeddings_model_name"],
-        model_kwargs={"device": "cpu"},
-    )
-    vector_store = StandardSupabaseVectorStore(
-        client=supabase_client,
-        embedding=embeddings,
-        table_name=config_loader["table_name"],
-        query_name=config_loader["query_name"],
-    )
-    logger.info("Initialized vector store")
-    return vector_store
-
-
 def _init_vector_stores_qdrant(config_loader):
     logger = lg.getLogger(_init_vector_stores_qdrant.__name__)
     logger.info("Initializing vector stores")
diff --git a/src/utils.py b/src/utils.py
index d4bb878..8f04dea 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -13,13 +13,6 @@
 )
 
 
-class StandardSupabaseVectorStore(SupabaseVectorStore):
-    def similarity_search_with_score(
-        self, query: str, k: int = 4, **kwargs: tp.Any
-    ) -> tp.List[tp.Tuple[Document, float]]:
-        return self.similarity_search_with_relevance_scores(query, k, **kwargs)
-
-
 class QAResponsePayloadModel(BaseModel):
     scoring_id: str
     context: tp.List[tp.Tuple[Document, float]]