Skip to content

Commit

Permalink
fix: Bugs fixed for managing embeddings
Browse files Browse the repository at this point in the history
Signed-off-by: ishaansehgal99 <[email protected]>
  • Loading branch information
ishaansehgal99 committed Sep 28, 2024
1 parent 7165ccf commit 7f39939
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 5 deletions.
1 change: 0 additions & 1 deletion presets/rag_service/embedding/huggingface_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,4 @@ def get_embedding_dimension(self) -> int:
dummy_input = "This is a dummy sentence."
embedding = self.get_text_embedding(dummy_input)

# TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size)
return len(embedding)
1 change: 0 additions & 1 deletion presets/rag_service/embedding/huggingface_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,4 @@ def get_embedding_dimension(self) -> int:
dummy_input = "This is a dummy sentence."
embedding = self.get_text_embedding(dummy_input)

# TODO Assume embedding is a 1D array (needs to be tested); return its length (the dimension size)
return len(embedding)
Empty file.
5 changes: 5 additions & 0 deletions presets/rag_service/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import sys
import os
# Put the parent of this tests directory at the front of sys.path so the
# test modules can import the service's top-level modules directly.
_SERVICE_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.insert(0, _SERVICE_ROOT)

# Test-run environment:
#   CUDA_VISIBLE_DEVICES=-1  -> force CPU-only execution for testing
#   OMP_NUM_THREADS=1        -> force single-threaded execution to prevent a
#                               segfault while loading the embedding model
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "-1",
    "OMP_NUM_THREADS": "1",
})
64 changes: 64 additions & 0 deletions presets/rag_service/tests/test_faiss_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os
from tempfile import TemporaryDirectory
from unittest.mock import MagicMock

import pytest
from vector_store.faiss_store import FaissVectorStoreManager
from models import Document
from embedding.huggingface_local import LocalHuggingFaceEmbedding
from config import MODEL_ID

@pytest.fixture(scope='session')
def init_embed_manager():
    """Create the local HuggingFace embedding manager for ``MODEL_ID``.

    The fixture is session-scoped, so a single instance is built and then
    reused by every test that requests it.
    """
    embedding_manager = LocalHuggingFaceEmbedding(MODEL_ID)
    return embedding_manager

@pytest.fixture
def vector_store_manager(init_embed_manager):
    """Yield a ``FaissVectorStoreManager`` with PERSIST_DIR set to a temp dir.

    ``PERSIST_DIR`` is pointed at a fresh temporary directory for the
    duration of the test. On teardown the previous value of the variable is
    restored (or the variable is removed if it was unset), so later tests
    never see a path to a directory that has already been deleted.

    NOTE(review): this only helps if the code under test reads PERSIST_DIR
    from the environment after this fixture runs -- confirm against config.
    """
    previous_persist_dir = os.environ.get('PERSIST_DIR')
    with TemporaryDirectory() as temp_dir:
        # Redirect the persistence directory to the temporary location.
        os.environ['PERSIST_DIR'] = temp_dir
        try:
            yield FaissVectorStoreManager(init_embed_manager)
        finally:
            # Undo the environment change even if the test body failed.
            if previous_persist_dir is None:
                os.environ.pop('PERSIST_DIR', None)
            else:
                os.environ['PERSIST_DIR'] = previous_persist_dir


def test_index_documents(vector_store_manager):
    """Indexing two documents returns their ids, in input order."""
    text_by_id = {"1": "First document", "2": "Second document"}
    documents = [
        Document(doc_id=doc_id, text=text, metadata={"type": "text"})
        for doc_id, text in text_by_id.items()
    ]

    doc_ids = vector_store_manager.index_documents(
        documents, index_name="test_index"
    )

    assert len(doc_ids) == 2
    assert doc_ids == ["1", "2"]


def test_query_documents(vector_store_manager):
    """Querying an index populated with two documents returns a result."""
    # Populate the index that will be queried.
    first_doc = Document(doc_id="1", text="First document", metadata={"type": "text"})
    second_doc = Document(doc_id="2", text="Second document", metadata={"type": "text"})
    vector_store_manager.index_documents(
        [first_doc, second_doc], index_name="test_index"
    )

    # Query the manager for the single best match; this test only checks
    # that some result object comes back.
    query_result = vector_store_manager.query(
        "First", top_k=1, index_name="test_index"
    )

    assert query_result is not None


def test_add_and_delete_document(vector_store_manager):
    """A document added to an existing index exists until it is deleted."""
    # Create the index with one initial document.
    seed_document = Document(
        doc_id="3", text="Third document", metadata={"type": "text"}
    )
    vector_store_manager.index_documents([seed_document], index_name="test_index")

    # Add a second document to the index that now exists.
    added_document = Document(
        doc_id="4", text="Fourth document", metadata={"type": "text"}
    )
    vector_store_manager.add_document(added_document, index_name="test_index")

    # The added document must be visible before deletion...
    present_after_add = vector_store_manager.document_exists("4", "test_index")
    assert present_after_add

    # ...and gone once it has been deleted.
    vector_store_manager.delete_document("4", "test_index")
    present_after_delete = vector_store_manager.document_exists("4", "test_index")
    assert not present_after_delete
7 changes: 4 additions & 3 deletions presets/rag_service/vector_store/faiss_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@


class FaissVectorStoreManager(BaseVectorStore):
def __init__(self, embed_model):
self.embed_model = embed_model
self.dimension = self.embed_model.get_embedding_dimension()
def __init__(self, embedding_manager):
self.embedding_manager = embedding_manager
self.embed_model = self.embedding_manager.model
self.dimension = self.embedding_manager.get_embedding_dimension()
# TODO: Consider allowing user custom indexing method e.g.
"""
# Choose the FAISS index type based on the provided index_method
Expand Down

0 comments on commit 7f39939

Please sign in to comment.