-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Introduce testing framework and chroma
- Loading branch information
1 parent
ad0dde9
commit 3d09952
Showing
5 changed files
with
166 additions
and
118 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
pkg/ragengine/services/tests/vector_store/test_base_store.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
import os | ||
from tempfile import TemporaryDirectory | ||
from unittest.mock import patch | ||
import pytest | ||
from abc import ABC, abstractmethod | ||
|
||
from services.vector_store.base import BaseVectorStore | ||
from services.models import Document | ||
from services.embedding.huggingface_local import LocalHuggingFaceEmbedding | ||
from services.config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET | ||
from services.config import PERSIST_DIR | ||
|
||
class BaseVectorStoreTest(ABC): | ||
"""Base class for vector store tests that defines the test structure.""" | ||
|
||
@pytest.fixture(scope='session') | ||
def init_embed_manager(self): | ||
return LocalHuggingFaceEmbedding(MODEL_ID) | ||
|
||
@pytest.fixture | ||
@abstractmethod | ||
def vector_store_manager(self, init_embed_manager): | ||
"""Each implementation must provide its own vector store manager.""" | ||
pass | ||
|
||
def test_index_documents(self, vector_store_manager): | ||
first_doc_text, second_doc_text = "First document", "Second document" | ||
documents = [ | ||
Document(text=first_doc_text, metadata={"type": "text"}), | ||
Document(text=second_doc_text, metadata={"type": "text"}) | ||
] | ||
|
||
doc_ids = vector_store_manager.index_documents("test_index", documents) | ||
|
||
assert len(doc_ids) == 2 | ||
assert set(doc_ids) == {BaseVectorStore.generate_doc_id(first_doc_text), | ||
BaseVectorStore.generate_doc_id(second_doc_text)} | ||
|
||
def test_index_documents_isolation(self, vector_store_manager): | ||
documents1 = [ | ||
Document(text="First document in index1", metadata={"type": "text"}), | ||
] | ||
documents2 = [ | ||
Document(text="First document in index2", metadata={"type": "text"}), | ||
] | ||
|
||
# Index documents in separate indices | ||
index_name_1, index_name_2 = "index1", "index2" | ||
vector_store_manager.index_documents(index_name_1, documents1) | ||
vector_store_manager.index_documents(index_name_2, documents2) | ||
|
||
assert vector_store_manager.list_all_indexed_documents() == { | ||
'index1': {"87117028123498eb7d757b1507aa3e840c63294f94c27cb5ec83c939dedb32fd": | ||
{'hash': '1e64a170be48c45efeaa8667ab35919106da0489ec99a11d0029f2842db133aa', | ||
'text': 'First document in index1'}}, | ||
'index2': {"49b198c0e126a99e1975f17b564756c25b4ad691a57eda583e232fd9bee6de91": | ||
{'hash': 'a222f875b83ce8b6eb72b3cae278b620de9bcc7c6b73222424d3ce979d1a463b', | ||
'text': 'First document in index2'}} | ||
} | ||
|
||
@patch('requests.post') | ||
def test_query_documents(self, mock_post, vector_store_manager): | ||
mock_response = { | ||
"result": "This is the completion from the API" | ||
} | ||
mock_post.return_value.json.return_value = mock_response | ||
|
||
documents = [ | ||
Document(text="First document", metadata={"type": "text"}), | ||
Document(text="Second document", metadata={"type": "text"}) | ||
] | ||
vector_store_manager.index_documents("test_index", documents) | ||
|
||
params = {"temperature": 0.7} | ||
query_result = vector_store_manager.query("test_index", "First", top_k=1, llm_params=params) | ||
|
||
assert query_result is not None | ||
assert query_result["response"] == "{'result': 'This is the completion from the API'}" | ||
assert query_result["source_nodes"][0]["text"] == "First document" | ||
assert query_result["source_nodes"][0]["score"] == pytest.approx(0.5795239210128784, rel=1e-6) | ||
|
||
mock_post.assert_called_once_with( | ||
INFERENCE_URL, | ||
json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True, 'temperature': 0.7}, | ||
headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} | ||
) | ||
|
||
def test_add_document(self, vector_store_manager): | ||
documents = [Document(text="Third document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("test_index", documents) | ||
|
||
new_document = [Document(text="Fourth document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("test_index", new_document) | ||
|
||
assert vector_store_manager.document_exists("test_index", new_document[0], | ||
BaseVectorStore.generate_doc_id("Fourth document")) | ||
|
||
def test_persist_index_1(self, vector_store_manager): | ||
documents = [Document(text="Test document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("test_index", documents) | ||
vector_store_manager._persist("test_index") | ||
assert os.path.exists(PERSIST_DIR) | ||
|
||
def test_persist_index_2(self, vector_store_manager): | ||
documents = [Document(text="Test document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("test_index", documents) | ||
|
||
documents = [Document(text="Another Test document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("another_test_index", documents) | ||
|
||
vector_store_manager._persist_all() | ||
assert os.path.exists(PERSIST_DIR) |
20 changes: 20 additions & 0 deletions
20
pkg/ragengine/services/tests/vector_store/test_chromadb_store.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
|
||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
import pytest | ||
import os | ||
|
||
from tempfile import TemporaryDirectory | ||
from services.tests.vector_store.test_base_store import BaseVectorStoreTest | ||
from services.vector_store.chromadb_store import ChromaDBVectorStoreHandler | ||
|
||
class TestChromaDBVectorStore(BaseVectorStoreTest): | ||
"""Test implementation for ChromaDB vector store.""" | ||
|
||
@pytest.fixture | ||
def vector_store_manager(self, init_embed_manager): | ||
with TemporaryDirectory() as temp_dir: | ||
print(f"Saving temporary test storage at: {temp_dir}") | ||
os.environ['PERSIST_DIR'] = temp_dir | ||
yield ChromaDBVectorStoreHandler(init_embed_manager) |
129 changes: 11 additions & 118 deletions
129
pkg/ragengine/services/tests/vector_store/test_faiss_store.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,126 +1,19 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
import os | ||
from tempfile import TemporaryDirectory | ||
from unittest.mock import patch | ||
|
||
import pytest | ||
import os | ||
|
||
from services.vector_store.base import BaseVectorStore | ||
from tempfile import TemporaryDirectory | ||
from services.tests.vector_store.test_base_store import BaseVectorStoreTest | ||
from services.vector_store.faiss_store import FaissVectorStoreHandler | ||
from services.models import Document | ||
from services.embedding.huggingface_local import LocalHuggingFaceEmbedding | ||
from services.config import MODEL_ID, INFERENCE_URL, INFERENCE_ACCESS_SECRET | ||
from services.config import PERSIST_DIR | ||
|
||
@pytest.fixture(scope='session') | ||
def init_embed_manager(): | ||
return LocalHuggingFaceEmbedding(MODEL_ID) | ||
|
||
@pytest.fixture | ||
def vector_store_manager(init_embed_manager): | ||
with TemporaryDirectory() as temp_dir: | ||
print(f"Saving temporary test storage at: {temp_dir}") | ||
# Mock the persistence directory | ||
os.environ['PERSIST_DIR'] = temp_dir | ||
yield FaissVectorStoreHandler(init_embed_manager) | ||
|
||
def test_index_documents(vector_store_manager): | ||
first_doc_text, second_doc_text = "First document", "Second document" | ||
documents = [ | ||
Document(text=first_doc_text, metadata={"type": "text"}), | ||
Document(text=second_doc_text, metadata={"type": "text"}) | ||
] | ||
|
||
doc_ids = vector_store_manager.index_documents("test_index", documents) | ||
class TestFaissVectorStore(BaseVectorStoreTest): | ||
"""Test implementation for FAISS vector store.""" | ||
|
||
assert len(doc_ids) == 2 | ||
assert set(doc_ids) == {BaseVectorStore.generate_doc_id(first_doc_text), | ||
BaseVectorStore.generate_doc_id(second_doc_text)} | ||
|
||
def test_index_documents_isolation(vector_store_manager): | ||
documents1 = [ | ||
Document(text="First document in index1", metadata={"type": "text"}), | ||
] | ||
documents2 = [ | ||
Document(text="First document in index2", metadata={"type": "text"}), | ||
] | ||
|
||
# Index documents in separate indices | ||
index_name_1, index_name_2 = "index1", "index2" | ||
vector_store_manager.index_documents(index_name_1, documents1) | ||
vector_store_manager.index_documents(index_name_2, documents2) | ||
|
||
assert vector_store_manager.list_all_indexed_documents() == { | ||
'index1': {"87117028123498eb7d757b1507aa3e840c63294f94c27cb5ec83c939dedb32fd": | ||
{'hash': '1e64a170be48c45efeaa8667ab35919106da0489ec99a11d0029f2842db133aa', | ||
'text': 'First document in index1'}}, | ||
'index2': {"49b198c0e126a99e1975f17b564756c25b4ad691a57eda583e232fd9bee6de91": | ||
{'hash': 'a222f875b83ce8b6eb72b3cae278b620de9bcc7c6b73222424d3ce979d1a463b', | ||
'text': 'First document in index2'}} | ||
} | ||
|
||
@patch('requests.post') | ||
def test_query_documents(mock_post, vector_store_manager): | ||
# Define Mock Response for Custom Inference API | ||
mock_response = { | ||
"result": "This is the completion from the API" | ||
} | ||
|
||
mock_post.return_value.json.return_value = mock_response | ||
|
||
# Add documents to index | ||
documents = [ | ||
Document(text="First document", metadata={"type": "text"}), | ||
Document(text="Second document", metadata={"type": "text"}) | ||
] | ||
vector_store_manager.index_documents("test_index", documents) | ||
|
||
params = {"temperature": 0.7} | ||
# Mock query and results | ||
query_result = vector_store_manager.query("test_index", "First", top_k=1, llm_params=params) | ||
|
||
assert query_result is not None | ||
assert query_result["response"] == "{'result': 'This is the completion from the API'}" | ||
assert query_result["source_nodes"][0]["text"] == "First document" | ||
assert query_result["source_nodes"][0]["score"] == pytest.approx(0.5795239210128784, rel=1e-6) | ||
|
||
mock_post.assert_called_once_with( | ||
INFERENCE_URL, | ||
# Auto-Generated by LlamaIndex | ||
json={"prompt": "Context information is below.\n---------------------\ntype: text\n\nFirst document\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: First\nAnswer: ", "formatted": True, 'temperature': 0.7}, | ||
headers={"Authorization": f"Bearer {INFERENCE_ACCESS_SECRET}"} | ||
) | ||
|
||
def test_add_document(vector_store_manager): | ||
documents = [Document(text="Third document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("test_index", documents) | ||
|
||
# Add a document to the existing index | ||
new_document = [Document(text="Fourth document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("test_index", new_document) | ||
|
||
# Assert that the document exists | ||
assert vector_store_manager.document_exists("test_index", new_document[0], | ||
BaseVectorStore.generate_doc_id("Fourth document")) | ||
|
||
def test_persist_index_1(vector_store_manager): | ||
"""Test that the index store is persisted.""" | ||
# Add a document and persist the index | ||
documents = [Document(text="Test document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("test_index", documents) | ||
vector_store_manager._persist("test_index") | ||
assert os.path.exists(PERSIST_DIR) | ||
|
||
def test_persist_index_2(vector_store_manager): | ||
"""Test that an index store is persisted.""" | ||
# Add a document and persist the index | ||
documents = [Document(text="Test document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("test_index", documents) | ||
|
||
documents = [Document(text="Another Test document", metadata={"type": "text"})] | ||
vector_store_manager.index_documents("another_test_index", documents) | ||
|
||
vector_store_manager._persist_all() | ||
assert os.path.exists(PERSIST_DIR) | ||
@pytest.fixture | ||
def vector_store_manager(self, init_embed_manager): | ||
with TemporaryDirectory() as temp_dir: | ||
print(f"Saving temporary test storage at: {temp_dir}") | ||
os.environ['PERSIST_DIR'] = temp_dir | ||
yield FaissVectorStoreHandler(init_embed_manager) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
from typing import List | ||
from ragengine.models import Document | ||
|
||
import chromadb | ||
from llama_index.vector_stores.chroma import ChromaVectorStore | ||
from .base import BaseVectorStore | ||
|
||
class ChromaDBVectorStoreHandler(BaseVectorStore): | ||
def __init__(self, embedding_manager): | ||
super().__init__(embedding_manager) | ||
self.chroma_client = chromadb.EphemeralClient() | ||
|
||
def _create_new_index(self, index_name: str, documents: List[Document]) -> List[str]: | ||
chroma_collection = self.chroma_client.create_collection(index_name) | ||
vector_store = ChromaVectorStore(chroma_collection=chroma_collection) | ||
return self._create_index_common(index_name, documents, vector_store) |