Skip to content

Commit

Permalink
Patch chroma filters tests (#67)
Browse files Browse the repository at this point in the history
* fix test class

* remove deprecated method

* make the class not discoverable by pytest

* patch it with glue and sticks

* lint
  • Loading branch information
masci authored Nov 29, 2023
1 parent 671c688 commit a677c0d
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 46 deletions.
2 changes: 1 addition & 1 deletion integrations/chroma/src/chroma_haystack/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D
raise ValueError(msg)

if doc.content is None:
logger.warn(
logger.warning(
"ChromaDocumentStore can only store the text field of Documents: "
"'array', 'dataframe' and 'blob' will be dropped."
)
Expand Down
2 changes: 1 addition & 1 deletion integrations/chroma/src/chroma_haystack/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class ChromaDocumentStoreError(DocumentStoreError):
pass


class ChromaDocumentStoreFilterError(FilterError):
class ChromaDocumentStoreFilterError(FilterError, ValueError):
pass


Expand Down
105 changes: 63 additions & 42 deletions integrations/chroma/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,16 @@
import pytest
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
from haystack import Document
from haystack.testing.document_store import DocumentStoreBaseTests
from haystack.testing.document_store import (
CountDocumentsTest,
DeleteDocumentsTest,
LegacyFilterDocumentsTest,
)

from chroma_haystack.document_store import ChromaDocumentStore


class TestEmbeddingFunction(EmbeddingFunction):
class _TestEmbeddingFunction(EmbeddingFunction):
"""
Chroma lets you provide custom functions to compute embeddings,
we use this feature to provide a fake algorithm returning random
Expand All @@ -26,49 +30,64 @@ def __call__(self, input: Documents) -> Embeddings: # noqa - chroma will inspec
return [np.random.default_rng().uniform(-1, 1, 768).tolist()]


class TestDocumentStore(DocumentStoreBaseTests):
class TestDocumentStore(CountDocumentsTest, DeleteDocumentsTest, LegacyFilterDocumentsTest):
"""
Common test cases are provided by the `CountDocumentsTest`, `DeleteDocumentsTest`
and `LegacyFilterDocumentsTest` mixins, but you can add more to this class.
"""

@pytest.fixture
def docstore(self) -> ChromaDocumentStore:
def document_store(self) -> ChromaDocumentStore:
"""
This is the most basic requirement for the child class: provide
an instance of this document store so the base class can use it.
"""
with mock.patch("chroma_haystack.document_store.get_embedding_function") as get_func:
get_func.return_value = TestEmbeddingFunction()
get_func.return_value = _TestEmbeddingFunction()
return ChromaDocumentStore(embedding_function="test_function", collection_name=str(uuid.uuid1()))

def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
"""
Assert that two lists of Documents are equal.
This is used in every test. If a Document Store implementation behaves differently,
it should override this method.
This can happen for example when the Document Store sets a score to returned Documents.
Since we can't know what the score will be, we can't compare the Documents reliably.
"""
for doc_received, doc_expected in zip(received, expected):
assert doc_received.content == doc_expected.content
assert doc_received.meta == doc_expected.meta

@pytest.mark.unit
def test_ne_filter(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_ne_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
"""
We customize this test because Chroma considers "not equal" to be true when
a field is missing.
"""
docstore.write_documents(filterable_docs)
result = docstore.filter_documents(filters={"page": {"$ne": "100"}})
assert self.contains_same_docs(result, [doc for doc in filterable_docs if doc.meta.get("page", "100") != "100"])
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(filters={"page": {"$ne": "100"}})
self.assert_documents_are_equal(
result, [doc for doc in filterable_docs if doc.meta.get("page", "100") != "100"]
)

@pytest.mark.unit
def test_delete_empty(self, docstore: ChromaDocumentStore):
def test_delete_empty(self, document_store: ChromaDocumentStore):
"""
Deleting a non-existing document should not raise with Chroma
"""
docstore.delete_documents(["test"])
document_store.delete_documents(["test"])

@pytest.mark.unit
def test_delete_not_empty_nonexisting(self, docstore: ChromaDocumentStore):
def test_delete_not_empty_nonexisting(self, document_store: ChromaDocumentStore):
"""
Deleting a non-existing document should not raise with Chroma
"""
doc = Document(content="test doc")
docstore.write_documents([doc])
docstore.delete_documents(["non_existing"])
document_store.write_documents([doc])
document_store.delete_documents(["non_existing"])

assert docstore.filter_documents(filters={"id": doc.id}) == [doc]
assert document_store.filter_documents(filters={"id": doc.id}) == [doc]

@pytest.mark.integration
def test_to_json(self, request):
Expand All @@ -95,141 +114,143 @@ def test_from_json(self):

@pytest.mark.skip(reason="Filter on array contents is not supported.")
@pytest.mark.unit
def test_filter_document_array(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_document_array(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter on dataframe contents is not supported.")
@pytest.mark.unit
def test_filter_document_dataframe(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_document_dataframe(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter on table contents is not supported.")
@pytest.mark.unit
def test_eq_filter_table(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_eq_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter on embedding value is not supported.")
@pytest.mark.unit
def test_eq_filter_embedding(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_eq_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="$in operator is not supported.")
@pytest.mark.unit
def test_in_filter_explicit(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_in_filter_explicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="$in operator is not supported. Filter on table contents is not supported.")
@pytest.mark.unit
def test_in_filter_table(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_in_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="$in operator is not supported.")
@pytest.mark.unit
def test_in_filter_embedding(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_in_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter on table contents is not supported.")
@pytest.mark.unit
def test_ne_filter_table(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_ne_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter on embedding value is not supported.")
@pytest.mark.unit
def test_ne_filter_embedding(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_ne_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="$nin operator is not supported. Filter on table contents is not supported.")
@pytest.mark.unit
def test_nin_filter_table(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_nin_filter_table(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="$nin operator is not supported. Filter on embedding value is not supported.")
@pytest.mark.unit
def test_nin_filter_embedding(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_nin_filter_embedding(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="$nin operator is not supported.")
@pytest.mark.unit
def test_nin_filter(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_nin_filter(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_simple_implicit_and_with_multi_key_dict(
self, docstore: ChromaDocumentStore, filterable_docs: List[Document]
self, document_store: ChromaDocumentStore, filterable_docs: List[Document]
):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_simple_explicit_and_with_multikey_dict(
self, docstore: ChromaDocumentStore, filterable_docs: List[Document]
self, document_store: ChromaDocumentStore, filterable_docs: List[Document]
):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_simple_explicit_and_with_list(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_simple_explicit_and_with_list(
self, document_store: ChromaDocumentStore, filterable_docs: List[Document]
):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_simple_implicit_and(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_simple_implicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_nested_explicit_and(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_nested_explicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_nested_implicit_and(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_nested_implicit_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_simple_or(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_simple_or(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_nested_or(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_nested_or(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter on table contents is not supported.")
@pytest.mark.unit
def test_filter_nested_and_or_explicit(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_nested_and_or_explicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_nested_and_or_implicit(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_nested_and_or_implicit(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_nested_or_and(self, docstore: ChromaDocumentStore, filterable_docs: List[Document]):
def test_filter_nested_or_and(self, document_store: ChromaDocumentStore, filterable_docs: List[Document]):
pass

@pytest.mark.skip(reason="Filter syntax not supported.")
@pytest.mark.unit
def test_filter_nested_multiple_identical_operators_same_level(
self, docstore: ChromaDocumentStore, filterable_docs: List[Document]
self, document_store: ChromaDocumentStore, filterable_docs: List[Document]
):
pass

@pytest.mark.skip(reason="Duplicate policy not supported.")
@pytest.mark.unit
def test_write_duplicate_fail(self, docstore: ChromaDocumentStore):
def test_write_duplicate_fail(self, document_store: ChromaDocumentStore):
pass

@pytest.mark.skip(reason="Duplicate policy not supported.")
@pytest.mark.unit
def test_write_duplicate_skip(self, docstore: ChromaDocumentStore):
def test_write_duplicate_skip(self, document_store: ChromaDocumentStore):
pass

@pytest.mark.skip(reason="Duplicate policy not supported.")
@pytest.mark.unit
def test_write_duplicate_overwrite(self, docstore: ChromaDocumentStore):
def test_write_duplicate_overwrite(self, document_store: ChromaDocumentStore):
pass
4 changes: 2 additions & 2 deletions integrations/chroma/tests/test_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_retriever_to_json(request):
)
retriever = ChromaQueryRetriever(ds, filters={"foo": "bar"}, top_k=99)
assert retriever.to_dict() == {
"type": "ChromaQueryRetriever",
"type": "chroma_haystack.retriever.ChromaQueryRetriever",
"init_parameters": {
"filters": {"foo": "bar"},
"top_k": 99,
Expand All @@ -27,7 +27,7 @@ def test_retriever_to_json(request):
@pytest.mark.integration
def test_retriever_from_json(request):
data = {
"type": "ChromaQueryRetriever",
"type": "chroma_haystack.retriever.ChromaQueryRetriever",
"init_parameters": {
"filters": {"bar": "baz"},
"top_k": 42,
Expand Down

0 comments on commit a677c0d

Please sign in to comment.