From bfffa2ae7db560beddb6c2145c399090bd236c78 Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Thu, 8 Aug 2024 16:06:05 -0400 Subject: [PATCH 01/13] [Fix] Fixing the persistence of chroma if client settings and persistent path given. --- .../chroma/langchain_chroma/vectorstores.py | 1 + .../integration_tests/test_vectorstores.py | 43 ++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py index 76ad86d67a7a4..75293732b738f 100644 --- a/libs/partners/chroma/langchain_chroma/vectorstores.py +++ b/libs/partners/chroma/langchain_chroma/vectorstores.py @@ -302,6 +302,7 @@ def __init__( client_settings.persist_directory = ( persist_directory or client_settings.persist_directory ) + client_settings.is_persistent = client_settings.persist_directory is not None _client_settings = client_settings elif persist_directory: diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index 8764a06e156c1..9b4147689a888 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -1,5 +1,6 @@ """Test Chroma functionality.""" - +import os.path +import shutil import uuid from typing import Generator @@ -144,6 +145,8 @@ def test_chroma_search_filter_with_scores() -> None: def test_chroma_with_persistence() -> None: """Test end to end construction and search, with persistence.""" chroma_persist_dir = "./tests/persist_dir" + if os.path.exists(chroma_persist_dir): + shutil.rmtree(chroma_persist_dir) collection_name = "test_collection" texts = ["foo", "bar", "baz"] docsearch = Chroma.from_texts( @@ -156,6 +159,8 @@ def test_chroma_with_persistence() -> None: output = docsearch.similarity_search("foo", k=1) assert output == [Document(page_content="foo")] + assert os.path.exists(chroma_persist_dir) + # Get a new VectorStore from the persisted directory docsearch = Chroma( collection_name=collection_name, @@ -171,6 +176,42 @@ def test_chroma_with_persistence() -> None: # Data will be automatically persisted on object deletion # Or on program exit +def test_chroma_with_persistence_with_client_settings() -> None: + """Test end to end construction and search, with persistence.""" + chroma_persist_dir = "./tests/persist_dir_2" + if os.path.exists(chroma_persist_dir): + shutil.rmtree(chroma_persist_dir) + + client_settings = chromadb.config.Settings() + collection_name = "test_collection" + texts = ["foo", "bar", "baz"] + docsearch = Chroma.from_texts( + collection_name=collection_name, + texts=texts, + embedding=FakeEmbeddings(), + persist_directory=chroma_persist_dir, + client_settings=client_settings + ) + + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + assert os.path.exists(chroma_persist_dir) + + # Get a new VectorStore from the persisted directory + docsearch = Chroma( + collection_name=collection_name, + embedding_function=FakeEmbeddings(), + persist_directory=chroma_persist_dir, + ) + output = docsearch.similarity_search("foo", k=1) + + # Clean up + docsearch.delete_collection() + + # Persist doesn't need to be called again + # Data will be automatically persisted on object deletion + # Or on program exit def test_chroma_mmr() -> None: """Test end to end construction and search.""" From d5057717ae1cb94cc8d8b0edad40e7694ed56a29 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Tue, 20 Aug 2024 17:22:50 -0700 Subject: [PATCH 02/13] x --- libs/partners/chroma/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/partners/chroma/.gitignore b/libs/partners/chroma/.gitignore index da0d250a6a8fd..93f133d6610f4 100644 --- a/libs/partners/chroma/.gitignore +++ b/libs/partners/chroma/.gitignore @@ -1,2 +1,3 @@ __pycache__ */persist_dir +chroma/ \ No newline at end of file From c9048445378f863d3b91a6d360054f62dfb4199c Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Tue, 20 Aug 2024 17:23:13 -0700 Subject: [PATCH 03/13] x --- libs/partners/chroma/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/partners/chroma/.gitignore b/libs/partners/chroma/.gitignore index 93f133d6610f4..aa64ea142f06f 100644 --- a/libs/partners/chroma/.gitignore +++ b/libs/partners/chroma/.gitignore @@ -1,3 +1,3 @@ __pycache__ */persist_dir -chroma/ \ No newline at end of file +chroma/ From 6f915dc34fa86566b5503097ba4e13c2b1eff587 Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Wed, 21 Aug 2024 22:05:54 -0400 Subject: [PATCH 04/13] Fixing lint issue. --- libs/partners/chroma/langchain_chroma/vectorstores.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py index 75293732b738f..822c48cc498d8 100644 --- a/libs/partners/chroma/langchain_chroma/vectorstores.py +++ b/libs/partners/chroma/langchain_chroma/vectorstores.py @@ -302,7 +302,8 @@ def __init__( client_settings.persist_directory = ( persist_directory or client_settings.persist_directory ) - client_settings.is_persistent = client_settings.persist_directory is not None + client_settings.is_persistent = ( + client_settings.persist_directory is not None) _client_settings = client_settings elif persist_directory: From 1b0527b5ce7439a5532f467ed8c5c7219c738e5c Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Mon, 16 Dec 2024 15:43:31 -0500 Subject: [PATCH 05/13] Updated tests to use tempdirectory for persistence of chroma tests. --- .../integration_tests/test_vectorstores.py | 123 ++++++++++-------- 1 file changed, 66 insertions(+), 57 deletions(-) diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index 1622c13bb557a..e6fc1143a6102 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -1,6 +1,7 @@ """Test Chroma functionality.""" import os.path import shutil +import tempfile import uuid from typing import ( Generator, @@ -269,78 +270,86 @@ def test_chroma_search_filter_with_scores() -> None: def test_chroma_with_persistence() -> None: """Test end to end construction and search, with persistence.""" - chroma_persist_dir = "./tests/persist_dir" - if os.path.exists(chroma_persist_dir): - shutil.rmtree(chroma_persist_dir) - collection_name = "test_collection" - texts = ["foo", "bar", "baz"] - ids = [f"id_{i}" for i in range(len(texts))] + with tempfile.TemporaryDirectory() as chroma_persist_dir: + collection_name = "test_collection" + texts = ["foo", "bar", "baz"] + ids = [f"id_{i}" for i in range(len(texts))] + + docsearch = Chroma.from_texts( + collection_name=collection_name, + texts=texts, + embedding=FakeEmbeddings(), + persist_directory=chroma_persist_dir, + ids=ids, + ) - docsearch = Chroma.from_texts( - collection_name=collection_name, - texts=texts, - embedding=FakeEmbeddings(), - persist_directory=chroma_persist_dir, - ids=ids, - ) + try: + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", id="id_0")] - output = docsearch.similarity_search("foo", k=1) - assert output == [Document(page_content="foo", id="id_0")] + assert os.path.exists(chroma_persist_dir) - assert os.path.exists(chroma_persist_dir) + # Get a new VectorStore from the persisted directory + docsearch = Chroma( + collection_name=collection_name, + embedding_function=FakeEmbeddings(), + persist_directory=chroma_persist_dir, + ) + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", id="id_0")] - # Get a new VectorStore from the persisted directory - docsearch = Chroma( - collection_name=collection_name, - embedding_function=FakeEmbeddings(), - persist_directory=chroma_persist_dir, - ) - output = docsearch.similarity_search("foo", k=1) - assert output == [Document(page_content="foo", id="id_0")] + # Clean up + docsearch.delete_collection() - # Clean up - docsearch.delete_collection() + # Persist doesn't need to be called again + # Data will be automatically persisted on object deletion + # Or on program exit + + finally: + docsearch._client._server._sysdb.stop() + docsearch._client._server._manager.stop() - # Persist doesn't need to be called again - # Data will be automatically persisted on object deletion - # Or on program exit def test_chroma_with_persistence_with_client_settings() -> None: """Test end to end construction and search, with persistence.""" - chroma_persist_dir = "./tests/persist_dir_2" - if os.path.exists(chroma_persist_dir): - shutil.rmtree(chroma_persist_dir) + with tempfile.TemporaryDirectory() as chroma_persist_dir: + client_settings = chromadb.config.Settings() + collection_name = "test_collection" + texts = ["foo", "bar", "baz"] + ids = [f"id_{i}" for i in range(len(texts))] + docsearch = Chroma.from_texts( + collection_name=collection_name, + texts=texts, + embedding=FakeEmbeddings(), + persist_directory=chroma_persist_dir, + client_settings=client_settings, + ids=ids + ) - client_settings = chromadb.config.Settings() - collection_name = "test_collection" - texts = ["foo", "bar", "baz"] - docsearch = Chroma.from_texts( - collection_name=collection_name, - texts=texts, - embedding=FakeEmbeddings(), - persist_directory=chroma_persist_dir, - client_settings=client_settings - ) + try: + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", id="id_0")] - output = docsearch.similarity_search("foo", k=1) - assert output == [Document(page_content="foo")] + assert os.path.exists(chroma_persist_dir) - assert os.path.exists(chroma_persist_dir) + # Get a new VectorStore from the persisted directory + docsearch = Chroma( + collection_name=collection_name, + embedding_function=FakeEmbeddings(), + persist_directory=chroma_persist_dir, + ) + output = docsearch.similarity_search("foo", k=1) - # Get a new VectorStore from the persisted directory - docsearch = Chroma( - collection_name=collection_name, - embedding_function=FakeEmbeddings(), - persist_directory=chroma_persist_dir, - ) - output = docsearch.similarity_search("foo", k=1) + # Clean up + docsearch.delete_collection() - # Clean up - docsearch.delete_collection() + # Persist doesn't need to be called again + # Data will be automatically persisted on object deletion + # Or on program exit + finally: + docsearch._client._server._sysdb.stop() + docsearch._client._server._manager.stop() - # Persist doesn't need to be called again - # Data will be automatically persisted on object deletion - # Or on program exit def test_chroma_mmr() -> None: """Test end to end construction and search.""" From 1290227be75509d74431bf841794b8eac19ebbc3 Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Mon, 16 Dec 2024 15:46:22 -0500 Subject: [PATCH 06/13] Added comments --- .../chroma/tests/integration_tests/test_vectorstores.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index e6fc1143a6102..fba0ebb5d436e 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -306,6 +306,8 @@ def test_chroma_with_persistence() -> None: # Or on program exit finally: + + # Need to stop the chrom system database and segment manager to be able to delete the files after testing docsearch._client._server._sysdb.stop() docsearch._client._server._manager.stop() @@ -347,6 +349,8 @@ def test_chroma_with_persistence_with_client_settings() -> None: # Data will be automatically persisted on object deletion # Or on program exit finally: + + # Need to stop the chrom system database and segment manager to be able to delete the files after testing docsearch._client._server._sysdb.stop() docsearch._client._server._manager.stop() From e34549b6a23debf8f4aee6b7362a3e2a3b51e517 Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Mon, 16 Dec 2024 15:51:12 -0500 Subject: [PATCH 07/13] Fix lint error. --- libs/partners/chroma/langchain_chroma/vectorstores.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py index 7b5c34dfbcf78..5c6fbe12a7776 100644 --- a/libs/partners/chroma/langchain_chroma/vectorstores.py +++ b/libs/partners/chroma/langchain_chroma/vectorstores.py @@ -319,7 +319,8 @@ def __init__( persist_directory or client_settings.persist_directory ) client_settings.is_persistent = ( - client_settings.persist_directory is not None) + client_settings.persist_directory is not None + ) _client_settings = client_settings elif persist_directory: From 58b34299ad77d1d265c4b1102bb2e28f1ae39d9e Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Mon, 16 Dec 2024 15:53:53 -0500 Subject: [PATCH 08/13] One more lint fix. --- libs/partners/chroma/langchain_chroma/vectorstores.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py index 5c6fbe12a7776..648ff439a3edb 100644 --- a/libs/partners/chroma/langchain_chroma/vectorstores.py +++ b/libs/partners/chroma/langchain_chroma/vectorstores.py @@ -319,7 +319,7 @@ def __init__( persist_directory or client_settings.persist_directory ) client_settings.is_persistent = ( - client_settings.persist_directory is not None + client_settings.persist_directory is not None ) _client_settings = client_settings From 1493e6404a15b9b788cd16ff46161a00c6220940 Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Mon, 16 Dec 2024 15:57:45 -0500 Subject: [PATCH 09/13] Remove unused shutil in tests. --- .../partners/chroma/tests/integration_tests/test_vectorstores.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index fba0ebb5d436e..6a123369dc987 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -1,6 +1,5 @@ """Test Chroma functionality.""" import os.path -import shutil import tempfile import uuid from typing import ( From 310efaf78d024b289887d9d8b536d43e0b05f686 Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Mon, 16 Dec 2024 16:00:56 -0500 Subject: [PATCH 10/13] Line too long. --- .../chroma/tests/integration_tests/test_vectorstores.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index 6a123369dc987..d8840c3b07e0b 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -306,7 +306,8 @@ def test_chroma_with_persistence() -> None: finally: - # Need to stop the chrom system database and segment manager to be able to delete the files after testing + # Need to stop the chrom system database and segment manager + # to be able to delete the files after testing docsearch._client._server._sysdb.stop() docsearch._client._server._manager.stop() @@ -349,7 +350,8 @@ def test_chroma_with_persistence_with_client_settings() -> None: # Or on program exit finally: - # Need to stop the chrom system database and segment manager to be able to delete the files after testing + # Need to stop the chrom system database and segment manager + # to be able to delete the files after testing docsearch._client._server._sysdb.stop() docsearch._client._server._manager.stop() From aa7e02e1300fb66ae98ca7c15b25fe96de1d97e2 Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Mon, 16 Dec 2024 16:03:58 -0500 Subject: [PATCH 11/13] lint fixes. --- .../chroma/tests/integration_tests/test_vectorstores.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index d8840c3b07e0b..bc5396e8f655e 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -305,7 +305,6 @@ def test_chroma_with_persistence() -> None: # Or on program exit finally: - # Need to stop the chrom system database and segment manager # to be able to delete the files after testing docsearch._client._server._sysdb.stop() @@ -325,7 +324,7 @@ def test_chroma_with_persistence_with_client_settings() -> None: embedding=FakeEmbeddings(), persist_directory=chroma_persist_dir, client_settings=client_settings, - ids=ids + ids=ids, ) try: @@ -348,8 +347,8 @@ def test_chroma_with_persistence_with_client_settings() -> None: # Persist doesn't need to be called again # Data will be automatically persisted on object deletion # Or on program exit + finally: - # Need to stop the chrom system database and segment manager # to be able to delete the files after testing docsearch._client._server._sysdb.stop() From 6bb7505e0b152dd2b2197b490fde50c65c49fd49 Mon Sep 17 00:00:00 2001 From: Vinit Kudva Date: Mon, 16 Dec 2024 16:07:07 -0500 Subject: [PATCH 12/13] lint fixes. --- .../chroma/tests/integration_tests/test_vectorstores.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index bc5396e8f655e..a58428aa93d3a 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -1,4 +1,5 @@ """Test Chroma functionality.""" + import os.path import tempfile import uuid @@ -347,7 +348,7 @@ def test_chroma_with_persistence_with_client_settings() -> None: # Persist doesn't need to be called again # Data will be automatically persisted on object deletion # Or on program exit - + finally: # Need to stop the chrom system database and segment manager # to be able to delete the files after testing From 225c49ffb66c8c44f170829bef38506dd9a0a384 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 17 Dec 2024 09:56:47 -0500 Subject: [PATCH 13/13] add asserts --- .../tests/integration_tests/test_vectorstores.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index a58428aa93d3a..7420a99ec5ef9 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -12,6 +12,7 @@ import pytest # type: ignore[import-not-found] import requests from chromadb.api.client import SharedSystemClient +from chromadb.api.segment import SegmentAPI from chromadb.api.types import Embeddable from langchain_core.documents import Document from langchain_core.embeddings.fake import FakeEmbeddings as Fak @@ -308,8 +309,11 @@ def test_chroma_with_persistence() -> None: finally: # Need to stop the chrom system database and segment manager # to be able to delete the files after testing - docsearch._client._server._sysdb.stop() - docsearch._client._server._manager.stop() + client = docsearch._client + assert isinstance(client, chromadb.ClientCreator) + assert isinstance(client._server, SegmentAPI) + client._server._sysdb.stop() + client._server._manager.stop() def test_chroma_with_persistence_with_client_settings() -> None: @@ -352,8 +356,11 @@ def test_chroma_with_persistence_with_client_settings() -> None: finally: # Need to stop the chrom system database and segment manager # to be able to delete the files after testing - docsearch._client._server._sysdb.stop() - docsearch._client._server._manager.stop() + client = docsearch._client + assert isinstance(client, chromadb.ClientCreator) + assert isinstance(client._server, SegmentAPI) + client._server._sysdb.stop() + client._server._manager.stop() def test_chroma_mmr() -> None: