From 73a163067cb33d1eff75e193bd08ca4ec8202920 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 6 Mar 2024 20:55:44 +0200 Subject: [PATCH] [BUG]: Deleting FTS entries upon deletion of individual records (#1689) Refs: #1664 ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - Follow up on #1664 ## Test plan *How are these changes tested?* - [x] Tests pass locally with `pytest` for python ## Documentation Changes N/A --- chromadb/segment/impl/metadata/sqlite.py | 19 ++++++++ chromadb/test/segment/test_metadata.py | 59 ++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/chromadb/segment/impl/metadata/sqlite.py b/chromadb/segment/impl/metadata/sqlite.py index 2e5af88b0d0..927c1f1fbba 100644 --- a/chromadb/segment/impl/metadata/sqlite.py +++ b/chromadb/segment/impl/metadata/sqlite.py @@ -404,6 +404,7 @@ def insert_into_fulltext_search() -> None: def _delete_record(self, cur: Cursor, record: EmbeddingRecord) -> None: """Delete a single EmbeddingRecord from the DB""" t = Table("embeddings") + fts_t = Table("embedding_fulltext_search") q = ( self._db.querybuilder() .from_(t) @@ -411,6 +412,23 @@ def _delete_record(self, cur: Cursor, record: EmbeddingRecord) -> None: .where(t.embedding_id == ParameterValue(record["id"])) .delete() ) + q_fts = ( + self._db.querybuilder() + .from_(fts_t) + .delete() + .where( + fts_t.rowid.isin( + self._db.querybuilder() + .from_(t) + .select(t.id) + .where( + t.segment_id == ParameterValue(self._db.uuid_to_db(self._id)) + ) + .where(t.embedding_id == ParameterValue(record["id"])) + ) + ) + ) + cur.execute(*get_sql(q_fts)) sql, params = get_sql(q) sql = sql + " RETURNING id" result = cur.execute(sql, params).fetchone() @@ -422,6 +440,7 @@ def _delete_record(self, cur: Cursor, record: EmbeddingRecord) -> None: # Manually delete metadata; cannot use cascade because # that triggers on replace metadata_t = Table("embedding_metadata") + q = ( self._db.querybuilder() .from_(metadata_t) diff --git a/chromadb/test/segment/test_metadata.py b/chromadb/test/segment/test_metadata.py index 2126c6d1feb..aaa8ec13bab 100644 --- a/chromadb/test/segment/test_metadata.py +++ b/chromadb/test/segment/test_metadata.py @@ -681,6 +681,65 @@ def test_delete_segment( assert len(res.fetchall()) == 0 +def test_delete_single_fts_record( + system: System, + sample_embeddings: Iterator[SubmitEmbeddingRecord], + produce_fns: ProducerFn, +) -> None: + producer = system.instance(Producer) + system.reset_state() + topic = str(segment_definition["topic"]) + + segment = SqliteMetadataSegment(system, segment_definition) + segment.start() + + embeddings, seq_ids = produce_fns(producer, topic, sample_embeddings, 10) + max_id = seq_ids[-1] + + sync(segment, max_id) + + assert segment.count() == 10 + results = segment.get_metadata(ids=["embedding_0"]) + assert_equiv_records(embeddings[:1], results) + _id = segment._id + _db = system.instance(SqliteDB) + # Delete by ID + delete_embedding = SubmitEmbeddingRecord( + id="embedding_0", + embedding=None, + encoding=None, + metadata=None, + operation=Operation.DELETE, + collection_id=uuid.UUID(int=0), + ) + max_id = produce_fns(producer, topic, (delete_embedding for _ in range(1)), 1)[1][ + -1 + ] + t = Table("embeddings") + + sync(segment, max_id) + fts_t = Table("embedding_fulltext_search") + q_fts = ( + _db.querybuilder() + .from_(fts_t) + .select() + .where( + fts_t.rowid.isin( + _db.querybuilder() + .from_(t) + .select(t.id) + .where(t.segment_id == ParameterValue(_db.uuid_to_db(_id))) + .where(t.embedding_id == ParameterValue(delete_embedding["id"])) + ) + ) + ) + sql, params = get_sql(q_fts) + with _db.tx() as cur: + res = cur.execute(sql, params) + # assert that the ids that are deleted from the segment are also gone from the fts table + assert len(res.fetchall()) == 0 + + def test_metadata_validation_forbidden_key() -> None: with pytest.raises(ValueError, match="chroma:document"): validate_metadata(