Skip to content

Commit

Permalink
[PERF] Multipart s3 downloads passing through NAC (#2698)
Browse files Browse the repository at this point in the history
*Summarize the changes made by this PR.*
 - Introduces a parallel fetch API that splits each GET into parallel 8 MB downloads
 - These requests pass through admission control, so they are rate-limited and deduplicated
 - The HNSW provider consumes this API

Added a Rust test.
- [x] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

None
  • Loading branch information
sanketkedia authored and spikechroma committed Aug 22, 2024
1 parent 6e7f7e4 commit 84c08d4
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 24 deletions.
6 changes: 1 addition & 5 deletions chromadb/api/models/AsyncCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def add(
images,
uris,
)

await self._client._add(
embedding_set["ids"],
self.id,
Expand All @@ -84,10 +84,6 @@ async def add(
embedding_set["uris"],
)

return {
"ids": embedding_set["ids"],
}

async def count(self) -> int:
"""The total number of embeddings added to the database
Expand Down
4 changes: 0 additions & 4 deletions chromadb/api/models/Collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,6 @@ def add(
embedding_set["uris"],
)

return {
"ids": embedding_set["ids"],
}

def get(
self,
ids: Optional[OneOrMany[ID]] = None,
Expand Down
10 changes: 4 additions & 6 deletions chromadb/api/models/CollectionCommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def get_model(self) -> CollectionModel:

def _unpack_embedding_set(
self,
ids: Optional[OneOrMany[ID]],
ids: OneOrMany[ID],
embeddings: Optional[
Union[
OneOrMany[Embedding],
Expand Down Expand Up @@ -180,16 +180,15 @@ def _unpack_embedding_set(

def _validate_embedding_set(
self,
ids: Optional[IDs],
ids: IDs,
embeddings: Optional[Embeddings],
metadatas: Optional[Metadatas],
documents: Optional[Documents],
images: Optional[Images],
uris: Optional[URIs],
require_embeddings_or_data: bool = True,
can_ids_be_empty: bool = False,
) -> None:
valid_ids = validate_ids(ids, can_ids_be_empty=can_ids_be_empty)
valid_ids = validate_ids(ids)
valid_embeddings = (
validate_embeddings(embeddings) if embeddings is not None else None
)
Expand Down Expand Up @@ -428,7 +427,7 @@ def _update_model_after_modify_success(

def _process_add_request(
self,
ids: Optional[OneOrMany[ID]],
ids: OneOrMany[ID],
embeddings: Optional[
Union[
OneOrMany[Embedding],
Expand Down Expand Up @@ -463,7 +462,6 @@ def _process_add_request(
unpacked_embedding_set["images"],
unpacked_embedding_set["uris"],
require_embeddings_or_data=False,
can_ids_be_empty=True,
)

prepared_embeddings = self._compute_embeddings(
Expand Down
9 changes: 0 additions & 9 deletions chromadb/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,15 +165,6 @@ class IncludeEnum(str, Enum):
data = "data"


# Record set
class RecordSet(TypedDict):
ids: IDs
embeddings: Optional[Embeddings]
metadatas: Optional[Metadatas]
documents: Optional[Documents]
images: Optional[Images]
uris: Optional[URIs]

# Record set
class RecordSet(TypedDict):
ids: IDs
Expand Down

0 comments on commit 84c08d4

Please sign in to comment.