From 107bfc56d72ef69a65dffaa31ee304be4260de30 Mon Sep 17 00:00:00 2001
From: raspawar
Date: Thu, 2 Jan 2025 19:12:51 +0530
Subject: [PATCH 1/4] add support for nvidia/llama-3.2-nv-embedqa-1b-v2's
 dimensions param

---
 .../llama_index/embeddings/nvidia/base.py | 30 ++++++++--
 .../tests/test_integration.py             | 55 +++++++++++++++++++
 2 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
index 0c5802b3536e6..5ed59ea0d43d0 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
@@ -29,6 +29,7 @@
     "nvidia/nv-embedqa-e5-v5": "https://integrate.api.nvidia.com/v1/",
     "baai/bge-m3": "https://integrate.api.nvidia.com/v1/",
     "nvidia/llama-3.2-nv-embedqa-1b-v1": "https://integrate.api.nvidia.com/v1/",
+    "nvidia/llama-3.2-nv-embedqa-1b-v2": "https://integrate.api.nvidia.com/v1/",
 }
 
 KNOWN_URLS = list(MODEL_ENDPOINT_MAP.values())
@@ -65,6 +66,14 @@ class NVIDIAEmbedding(BaseEmbedding):
         ge=0,
     )
 
+    dimensions: Optional[int] = Field(
+        default=None,
+        description=(
+            "The number of dimensions for the embeddings. This parameter is not "
+            "supported by all models."
+        ),
+    )
+
     _client: Any = PrivateAttr()
     _aclient: Any = PrivateAttr()
     _is_hosted: bool = PrivateAttr(True)
@@ -74,6 +83,7 @@ def __init__(
         self,
         model: Optional[str] = None,
         timeout: Optional[float] = 120,
         max_retries: Optional[int] = 5,
+        dimensions: Optional[int] = None,
         nvidia_api_key: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
@@ -91,6 +101,8 @@ def __init__(
         - model (str, optional): The name of the model to use for embeddings.
         - timeout (float, optional): The timeout for requests to the NIM service, in seconds. Defaults to 120.
         - max_retries (int, optional): The maximum number of retries for requests to the NIM service. Defaults to 5.
+        - dimensions (int, optional): The number of dimensions for the embeddings. This
+          parameter is not supported by all models.
         - nvidia_api_key (str, optional): The API key for the NIM service. This is required if using a hosted NIM.
         - api_key (str, optional): An alternative parameter for providing the API key.
         - base_url (str, optional): The base URL for the NIM service. If not provided, the service will default to a hosted NIM.
@@ -106,8 +118,10 @@ def __init__(
             model=model,
             embed_batch_size=embed_batch_size,
             callback_manager=callback_manager,
+            dimensions=dimensions,
             **kwargs,
         )
+        self.dimensions = dimensions
 
         if embed_batch_size > 259:
             raise ValueError("The batch size should not be larger than 259.")
@@ -235,11 +249,14 @@ def class_name(cls) -> str:
 
     def _get_query_embedding(self, query: str) -> List[float]:
         """Get query embedding."""
+        extra_body = {"input_type": "query", "truncate": self.truncate}
+        if self.dimensions:
+            extra_body["dimensions"] = self.dimensions
         return (
             self._client.embeddings.create(
                 input=[query],
                 model=self.model,
-                extra_body={"input_type": "query", "truncate": self.truncate},
+                extra_body=extra_body,
             )
             .data[0]
             .embedding
@@ -247,11 +264,14 @@ def _get_query_embedding(self, query: str) -> List[float]:
 
     def _get_text_embedding(self, text: str) -> List[float]:
         """Get text embedding."""
+        extra_body = {"input_type": "passage", "truncate": self.truncate}
+        if self.dimensions:
+            extra_body["dimensions"] = self.dimensions
         return (
             self._client.embeddings.create(
                 input=[text],
                 model=self.model,
-                extra_body={"input_type": "passage", "truncate": self.truncate},
+                extra_body=extra_body,
             )
             .data[0]
             .embedding
@@ -260,11 +280,13 @@ def _get_text_embedding(self, text: str) -> List[float]:
     def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
         """Get text embeddings."""
         assert len(texts) <= 259, "The batch size should not be larger than 259."
-
+        extra_body = {"input_type": "passage", "truncate": self.truncate}
+        if self.dimensions:
+            extra_body["dimensions"] = self.dimensions
         data = self._client.embeddings.create(
             input=texts,
             model=self.model,
-            extra_body={"input_type": "passage", "truncate": self.truncate},
+            extra_body=extra_body,
         ).data
 
         return [d.embedding for d in data]
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
index 30873c1baee59..9b630b51da21d 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
@@ -10,3 +10,58 @@ def test_basic(model: str, mode: dict) -> None:
     assert isinstance(response, list)
     assert len(response) > 0
     assert isinstance(response[0], float)
+
+
+## ================== nvidia/llama-3.2-nv-embedqa-1b-v2 model dimensions param test cases ==================
+@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
+def test_embed_text_with_dimensions(mode: dict, dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    query = "foo bar"
+    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
+    assert len(embedding.get_text_embedding(query)) == dimensions
+
+
+@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
+def test_embed_query_with_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    query = "foo bar"
+    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
+    assert len(embedding.get_query_embedding(query)) == dimensions
+
+
+@pytest.mark.parametrize("dimensions", [102400])
+def test_embed_query_with_large_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    query = "foo bar"
+    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
+    assert 2048 <= len(embedding.get_query_embedding(query)) < dimensions
+
+
+@pytest.mark.parametrize("dimensions", [102400])
+def test_embed_documents_with_large_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    documents = ["foo bar", "bar foo"]
+    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
+    output = embedding.get_text_embedding_batch(documents)
+    assert len(output) == len(documents)
+    assert all(2048 <= len(doc) < dimensions for doc in output)
+
+
+@pytest.mark.parametrize("dimensions", [-1])
+def test_embed_query_invalid_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    query = "foo bar"
+    with pytest.raises(Exception) as exc:
+        NVIDIAEmbedding(model=model, dimensions=dimensions).get_query_embedding(query)
+    assert "400" in str(exc.value)
+
+
+@pytest.mark.parametrize("dimensions", [-1])
+def test_embed_documents_invalid_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    documents = ["foo bar", "bar foo"]
+    with pytest.raises(Exception) as exc:
+        NVIDIAEmbedding(model=model, dimensions=dimensions).get_text_embedding_batch(
+            documents
+        )
+    assert "400" in str(exc.value)
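
A minimal usage sketch of the dimensions parameter added in the patch above
(illustrative, not part of the patch; assumes NVIDIA_API_KEY is set in the
environment and the hosted NIM endpoint is reachable):

    from llama_index.embeddings.nvidia import NVIDIAEmbedding

    # Request 128-dimensional vectors from the v2 model. The parameter is
    # only forwarded to the service when truthy, so omitting it preserves
    # the previous behavior for models that do not support it.
    embedder = NVIDIAEmbedding(
        model="nvidia/llama-3.2-nv-embedqa-1b-v2",
        dimensions=128,
    )
    assert len(embedder.get_query_embedding("foo bar")) == 128
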
+@pytest.mark.parametrize("dimensions", [102400]) +def test_embed_documents_with_large_dimensions(dimensions: int) -> None: + model = "nvidia/llama-3.2-nv-embedqa-1b-v2" + documents = ["foo bar", "bar foo"] + embedding = NVIDIAEmbedding(model=model, dimensions=dimensions) + output = embedding.get_text_embedding_batch(documents) + assert len(output) == len(documents) + assert all(2048 <= len(doc) < dimensions for doc in output) + + +@pytest.mark.parametrize("dimensions", [-1]) +def test_embed_query_invalid_dimensions(dimensions: int) -> None: + model = "nvidia/llama-3.2-nv-embedqa-1b-v2" + query = "foo bar" + with pytest.raises(Exception) as exc: + NVIDIAEmbedding(model=model, dimensions=dimensions).get_query_embedding(query) + assert "400" in str(exc.value) + + +@pytest.mark.parametrize("dimensions", [-1]) +def test_embed_documents_invalid_dimensions(dimensions: int) -> None: + model = "nvidia/llama-3.2-nv-embedqa-1b-v2" + documents = ["foo bar", "bar foo"] + with pytest.raises(Exception) as exc: + NVIDIAEmbedding(model=model, dimensions=dimensions).get_text_embedding_batch( + documents + ) + assert "400" in str(exc.value) From 75815719d767c376fb98f5c4b321f077b44f6f7f Mon Sep 17 00:00:00 2001 From: raspawar Date: Thu, 2 Jan 2025 19:19:46 +0530 Subject: [PATCH 2/4] version bump --- .../embeddings/llama-index-embeddings-nvidia/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml index 6bace74e23bc9..279e2b5345edf 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-embeddings-nvidia" readme = "README.md" -version = "0.3.0" +version = "0.3.1" [tool.poetry.dependencies] python = ">=3.9,<4.0" From 86f08f7a0e12582ff466558b4d161f8cba2b1a6d Mon Sep 17 00:00:00 2001 From: raspawar Date: Thu, 2 Jan 2025 19:34:40 +0530 Subject: [PATCH 3/4] add support for v2 reranking model --- .../llama_index/postprocessor/nvidia_rerank/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py index e0e595448460b..38b499baf4910 100644 --- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py +++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py @@ -22,6 +22,7 @@ "nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", "nvidia/llama-3.2-nv-rerankqa-1b-v1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v1/reranking", + "nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", } dispatcher = get_dispatcher(__name__) From 0c2cbb768f8faecbde518094bc5835ee8cb8ec01 Mon Sep 17 00:00:00 2001 From: raspawar Date: Thu, 2 Jan 2025 19:35:21 +0530 Subject: [PATCH 4/4] mark integration test cases 
From 0c2cbb768f8faecbde518094bc5835ee8cb8ec01 Mon Sep 17 00:00:00 2001
From: raspawar
Date: Thu, 2 Jan 2025 19:35:21 +0530
Subject: [PATCH 4/4] mark integration test cases

---
 .../llama-index-embeddings-nvidia/tests/test_integration.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
index 9b630b51da21d..e5705ce741524 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
@@ -13,6 +13,7 @@ def test_basic(model: str, mode: dict) -> None:
 
 
 ## ================== nvidia/llama-3.2-nv-embedqa-1b-v2 model dimensions param test cases ==================
+@pytest.mark.integration()
 @pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
 def test_embed_text_with_dimensions(mode: dict, dimensions: int) -> None:
     model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
@@ -21,6 +22,7 @@ def test_embed_text_with_dimensions(mode: dict, dimensions: int) -> None:
     assert len(embedding.get_text_embedding(query)) == dimensions
 
 
+@pytest.mark.integration()
 @pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
 def test_embed_query_with_dimensions(dimensions: int) -> None:
     model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
@@ -29,6 +31,7 @@ def test_embed_query_with_dimensions(dimensions: int) -> None:
     assert len(embedding.get_query_embedding(query)) == dimensions
 
 
+@pytest.mark.integration()
 @pytest.mark.parametrize("dimensions", [102400])
 def test_embed_query_with_large_dimensions(dimensions: int) -> None:
     model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
@@ -37,6 +40,7 @@ def test_embed_query_with_large_dimensions(dimensions: int) -> None:
     assert 2048 <= len(embedding.get_query_embedding(query)) < dimensions
 
 
+@pytest.mark.integration()
 @pytest.mark.parametrize("dimensions", [102400])
 def test_embed_documents_with_large_dimensions(dimensions: int) -> None:
     model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
@@ -47,6 +51,7 @@ def test_embed_documents_with_large_dimensions(dimensions: int) -> None:
     assert all(2048 <= len(doc) < dimensions for doc in output)
 
 
+@pytest.mark.integration()
 @pytest.mark.parametrize("dimensions", [-1])
 def test_embed_query_invalid_dimensions(dimensions: int) -> None:
     model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
@@ -56,6 +61,7 @@ def test_embed_query_invalid_dimensions(dimensions: int) -> None:
     assert "400" in str(exc.value)
 
 
+@pytest.mark.integration()
 @pytest.mark.parametrize("dimensions", [-1])
 def test_embed_documents_invalid_dimensions(dimensions: int) -> None:
     model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
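
Per the integration tests marked above, the dimensions parameter is validated
service-side: an oversized request yields a vector of at least the model's
base 2048 dimensions, and a negative value surfaces a 400 error. A sketch of
the oversized case (illustrative, not part of the patch; same assumptions as
the earlier sketches):

    from llama_index.embeddings.nvidia import NVIDIAEmbedding

    emb = NVIDIAEmbedding(model="nvidia/llama-3.2-nv-embedqa-1b-v2", dimensions=102400)
    vec = emb.get_query_embedding("foo bar")
    # The hosted endpoint returns at least 2048 dimensions rather than the
    # requested 102400 (mirrors test_embed_query_with_large_dimensions).
    assert 2048 <= len(vec) < 102400
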