diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
index 0c5802b3536e6..5ed59ea0d43d0 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/llama_index/embeddings/nvidia/base.py
@@ -29,6 +29,7 @@
     "nvidia/nv-embedqa-e5-v5": "https://integrate.api.nvidia.com/v1/",
     "baai/bge-m3": "https://integrate.api.nvidia.com/v1/",
     "nvidia/llama-3.2-nv-embedqa-1b-v1": "https://integrate.api.nvidia.com/v1/",
+    "nvidia/llama-3.2-nv-embedqa-1b-v2": "https://integrate.api.nvidia.com/v1/",
 }

 KNOWN_URLS = list(MODEL_ENDPOINT_MAP.values())
@@ -65,6 +66,14 @@ class NVIDIAEmbedding(BaseEmbedding):
         ge=0,
     )

+    dimensions: Optional[int] = Field(
+        default=None,
+        description=(
+            "The number of dimensions for the embeddings. This parameter is not "
+            "supported by all models."
+        ),
+    )
+
     _client: Any = PrivateAttr()
     _aclient: Any = PrivateAttr()
     _is_hosted: bool = PrivateAttr(True)
@@ -74,6 +83,7 @@ def __init__(
         model: Optional[str] = None,
         timeout: Optional[float] = 120,
         max_retries: Optional[int] = 5,
+        dimensions: Optional[int] = None,
         nvidia_api_key: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
@@ -91,6 +101,8 @@ def __init__(
         - model (str, optional): The name of the model to use for embeddings.
         - timeout (float, optional): The timeout for requests to the NIM service, in seconds. Defaults to 120.
         - max_retries (int, optional): The maximum number of retries for requests to the NIM service. Defaults to 5.
+        - dimensions (int, optional): The number of dimensions for the embeddings. This
+          parameter is not supported by all models.
         - nvidia_api_key (str, optional): The API key for the NIM service. This is required if using a hosted NIM.
         - api_key (str, optional): An alternative parameter for providing the API key.
         - base_url (str, optional): The base URL for the NIM service. If not provided, the service will default to a hosted NIM.
@@ -106,8 +118,10 @@ def __init__(
             model=model,
             embed_batch_size=embed_batch_size,
             callback_manager=callback_manager,
+            dimensions=dimensions,
             **kwargs,
         )
+        self.dimensions = dimensions

         if embed_batch_size > 259:
             raise ValueError("The batch size should not be larger than 259.")
@@ -235,11 +249,14 @@ def class_name(cls) -> str:

     def _get_query_embedding(self, query: str) -> List[float]:
         """Get query embedding."""
+        extra_body = {"input_type": "query", "truncate": self.truncate}
+        if self.dimensions:
+            extra_body["dimensions"] = self.dimensions
         return (
             self._client.embeddings.create(
                 input=[query],
                 model=self.model,
-                extra_body={"input_type": "query", "truncate": self.truncate},
+                extra_body=extra_body,
             )
             .data[0]
             .embedding
@@ -247,11 +264,14 @@ def _get_text_embedding(self, text: str) -> List[float]:
         """Get text embedding."""
+        extra_body = {"input_type": "passage", "truncate": self.truncate}
+        if self.dimensions:
+            extra_body["dimensions"] = self.dimensions
         return (
             self._client.embeddings.create(
                 input=[text],
                 model=self.model,
-                extra_body={"input_type": "passage", "truncate": self.truncate},
+                extra_body=extra_body,
             )
             .data[0]
             .embedding
@@ -260,11 +280,13 @@ def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
         """Get text embeddings."""
         assert len(texts) <= 259, "The batch size should not be larger than 259."
-
+        extra_body = {"input_type": "passage", "truncate": self.truncate}
+        if self.dimensions:
+            extra_body["dimensions"] = self.dimensions
         data = self._client.embeddings.create(
             input=texts,
             model=self.model,
-            extra_body={"input_type": "passage", "truncate": self.truncate},
+            extra_body=extra_body,
         ).data

         return [d.embedding for d in data]
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml
index 6bace74e23bc9..279e2b5345edf 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/pyproject.toml
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-embeddings-nvidia"
 readme = "README.md"
-version = "0.3.0"
+version = "0.3.1"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
index 30873c1baee59..e5705ce741524 100644
--- a/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nvidia/tests/test_integration.py
@@ -10,3 +10,64 @@ def test_basic(model: str, mode: dict) -> None:
     assert isinstance(response, list)
     assert len(response) > 0
     assert isinstance(response[0], float)
+
+
+## ================== nvidia/llama-3.2-nv-embedqa-1b-v2 model dimensions param test cases ==================
+@pytest.mark.integration()
+@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
+def test_embed_text_with_dimensions(mode: dict, dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    text = "foo bar"
+    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
+    assert len(embedding.get_text_embedding(text)) == dimensions
+
+
+@pytest.mark.integration()
+@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
+def test_embed_query_with_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    query = "foo bar"
+    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
+    assert len(embedding.get_query_embedding(query)) == dimensions
+
+
+@pytest.mark.integration()
+@pytest.mark.parametrize("dimensions", [102400])
+def test_embed_query_with_large_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    query = "foo bar"
+    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
+    assert 2048 <= len(embedding.get_query_embedding(query)) < dimensions
+
+
+@pytest.mark.integration()
+@pytest.mark.parametrize("dimensions", [102400])
+def test_embed_documents_with_large_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    documents = ["foo bar", "bar foo"]
+    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
+    output = embedding.get_text_embedding_batch(documents)
+    assert len(output) == len(documents)
+    assert all(2048 <= len(doc) < dimensions for doc in output)
+
+
+@pytest.mark.integration()
+@pytest.mark.parametrize("dimensions", [-1])
+def test_embed_query_invalid_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    query = "foo bar"
+    with pytest.raises(Exception) as exc:
+        NVIDIAEmbedding(model=model, dimensions=dimensions).get_query_embedding(query)
+    assert "400" in str(exc.value)
+
+
+@pytest.mark.integration()
+@pytest.mark.parametrize("dimensions", [-1])
+def test_embed_documents_invalid_dimensions(dimensions: int) -> None:
+    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
+    documents = ["foo bar", "bar foo"]
+    with pytest.raises(Exception) as exc:
+        NVIDIAEmbedding(model=model, dimensions=dimensions).get_text_embedding_batch(
+            documents
+        )
+    assert "400" in str(exc.value)
diff --git a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py
index e0e595448460b..38b499baf4910 100644
--- a/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py
+++ b/llama-index-integrations/postprocessor/llama-index-postprocessor-nvidia-rerank/llama_index/postprocessor/nvidia_rerank/base.py
@@ -22,6 +22,7 @@
     "nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking",
     "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
     "nvidia/llama-3.2-nv-rerankqa-1b-v1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v1/reranking",
+    "nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
 }

 dispatcher = get_dispatcher(__name__)
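
Usage sketch for the new `dimensions` parameter (illustrative, not part of the diff; assumes a hosted NIM endpoint with credentials supplied via the `nvidia_api_key`/`api_key` arguments described in the docstring, or an NVIDIA_API_KEY environment variable):

    from llama_index.embeddings.nvidia import NVIDIAEmbedding

    # Request 128-dimensional vectors from the v2 model. With dimensions=None
    # (the default), the `if self.dimensions:` guard leaves the request body
    # unchanged, so models that don't support the parameter are unaffected.
    embedding = NVIDIAEmbedding(
        model="nvidia/llama-3.2-nv-embedqa-1b-v2",
        dimensions=128,
    )

    vector = embedding.get_query_embedding("foo bar")
    assert len(vector) == 128  # mirrors test_embed_query_with_dimensions

Per the integration tests, oversized requests (e.g. 102400) come back with at least 2048 but fewer than the requested number of dimensions, and negative values are rejected by the service with an HTTP 400.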