NVIDIA Support for v2 Embedding & Reranking NIMs #17410

Merged

llama-index-embeddings-nvidia: embedding module

@@ -29,6 +29,7 @@
"nvidia/nv-embedqa-e5-v5": "https://integrate.api.nvidia.com/v1/",
"baai/bge-m3": "https://integrate.api.nvidia.com/v1/",
"nvidia/llama-3.2-nv-embedqa-1b-v1": "https://integrate.api.nvidia.com/v1/",
"nvidia/llama-3.2-nv-embedqa-1b-v2": "https://integrate.api.nvidia.com/v1/",
}

KNOWN_URLS = list(MODEL_ENDPOINT_MAP.values())
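
The new v2 model is served from the same hosted base URL as the earlier embedding models. A trivial illustration of the lookup (assuming the MODEL_ENDPOINT_MAP and KNOWN_URLS defined above are in scope):

base_url = MODEL_ENDPOINT_MAP["nvidia/llama-3.2-nv-embedqa-1b-v2"]
assert base_url == "https://integrate.api.nvidia.com/v1/"
assert base_url in KNOWN_URLS  # KNOWN_URLS is just the map's values
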
@@ -65,6 +66,14 @@ class NVIDIAEmbedding(BaseEmbedding):
ge=0,
)

dimensions: Optional[int] = Field(
default=None,
description=(
"The number of dimensions for the embeddings. This parameter is not "
"supported by all models."
),
)

_client: Any = PrivateAttr()
_aclient: Any = PrivateAttr()
_is_hosted: bool = PrivateAttr(True)
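
A minimal usage sketch of the new field, assuming the package's standard import path and an NVIDIA_API_KEY in the environment; 384 is an arbitrary illustrative value:

from llama_index.embeddings.nvidia import NVIDIAEmbedding

embed_model = NVIDIAEmbedding(
    model="nvidia/llama-3.2-nv-embedqa-1b-v2",
    dimensions=384,  # only honored by models that support variable dimensions
)
vector = embed_model.get_query_embedding("What is a NIM?")
print(len(vector))  # expected: 384 for models that honor the parameter
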
@@ -74,6 +83,7 @@ def __init__(
model: Optional[str] = None,
timeout: Optional[float] = 120,
max_retries: Optional[int] = 5,
dimensions: Optional[int] = None,
nvidia_api_key: Optional[str] = None,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
@@ -91,6 +101,8 @@
- model (str, optional): The name of the model to use for embeddings.
- timeout (float, optional): The timeout for requests to the NIM service, in seconds. Defaults to 120.
- max_retries (int, optional): The maximum number of retries for requests to the NIM service. Defaults to 5.
- dimensions (int, optional): The number of dimensions for the embeddings. This
parameter is not supported by all models.
- nvidia_api_key (str, optional): The API key for the NIM service. This is required if using a hosted NIM.
- api_key (str, optional): An alternative parameter for providing the API key.
- base_url (str, optional): The base URL for the NIM service. If not provided, the service will default to a hosted NIM.
@@ -106,8 +118,10 @@
model=model,
embed_batch_size=embed_batch_size,
callback_manager=callback_manager,
dimensions=dimensions,
**kwargs,
)
self.dimensions = dimensions

if embed_batch_size > 259:
raise ValueError("The batch size should not be larger than 259.")
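
The constructor rejects oversized batches up front. A sketch of that failure mode, assuming construction otherwise succeeds (e.g. credentials are configured before the batch-size check fires):

import pytest
from llama_index.embeddings.nvidia import NVIDIAEmbedding

with pytest.raises(ValueError, match="259"):
    NVIDIAEmbedding(model="nvidia/llama-3.2-nv-embedqa-1b-v2", embed_batch_size=300)
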
@@ -235,23 +249,29 @@ def class_name(cls) -> str:

def _get_query_embedding(self, query: str) -> List[float]:
"""Get query embedding."""
extra_body = {"input_type": "passage", "truncate": self.truncate}
if self.dimensions:
extra_body["dimensions"] = self.dimensions
return (
self._client.embeddings.create(
input=[query],
model=self.model,
extra_body={"input_type": "query", "truncate": self.truncate},
extra_body=extra_body,
)
.data[0]
.embedding
)

def _get_text_embedding(self, text: str) -> List[float]:
"""Get text embedding."""
extra_body = {"input_type": "passage", "truncate": self.truncate}
if self.dimensions:
extra_body["dimensions"] = self.dimensions
return (
self._client.embeddings.create(
input=[text],
model=self.model,
extra_body={"input_type": "passage", "truncate": self.truncate},
extra_body=extra_body,
)
.data[0]
.embedding
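
Since the OpenAI client merges extra_body into the JSON request body, a query call presumably produces a payload along these lines (illustrative values; truncate shown with an assumed default):

# Illustrative request body for a query embedding with dimensions=384:
payload = {
    "input": ["What is a NIM?"],
    "model": "nvidia/llama-3.2-nv-embedqa-1b-v2",
    "input_type": "query",   # "passage" for document-side embeddings
    "truncate": "NONE",      # assumed default for self.truncate
    "dimensions": 384,       # omitted entirely when self.dimensions is falsy
}
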
@@ -260,11 +280,13 @@ def _get_text_embedding(self, text: str) -> List[float]:
def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
"""Get text embeddings."""
assert len(texts) <= 259, "The batch size should not be larger than 259."

extra_body = {"input_type": "passage", "truncate": self.truncate}
if self.dimensions:
extra_body["dimensions"] = self.dimensions
data = self._client.embeddings.create(
input=texts,
model=self.model,
extra_body={"input_type": "passage", "truncate": self.truncate},
extra_body=extra_body,
).data
return [d.embedding for d in data]
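
The same three-line extra_body pattern now appears in all three embedding methods. A hypothetical consolidation (not part of this PR) that preserves the behavior, including the falsy check that drops the key for both None and 0:

from typing import Any, Dict, Optional

def _build_extra_body(input_type: str, truncate: str, dimensions: Optional[int]) -> Dict[str, Any]:
    # Hypothetical helper mirroring the pattern repeated above.
    extra_body: Dict[str, Any] = {"input_type": input_type, "truncate": truncate}
    if dimensions:  # None and 0 are both falsy, so the key is omitted for either
        extra_body["dimensions"] = dimensions
    return extra_body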

llama-index-embeddings-nvidia: pyproject.toml

@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-embeddings-nvidia"
readme = "README.md"
version = "0.3.0"
version = "0.3.1"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"

llama-index-embeddings-nvidia: tests

@@ -10,3 +10,64 @@ def test_basic(model: str, mode: dict) -> None:
assert isinstance(response, list)
assert len(response) > 0
assert isinstance(response[0], float)


## ================== nvidia/llama-3.2-nv-embedqa-1b-v2 model dimensions param test cases ==================
@pytest.mark.integration()
@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
def test_embed_text_with_dimensions(dimensions: int) -> None:
    model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
    text = "foo bar"
    embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
    assert len(embedding.get_text_embedding(text)) == dimensions


@pytest.mark.integration()
@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
def test_embed_query_with_dimensions(dimensions: int) -> None:
model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
query = "foo bar"
embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
assert len(embedding.get_query_embedding(query)) == dimensions


@pytest.mark.integration()
@pytest.mark.parametrize("dimensions", [102400])
def test_embed_query_with_large_dimensions(dimensions: int) -> None:
model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
query = "foo bar"
embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
assert 2048 <= len(embedding.get_query_embedding(query)) < dimensions


@pytest.mark.integration()
@pytest.mark.parametrize("dimensions", [102400])
def test_embed_documents_with_large_dimensions(dimensions: int) -> None:
model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
documents = ["foo bar", "bar foo"]
embedding = NVIDIAEmbedding(model=model, dimensions=dimensions)
output = embedding.get_text_embedding_batch(documents)
assert len(output) == len(documents)
assert all(2048 <= len(doc) < dimensions for doc in output)


@pytest.mark.integration()
@pytest.mark.parametrize("dimensions", [-1])
def test_embed_query_invalid_dimensions(dimensions: int) -> None:
model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
query = "foo bar"
with pytest.raises(Exception) as exc:
NVIDIAEmbedding(model=model, dimensions=dimensions).get_query_embedding(query)
assert "400" in str(exc.value)


@pytest.mark.integration()
@pytest.mark.parametrize("dimensions", [-1])
def test_embed_documents_invalid_dimensions(dimensions: int) -> None:
model = "nvidia/llama-3.2-nv-embedqa-1b-v2"
documents = ["foo bar", "bar foo"]
with pytest.raises(Exception) as exc:
NVIDIAEmbedding(model=model, dimensions=dimensions).get_text_embedding_batch(
documents
)
assert "400" in str(exc.value)

llama-index-postprocessor-nvidia-rerank: rerank module

@@ -22,6 +22,7 @@
"nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking",
"nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
"nvidia/llama-3.2-nv-rerankqa-1b-v1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v1/reranking",
"nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
}

dispatcher = get_dispatcher(__name__)
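
A usage sketch for the newly mapped reranking model, assuming the NVIDIARerank postprocessor exported by this package and a configured API key:

from llama_index.core.schema import NodeWithScore, TextNode
from llama_index.postprocessor.nvidia_rerank import NVIDIARerank

reranker = NVIDIARerank(model="nvidia/llama-3.2-nv-rerankqa-1b-v2", top_n=2)
nodes = [NodeWithScore(node=TextNode(text=t)) for t in ("llamas", "NIM microservices", "rerankers")]
ranked = reranker.postprocess_nodes(nodes, query_str="What is a NIM?")
print([n.node.get_content() for n in ranked])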