diff --git a/ddtrace/contrib/google_generativeai/__init__.py b/ddtrace/contrib/google_generativeai/__init__.py index 6255b46d086..03cb35c9eae 100644 --- a/ddtrace/contrib/google_generativeai/__init__.py +++ b/ddtrace/contrib/google_generativeai/__init__.py @@ -77,7 +77,7 @@ Pin.override(genai, service="my-gemini-service") """ # noqa: E501 -from ...internal.utils.importlib import require_modules +from ddtrace.internal.utils.importlib import require_modules required_modules = ["google.generativeai"] diff --git a/ddtrace/contrib/internal/google_generativeai/_utils.py b/ddtrace/contrib/internal/google_generativeai/_utils.py index 44fd1db7729..a4e46383828 100644 --- a/ddtrace/contrib/internal/google_generativeai/_utils.py +++ b/ddtrace/contrib/internal/google_generativeai/_utils.py @@ -30,6 +30,10 @@ def __iter__(self): else: tag_response(self._dd_span, self.__wrapped__, self._dd_integration, self._model_instance) finally: + if self._dd_integration.is_pc_sampled_llmobs(self._dd_span): + self._dd_integration.llmobs_set_tags( + self._dd_span, self._args, self._kwargs, self._model_instance, self.__wrapped__ + ) self._dd_span.finish() @@ -44,6 +48,10 @@ async def __aiter__(self): else: tag_response(self._dd_span, self.__wrapped__, self._dd_integration, self._model_instance) finally: + if self._dd_integration.is_pc_sampled_llmobs(self._dd_span): + self._dd_integration.llmobs_set_tags( + self._dd_span, self._args, self._kwargs, self._model_instance, self.__wrapped__ + ) self._dd_span.finish() diff --git a/ddtrace/contrib/internal/google_generativeai/patch.py b/ddtrace/contrib/internal/google_generativeai/patch.py index 43e30e5834d..eb131bb0bce 100644 --- a/ddtrace/contrib/internal/google_generativeai/patch.py +++ b/ddtrace/contrib/internal/google_generativeai/patch.py @@ -43,6 +43,7 @@ def traced_generate(genai, pin, func, instance, args, kwargs): "%s.%s" % (instance.__class__.__name__, func.__name__), provider="google", model=_extract_model_name(instance), + submit_to_llmobs=True, ) try: tag_request(span, integration, instance, args, kwargs) @@ -59,6 +60,8 @@ def traced_generate(genai, pin, func, instance, args, kwargs): finally: # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span, args, kwargs, instance, generations) span.finish() return generations @@ -73,6 +76,7 @@ async def traced_agenerate(genai, pin, func, instance, args, kwargs): "%s.%s" % (instance.__class__.__name__, func.__name__), provider="google", model=_extract_model_name(instance), + submit_to_llmobs=True, ) try: tag_request(span, integration, instance, args, kwargs) @@ -86,6 +90,8 @@ async def traced_agenerate(genai, pin, func, instance, args, kwargs): finally: # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span, args, kwargs, instance, generations) span.finish() return generations diff --git a/ddtrace/llmobs/_constants.py b/ddtrace/llmobs/_constants.py index 396747bf4e6..6c6c2ae8225 100644 --- a/ddtrace/llmobs/_constants.py +++ b/ddtrace/llmobs/_constants.py @@ -23,6 +23,7 @@ "Span started while LLMObs is disabled." " Spans will not be sent to LLM Observability." 
 )
+GEMINI_APM_SPAN_NAME = "gemini.request"
 LANGCHAIN_APM_SPAN_NAME = "langchain.request"
 OPENAI_APM_SPAN_NAME = "openai.request"
diff --git a/ddtrace/llmobs/_integrations/gemini.py b/ddtrace/llmobs/_integrations/gemini.py
index 34f486cd0e8..ff2ff2e91ad 100644
--- a/ddtrace/llmobs/_integrations/gemini.py
+++ b/ddtrace/llmobs/_integrations/gemini.py
@@ -1,9 +1,25 @@
+import json
 from typing import Any
 from typing import Dict
+from typing import Iterable
+from typing import List
 from typing import Optional

 from ddtrace import Span
+from ddtrace.internal.utils import get_argument_value
+from ddtrace.llmobs._constants import INPUT_MESSAGES
+from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
+from ddtrace.llmobs._constants import METADATA
+from ddtrace.llmobs._constants import METRICS
+from ddtrace.llmobs._constants import MODEL_NAME
+from ddtrace.llmobs._constants import MODEL_PROVIDER
+from ddtrace.llmobs._constants import OUTPUT_MESSAGES
+from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
+from ddtrace.llmobs._constants import SPAN_KIND
+from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._integrations.base import BaseLLMIntegration
+from ddtrace.llmobs._utils import _get_attr
+from ddtrace.llmobs._utils import _unserializable_default_repr


 class GeminiIntegration(BaseLLMIntegration):
@@ -16,3 +32,127 @@ def _set_base_span_tags(
             span.set_tag_str("google_generativeai.request.provider", str(provider))
         if model is not None:
             span.set_tag_str("google_generativeai.request.model", str(model))
+
+    def llmobs_set_tags(
+        self, span: Span, args: List[Any], kwargs: Dict[str, Any], instance: Any, generations: Any = None
+    ) -> None:
+        if not self.llmobs_enabled:
+            return
+
+        span.set_tag_str(SPAN_KIND, "llm")
+        span.set_tag_str(MODEL_NAME, span.get_tag("google_generativeai.request.model") or "")
+        span.set_tag_str(MODEL_PROVIDER, span.get_tag("google_generativeai.request.provider") or "")
+
+        metadata = self._llmobs_set_metadata(kwargs, instance)
+        span.set_tag_str(METADATA, json.dumps(metadata, default=_unserializable_default_repr))
+
+        system_instruction = _get_attr(instance, "_system_instruction", None)
+        input_contents = get_argument_value(args, kwargs, 0, "contents")
+        input_messages = self._extract_input_message(input_contents, system_instruction)
+        span.set_tag_str(INPUT_MESSAGES, json.dumps(input_messages, default=_unserializable_default_repr))
+
+        if span.error or generations is None:
+            span.set_tag_str(OUTPUT_MESSAGES, json.dumps([{"content": ""}]))
+        else:
+            output_messages = self._extract_output_message(generations)
+            span.set_tag_str(OUTPUT_MESSAGES, json.dumps(output_messages, default=_unserializable_default_repr))
+
+        usage = self._get_llmobs_metrics_tags(span)
+        if usage:
+            span.set_tag_str(METRICS, json.dumps(usage, default=_unserializable_default_repr))
+
+    @staticmethod
+    def _llmobs_set_metadata(kwargs, instance):
+        metadata = {}
+        model_config = instance._generation_config or {}
+        request_config = kwargs.get("generation_config", {})
+        parameters = ("temperature", "max_output_tokens", "candidate_count", "top_p", "top_k")
+        for param in parameters:
+            model_config_value = _get_attr(model_config, param, None)
+            request_config_value = _get_attr(request_config, param, None)
+            if model_config_value or request_config_value:
+                metadata[param] = request_config_value or model_config_value
+        return metadata
+
+    @staticmethod
+    def _extract_message_from_part(part, role):
+        text = _get_attr(part, "text", "")
+        function_call = _get_attr(part, "function_call", None)
+        function_response = _get_attr(part, "function_response", None)
+        message = {"content": text}
+        if role:
+            message["role"] = role
+        if function_call:
+            function_call_dict = function_call
+            if not isinstance(function_call, dict):
+                function_call_dict = type(function_call).to_dict(function_call)
+            message["tool_calls"] = [
+                {"name": function_call_dict.get("name", ""), "arguments": function_call_dict.get("args", {})}
+            ]
+        if function_response:
+            function_response_dict = function_response
+            if not isinstance(function_response, dict):
+                function_response_dict = type(function_response).to_dict(function_response)
+            message["content"] = "[tool result: {}]".format(function_response_dict.get("response", ""))
+        return message
+
+    def _extract_input_message(self, contents, system_instruction=None):
+        messages = []
+        if system_instruction:
+            for part in system_instruction.parts:
+                messages.append({"content": part.text or "", "role": "system"})
+        if isinstance(contents, str):
+            messages.append({"content": contents})
+            return messages
+        if isinstance(contents, dict):
+            message = {"content": contents.get("text", "")}
+            if contents.get("role", None):
+                message["role"] = contents["role"]
+            messages.append(message)
+            return messages
+        if not isinstance(contents, list):
+            messages.append({"content": "[Non-text content object: {}]".format(repr(contents))})
+            return messages
+        for content in contents:
+            if isinstance(content, str):
+                messages.append({"content": content})
+                continue
+            role = _get_attr(content, "role", None)
+            parts = _get_attr(content, "parts", [])
+            if not parts or not isinstance(parts, Iterable):
+                message = {"content": "[Non-text content object: {}]".format(repr(content))}
+                if role:
+                    message["role"] = role
+                messages.append(message)
+                continue
+            for part in parts:
+                message = self._extract_message_from_part(part, role)
+                messages.append(message)
+        return messages
+
+    def _extract_output_message(self, generations):
+        output_messages = []
+        generations_dict = generations.to_dict()
+        for candidate in generations_dict.get("candidates", []):
+            content = candidate.get("content", {})
+            role = content.get("role", "model")
+            parts = content.get("parts", [])
+            for part in parts:
+                message = self._extract_message_from_part(part, role)
+                output_messages.append(message)
+        return output_messages
+
+    @staticmethod
+    def _get_llmobs_metrics_tags(span):
+        usage = {}
+        input_tokens = span.get_metric("google_generativeai.response.usage.prompt_tokens")
+        output_tokens = span.get_metric("google_generativeai.response.usage.completion_tokens")
+        total_tokens = span.get_metric("google_generativeai.response.usage.total_tokens")
+
+        if input_tokens is not None:
+            usage[INPUT_TOKENS_METRIC_KEY] = input_tokens
+        if output_tokens is not None:
+            usage[OUTPUT_TOKENS_METRIC_KEY] = output_tokens
+        if total_tokens is not None:
+            usage[TOTAL_TOKENS_METRIC_KEY] = total_tokens
+        return usage
diff --git a/ddtrace/llmobs/_utils.py b/ddtrace/llmobs/_utils.py
index e317e15b5d8..bc6f15bad2e 100644
--- a/ddtrace/llmobs/_utils.py
+++ b/ddtrace/llmobs/_utils.py
@@ -5,6 +5,7 @@
 from ddtrace import config
 from ddtrace.ext import SpanTypes
 from ddtrace.internal.logger import get_logger
+from ddtrace.llmobs._constants import GEMINI_APM_SPAN_NAME
 from ddtrace.llmobs._constants import LANGCHAIN_APM_SPAN_NAME
 from ddtrace.llmobs._constants import ML_APP
 from ddtrace.llmobs._constants import OPENAI_APM_SPAN_NAME
@@ -46,7 +47,7 @@ def _get_llmobs_parent_id(span: Span) -> Optional[str]:


 def _get_span_name(span: Span) -> str:
-    if span.name == LANGCHAIN_APM_SPAN_NAME and span.resource != "":
+    if span.name in (LANGCHAIN_APM_SPAN_NAME, GEMINI_APM_SPAN_NAME) and span.resource != "":
         return span.resource
     elif span.name == OPENAI_APM_SPAN_NAME and span.resource != "":
         return "openai.{}".format(span.resource)
diff --git a/releasenotes/notes/feat-llmobs-gemini-b65c714ceef9eb12.yaml b/releasenotes/notes/feat-llmobs-gemini-b65c714ceef9eb12.yaml
new file mode 100644
index 00000000000..80ce9a87b93
--- /dev/null
+++ b/releasenotes/notes/feat-llmobs-gemini-b65c714ceef9eb12.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    LLM Observability: Adds support for automatically submitting Google Gemini Python SDK calls to LLM Observability.
diff --git a/tests/contrib/google_generativeai/conftest.py b/tests/contrib/google_generativeai/conftest.py
index 1a4a5c057a6..7da872255c3 100644
--- a/tests/contrib/google_generativeai/conftest.py
+++ b/tests/contrib/google_generativeai/conftest.py
@@ -1,9 +1,11 @@
 import os

+import mock
 import pytest

 from ddtrace.contrib.google_generativeai import patch
 from ddtrace.contrib.google_generativeai import unpatch
+from ddtrace.llmobs import LLMObs
 from ddtrace.pin import Pin
 from tests.contrib.google_generativeai.utils import MockGenerativeModelAsyncClient
 from tests.contrib.google_generativeai.utils import MockGenerativeModelClient
@@ -35,11 +37,27 @@ def mock_tracer(ddtrace_global_config, genai):
         mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False))
         pin.override(genai, tracer=mock_tracer)
         pin.tracer.configure()
+        if ddtrace_global_config.get("_llmobs_enabled", False):
+            # Have to disable and re-enable LLMObs to use the mock tracer.
+            LLMObs.disable()
+            LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False)
         yield mock_tracer
     except Exception:
         yield


+@pytest.fixture
+def mock_llmobs_writer():
+    patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter")
+    try:
+        LLMObsSpanWriterMock = patcher.start()
+        m = mock.MagicMock()
+        LLMObsSpanWriterMock.return_value = m
+        yield m
+    finally:
+        patcher.stop()
+
+
 @pytest.fixture
 def mock_client():
     yield MockGenerativeModelClient()
diff --git a/tests/contrib/google_generativeai/test_google_generativeai_llmobs.py b/tests/contrib/google_generativeai/test_google_generativeai_llmobs.py
new file mode 100644
index 00000000000..070ffd03d36
--- /dev/null
+++ b/tests/contrib/google_generativeai/test_google_generativeai_llmobs.py
@@ -0,0 +1,620 @@
+import os
+
+from google.api_core.exceptions import InvalidArgument
+import mock
+from PIL import Image
+import pytest
+
+from tests.contrib.google_generativeai.utils import MOCK_COMPLETION_IMG_CALL
+from tests.contrib.google_generativeai.utils import MOCK_COMPLETION_SIMPLE_1
+from tests.contrib.google_generativeai.utils import MOCK_COMPLETION_SIMPLE_2
+from tests.contrib.google_generativeai.utils import MOCK_COMPLETION_SIMPLE_SYSTEM
+from tests.contrib.google_generativeai.utils import MOCK_COMPLETION_STREAM_CHUNKS
+from tests.contrib.google_generativeai.utils import MOCK_COMPLETION_TOOL_CALL
+from tests.contrib.google_generativeai.utils import MOCK_COMPLETION_TOOL_CALL_STREAM_CHUNKS
+from tests.contrib.google_generativeai.utils import _async_streamed_response
+from tests.contrib.google_generativeai.utils import _mock_completion_response
+from tests.contrib.google_generativeai.utils import _mock_completion_stream_chunk
+from tests.contrib.google_generativeai.utils import set_light_values
+from tests.llmobs._utils import _expected_llmobs_llm_span_event
+
+
+@pytest.mark.parametrize(
"ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] +) +class TestLLMObsGemini: + def test_completion(self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer): + mock_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1)) + llm = genai.GenerativeModel("gemini-1.5-flash") + llm.generate_content( + "What is the argument for LeBron James being the GOAT?", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=35, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "What is the argument for LeBron James being the GOAT?"}], + output_messages=[ + {"content": MOCK_COMPLETION_SIMPLE_1["candidates"][0]["content"]["parts"][0]["text"], "role": "model"}, + ], + metadata={"temperature": 1.0, "max_output_tokens": 35}, + token_metrics={"input_tokens": 12, "output_tokens": 30, "total_tokens": 42}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + async def test_completion_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, mock_tracer + ): + mock_client_async.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1)) + llm = genai.GenerativeModel("gemini-1.5-flash") + await llm.generate_content_async( + "What is the argument for LeBron James being the GOAT?", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=35, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "What is the argument for LeBron James being the GOAT?"}], + output_messages=[ + {"content": MOCK_COMPLETION_SIMPLE_1["candidates"][0]["content"]["parts"][0]["text"], "role": "model"} + ], + metadata={"temperature": 1.0, "max_output_tokens": 35}, + token_metrics={"input_tokens": 12, "output_tokens": 30, "total_tokens": 42}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + def test_completion_error(self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer): + llm = genai.GenerativeModel("gemini-1.5-flash") + llm._client = mock.Mock() + llm._client.generate_content.side_effect = InvalidArgument("Invalid API key. 
Please pass a valid API key.") + with pytest.raises(InvalidArgument): + llm.generate_content( + "What is the argument for LeBron James being the GOAT?", + generation_config=genai.types.GenerationConfig( + stop_sequences=["x"], max_output_tokens=35, temperature=1.0 + ), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "What is the argument for LeBron James being the GOAT?"}], + output_messages=[{"content": ""}], + error="google.api_core.exceptions.InvalidArgument", + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), + metadata={"temperature": 1.0, "max_output_tokens": 35}, + tags={"ml_app": ""}, + integration="gemini", + ) + ) + + async def test_completion_error_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, mock_tracer + ): + llm = genai.GenerativeModel("gemini-1.5-flash") + llm._async_client = mock.Mock() + llm._async_client.generate_content.side_effect = InvalidArgument( + "Invalid API key. Please pass a valid API key." + ) + with pytest.raises(InvalidArgument): + await llm.generate_content_async( + "What is the argument for LeBron James being the GOAT?", + generation_config=genai.types.GenerationConfig( + stop_sequences=["x"], max_output_tokens=35, temperature=1.0 + ), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "What is the argument for LeBron James being the GOAT?"}], + output_messages=[{"content": ""}], + error="google.api_core.exceptions.InvalidArgument", + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), + metadata={"temperature": 1.0, "max_output_tokens": 35}, + tags={"ml_app": ""}, + integration="gemini", + ) + ) + + def test_completion_multiple_messages( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer + ): + mock_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_2)) + llm = genai.GenerativeModel("gemini-1.5-flash") + llm.generate_content( + [ + {"role": "user", "parts": [{"text": "Hello world!"}]}, + {"role": "model", "parts": [{"text": "Great to meet you. What would you like to know?"}]}, + {"role": "user", "parts": [{"text": "Why is the sky blue?"}]}, + ], + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=35, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[ + {"content": "Hello world!", "role": "user"}, + {"content": "Great to meet you. 
What would you like to know?", "role": "model"}, + {"content": "Why is the sky blue?", "role": "user"}, + ], + output_messages=[ + {"content": MOCK_COMPLETION_SIMPLE_2["candidates"][0]["content"]["parts"][0]["text"], "role": "model"} + ], + metadata={"temperature": 1.0, "max_output_tokens": 35}, + token_metrics={"input_tokens": 24, "output_tokens": 35, "total_tokens": 59}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + async def test_completion_multiple_messages_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, mock_tracer + ): + mock_client_async.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_2)) + llm = genai.GenerativeModel("gemini-1.5-flash") + await llm.generate_content_async( + [ + {"role": "user", "parts": [{"text": "Hello world!"}]}, + {"role": "model", "parts": [{"text": "Great to meet you. What would you like to know?"}]}, + {"role": "user", "parts": [{"text": "Why is the sky blue?"}]}, + ], + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=35, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[ + {"content": "Hello world!", "role": "user"}, + {"content": "Great to meet you. What would you like to know?", "role": "model"}, + {"content": "Why is the sky blue?", "role": "user"}, + ], + output_messages=[ + {"content": MOCK_COMPLETION_SIMPLE_2["candidates"][0]["content"]["parts"][0]["text"], "role": "model"} + ], + metadata={"temperature": 1.0, "max_output_tokens": 35}, + token_metrics={"input_tokens": 24, "output_tokens": 35, "total_tokens": 59}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + def test_chat_completion(self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer): + mock_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_2)) + llm = genai.GenerativeModel("gemini-1.5-flash") + chat = llm.start_chat( + history=[ + {"role": "user", "parts": "Hello world!"}, + {"role": "model", "parts": "Great to meet you. What would you like to know?"}, + ] + ) + chat.send_message( + "Why is the sky blue?", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=35, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[ + {"content": "Hello world!", "role": "user"}, + {"content": "Great to meet you. 
What would you like to know?", "role": "model"}, + {"content": "Why is the sky blue?", "role": "user"}, + ], + output_messages=[ + {"content": MOCK_COMPLETION_SIMPLE_2["candidates"][0]["content"]["parts"][0]["text"], "role": "model"} + ], + metadata={"temperature": 1.0, "max_output_tokens": 35}, + token_metrics={"input_tokens": 24, "output_tokens": 35, "total_tokens": 59}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + async def test_chat_completion_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, mock_tracer + ): + mock_client_async.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_2)) + llm = genai.GenerativeModel("gemini-1.5-flash") + chat = llm.start_chat( + history=[ + {"role": "user", "parts": "Hello world!"}, + {"role": "model", "parts": "Great to meet you. What would you like to know?"}, + ] + ) + await chat.send_message_async( + "Why is the sky blue?", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=35, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[ + {"content": "Hello world!", "role": "user"}, + {"content": "Great to meet you. What would you like to know?", "role": "model"}, + {"content": "Why is the sky blue?", "role": "user"}, + ], + output_messages=[ + {"content": MOCK_COMPLETION_SIMPLE_2["candidates"][0]["content"]["parts"][0]["text"], "role": "model"} + ], + metadata={"temperature": 1.0, "max_output_tokens": 35}, + token_metrics={"input_tokens": 24, "output_tokens": 35, "total_tokens": 59}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + def test_completion_system_prompt(self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer): + mock_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_SYSTEM)) + llm = genai.GenerativeModel( + "gemini-1.5-flash", + system_instruction="You are a die-hard Michael Jordan fan that always brings stats to the discussion.", + ) + llm.generate_content( + "What is the argument for LeBron James being the GOAT?", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=50, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[ + { + "content": "You are a die-hard Michael Jordan fan that always brings stats to the discussion.", + "role": "system", + }, + {"content": "What is the argument for LeBron James being the GOAT?"}, + ], + output_messages=[ + { + "content": MOCK_COMPLETION_SIMPLE_SYSTEM["candidates"][0]["content"]["parts"][0]["text"], + "role": "model", + } + ], + metadata={"temperature": 1.0, "max_output_tokens": 50}, + token_metrics={"input_tokens": 29, "output_tokens": 45, "total_tokens": 74}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + async def test_completion_system_prompt_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, 
mock_tracer + ): + mock_client_async.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_SYSTEM)) + llm = genai.GenerativeModel( + "gemini-1.5-flash", + system_instruction="You are a die-hard Michael Jordan fan that always brings stats to the discussion.", + ) + await llm.generate_content_async( + "What is the argument for LeBron James being the GOAT?", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=50, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[ + { + "content": "You are a die-hard Michael Jordan fan that always brings stats to the discussion.", + "role": "system", + }, + {"content": "What is the argument for LeBron James being the GOAT?"}, + ], + output_messages=[ + { + "content": MOCK_COMPLETION_SIMPLE_SYSTEM["candidates"][0]["content"]["parts"][0]["text"], + "role": "model", + }, + ], + metadata={"temperature": 1.0, "max_output_tokens": 50}, + token_metrics={"input_tokens": 29, "output_tokens": 45, "total_tokens": 74}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + def test_completion_stream(self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer): + mock_client.responses["stream_generate_content"] = [ + (_mock_completion_stream_chunk(chunk) for chunk in MOCK_COMPLETION_STREAM_CHUNKS) + ] + llm = genai.GenerativeModel("gemini-1.5-flash") + response = llm.generate_content( + "Can you recite the alphabet?", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=60, temperature=1.0), + stream=True, + ) + for _ in response: + pass + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "Can you recite the alphabet?"}], + output_messages=[ + {"content": "".join(chunk["text"] for chunk in MOCK_COMPLETION_STREAM_CHUNKS), "role": "model"} + ], + metadata={"temperature": 1.0, "max_output_tokens": 60}, + token_metrics={"input_tokens": 6, "output_tokens": 52, "total_tokens": 58}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + async def test_completion_stream_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, mock_tracer + ): + mock_client_async.responses["stream_generate_content"] = [ + _async_streamed_response(MOCK_COMPLETION_STREAM_CHUNKS) + ] + llm = genai.GenerativeModel("gemini-1.5-flash") + response = await llm.generate_content_async( + "Can you recite the alphabet?", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=60, temperature=1.0), + stream=True, + ) + async for _ in response: + pass + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "Can you recite the alphabet?"}], + output_messages=[ + {"content": "".join(chunk["text"] for chunk in MOCK_COMPLETION_STREAM_CHUNKS), "role": "model"} + ], + 
metadata={"temperature": 1.0, "max_output_tokens": 60}, + token_metrics={"input_tokens": 6, "output_tokens": 52, "total_tokens": 58}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + def test_completion_tool_call(self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer): + mock_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_TOOL_CALL)) + llm = genai.GenerativeModel("gemini-1.5-flash", tools=[set_light_values]) + llm.generate_content( + "Dim the lights so the room feels cozy and warm.", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=30, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "Dim the lights so the room feels cozy and warm."}], + output_messages=[ + { + "content": "", + "role": "model", + "tool_calls": [ + { + "name": "set_light_values", + "arguments": { + "fields": [{"key": "color_temp", "value": "warm"}, {"key": "brightness", "value": 50}] + }, + } + ], + } + ], + metadata={"temperature": 1.0, "max_output_tokens": 30}, + token_metrics={"input_tokens": 150, "output_tokens": 25, "total_tokens": 175}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + async def test_completion_tool_call_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, mock_tracer + ): + mock_client_async.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_TOOL_CALL)) + llm = genai.GenerativeModel("gemini-1.5-flash", tools=[set_light_values]) + await llm.generate_content_async( + "Dim the lights so the room feels cozy and warm.", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=30, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "Dim the lights so the room feels cozy and warm."}], + output_messages=[ + { + "content": "", + "role": "model", + "tool_calls": [ + { + "name": "set_light_values", + "arguments": { + "fields": [{"key": "color_temp", "value": "warm"}, {"key": "brightness", "value": 50}] + }, + } + ], + } + ], + metadata={"temperature": 1.0, "max_output_tokens": 30}, + token_metrics={"input_tokens": 150, "output_tokens": 25, "total_tokens": 175}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + def test_gemini_completion_tool_stream( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer + ): + mock_client.responses["stream_generate_content"] = [ + (_mock_completion_stream_chunk(chunk) for chunk in MOCK_COMPLETION_TOOL_CALL_STREAM_CHUNKS) + ] + llm = genai.GenerativeModel("gemini-1.5-flash", tools=[set_light_values]) + response = llm.generate_content( + "Dim the lights so the room feels cozy and warm.", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=30, temperature=1.0), + stream=True, + ) + for _ in response: + pass + span = 
mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "Dim the lights so the room feels cozy and warm."}], + output_messages=[ + { + "content": "", + "role": "model", + "tool_calls": [ + { + "name": "set_light_values", + "arguments": { + "fields": [{"key": "color_temp", "value": "warm"}, {"key": "brightness", "value": 50}] + }, + } + ], + } + ], + metadata={"temperature": 1.0, "max_output_tokens": 30}, + token_metrics={"input_tokens": 150, "output_tokens": 25, "total_tokens": 175}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + async def test_gemini_completion_tool_stream_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, mock_tracer + ): + mock_client_async.responses["stream_generate_content"] = [ + _async_streamed_response(MOCK_COMPLETION_TOOL_CALL_STREAM_CHUNKS) + ] + llm = genai.GenerativeModel("gemini-1.5-flash", tools=[set_light_values]) + response = await llm.generate_content_async( + "Dim the lights so the room feels cozy and warm.", + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=30, temperature=1.0), + stream=True, + ) + async for _ in response: + pass + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[{"content": "Dim the lights so the room feels cozy and warm."}], + output_messages=[ + { + "content": "", + "role": "model", + "tool_calls": [ + { + "name": "set_light_values", + "arguments": { + "fields": [{"key": "color_temp", "value": "warm"}, {"key": "brightness", "value": 50}] + }, + } + ], + } + ], + metadata={"temperature": 1.0, "max_output_tokens": 30}, + token_metrics={"input_tokens": 150, "output_tokens": 25, "total_tokens": 175}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + def test_gemini_completion_image(self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client, mock_tracer): + """Ensure passing images to generate_content() won't break patching.""" + img = Image.open(os.path.join(os.path.dirname(__file__), "test_data/apple.jpg")) + mock_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_IMG_CALL)) + llm = genai.GenerativeModel("gemini-1.5-flash") + llm.generate_content( + [img, "Return a bounding box for the apple. \n [ymin, xmin, ymax, xmax]"], + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=30, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[ + {"content": "[Non-text content object: {}]".format(repr(img))}, + {"content": "Return a bounding box for the apple. 
\n [ymin, xmin, ymax, xmax]"}, + ], + output_messages=[{"content": "57 100 900 911", "role": "model"}], + metadata={"temperature": 1.0, "max_output_tokens": 30}, + token_metrics={"input_tokens": 277, "output_tokens": 14, "total_tokens": 291}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) + + async def test_gemini_completion_image_async( + self, genai, ddtrace_global_config, mock_llmobs_writer, mock_client_async, mock_tracer + ): + """Ensure passing images to generate_content() won't break patching.""" + img = Image.open(os.path.join(os.path.dirname(__file__), "test_data/apple.jpg")) + mock_client_async.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_IMG_CALL)) + llm = genai.GenerativeModel("gemini-1.5-flash") + await llm.generate_content_async( + [img, "Return a bounding box for the apple. \n [ymin, xmin, ymax, xmax]"], + generation_config=genai.types.GenerationConfig(stop_sequences=["x"], max_output_tokens=30, temperature=1.0), + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + expected_llmobs_span_event = _expected_llmobs_llm_span_event( + span, + model_name="gemini-1.5-flash", + model_provider="google", + input_messages=[ + {"content": "[Non-text content object: {}]".format(repr(img))}, + {"content": "Return a bounding box for the apple. \n [ymin, xmin, ymax, xmax]"}, + ], + output_messages=[{"content": "57 100 900 911", "role": "model"}], + metadata={"temperature": 1.0, "max_output_tokens": 30}, + token_metrics={"input_tokens": 277, "output_tokens": 14, "total_tokens": 291}, + tags={"ml_app": ""}, + integration="gemini", + ) + mock_llmobs_writer.enqueue.assert_called_with(expected_llmobs_span_event) diff --git a/tests/llmobs/_utils.py b/tests/llmobs/_utils.py index e8cc03d6ee7..47d8891950e 100644 --- a/tests/llmobs/_utils.py +++ b/tests/llmobs/_utils.py @@ -1,21 +1,29 @@ import os import mock -import vcr + + +try: + import vcr +except ImportError: + vcr = None import ddtrace from ddtrace._trace.span import Span from ddtrace.ext import SpanTypes -logs_vcr = vcr.VCR( - cassette_library_dir=os.path.join(os.path.dirname(__file__), "llmobs_cassettes/"), - record_mode="once", - match_on=["path"], - filter_headers=[("DD-API-KEY", "XXXXXX")], - # Ignore requests to the agent - ignore_localhost=True, -) +if vcr: + logs_vcr = vcr.VCR( + cassette_library_dir=os.path.join(os.path.dirname(__file__), "llmobs_cassettes/"), + record_mode="once", + match_on=["path"], + filter_headers=[("DD-API-KEY", "XXXXXX")], + # Ignore requests to the agent + ignore_localhost=True, + ) +else: + logs_vcr = None def _expected_llmobs_tags(span, error=None, tags=None, session_id=None): @@ -180,7 +188,7 @@ def _llmobs_base_span_event( integration=None, ): span_name = span.name - if integration == "langchain": + if integration in ("langchain", "gemini"): span_name = span.resource elif integration == "openai": span_name = "openai.{}".format(span.resource) @@ -189,12 +197,12 @@ def _llmobs_base_span_event( "span_id": str(span.span_id), "parent_id": _get_llmobs_parent_id(span), "name": span_name, - "tags": _expected_llmobs_tags(span, tags=tags, error=error, session_id=session_id), "start_ns": span.start_ns, "duration": span.duration_ns, "status": "error" if error else "ok", "meta": {"span.kind": span_kind}, "metrics": {}, + "tags": _expected_llmobs_tags(span, tags=tags, error=error, session_id=session_id), } if session_id: span_event["session_id"] = session_id
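Usage sketch (illustrative, not part of the diff): with this change, calls made through the patched google-generativeai SDK should be submitted to LLM Observability automatically once LLMObs is enabled. The snippet below assumes the existing LLMObs.enable() entry point and its default integration patching; the ml_app name and prompt are placeholders.

    import google.generativeai as genai

    from ddtrace.llmobs import LLMObs

    # Enabling LLM Observability is assumed to patch supported integrations
    # (including google_generativeai) by default; "my-ml-app" is a placeholder.
    LLMObs.enable(ml_app="my-ml-app")

    model = genai.GenerativeModel("gemini-1.5-flash")
    # The resulting gemini.request span is tagged as an LLM span with input/output
    # messages, metadata, and token metrics via GeminiIntegration.llmobs_set_tags().
    model.generate_content("What is the argument for LeBron James being the GOAT?")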