Skip to content

Commit

Permalink
Merge branch 'main' into avara1986/iast_error_metrics_refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
avara1986 authored Sep 20, 2024
2 parents 2bbb62c + baac738 commit 5924f49
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/system-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ jobs:
# If ever it's needed, a valid key exists in the repo, using ${{ secrets.DD_API_KEY }}
DD_API_KEY: 1234567890abcdef1234567890abcdef
CMAKE_BUILD_PARALLEL_LEVEL: 12
AWS_ACCESS_KEY_ID: ${{ secrets.IDM_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.IDM_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: us-east-1
AWS_DEFAULT_REGION: us-east-1 # AWS services should use `AWS_REGION`, but some still use the older `AWS_DEFAULT_REGION`
steps:

- name: Checkout system tests
Expand Down Expand Up @@ -132,6 +136,10 @@ jobs:
# If ever it's needed, a valid key exists in the repo, using ${{ secrets.DD_API_KEY }}
DD_API_KEY: 1234567890abcdef1234567890abcdef
CMAKE_BUILD_PARALLEL_LEVEL: 12
AWS_ACCESS_KEY_ID: ${{ secrets.IDM_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.IDM_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: us-east-1
AWS_DEFAULT_REGION: us-east-1 # AWS services should use `AWS_REGION`, but some still use the older `AWS_DEFAULT_REGION`
steps:

- name: Checkout system tests
Expand Down
1 change: 1 addition & 0 deletions ddtrace/llmobs/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
INPUT_MESSAGES = "_ml_obs.meta.input.messages"
INPUT_VALUE = "_ml_obs.meta.input.value"
INPUT_PARAMETERS = "_ml_obs.meta.input.parameters"
INPUT_PROMPT = "_ml_obs.meta.input.prompt"

OUTPUT_DOCUMENTS = "_ml_obs.meta.output.documents"
OUTPUT_MESSAGES = "_ml_obs.meta.output.messages"
Expand Down
21 changes: 21 additions & 0 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PARAMETERS
from ddtrace.llmobs._constants import INPUT_PROMPT
from ddtrace.llmobs._constants import INPUT_VALUE
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
Expand All @@ -46,6 +47,7 @@
from ddtrace.llmobs._utils import _get_session_id
from ddtrace.llmobs._utils import _inject_llmobs_parent_id
from ddtrace.llmobs._utils import safe_json
from ddtrace.llmobs._utils import validate_prompt
from ddtrace.llmobs._writer import LLMObsEvalMetricWriter
from ddtrace.llmobs._writer import LLMObsSpanWriter
from ddtrace.llmobs.utils import Documents
Expand Down Expand Up @@ -475,6 +477,7 @@ def annotate(
cls,
span: Optional[Span] = None,
parameters: Optional[Dict[str, Any]] = None,
prompt: Optional[dict] = None,
input_data: Optional[Any] = None,
output_data: Optional[Any] = None,
metadata: Optional[Dict[str, Any]] = None,
Expand All @@ -487,6 +490,8 @@ def annotate(
:param Span span: Span to annotate. If no span is provided, the current active span will be used.
Must be an LLMObs-type span, i.e. generated by the LLMObs SDK.
:param prompt: A dictionary that represents the prompt used for an LLM call in the following form:
{"template": "...", "id": "...", "version": "...", "variables": {"variable_1": "value_1", ...}}.
:param input_data: A single input string, dictionary, or a list of dictionaries based on the span kind:
- llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "..."},
or a list of dictionaries with the same signature.
Expand Down Expand Up @@ -532,6 +537,12 @@ def annotate(
if not span_kind:
log.debug("Span kind not specified, skipping annotation for input/output data")
return
if prompt is not None:
if span_kind == "llm":
cls._tag_prompt(span, prompt)
else:
log.warning("Annotating prompts are only supported for LLM span kinds.")

if input_data or output_data:
if span_kind == "llm":
cls._tag_llm_io(span, input_messages=input_data, output_messages=output_data)
Expand All @@ -542,6 +553,16 @@ def annotate(
else:
cls._tag_text_io(span, input_value=input_data, output_value=output_data)

@staticmethod
def _tag_prompt(span, prompt: dict) -> None:
    """Validate *prompt* and attach it to *span* as a JSON-encoded input-prompt tag.

    An invalid prompt is logged at warning level and dropped; the span is
    left untouched in that case.
    """
    try:
        span.set_tag_str(INPUT_PROMPT, safe_json(validate_prompt(prompt)))
    except TypeError:
        log.warning("Failed to validate prompt with error: ", exc_info=True)
        return

@staticmethod
def _tag_params(span: Span, params: Dict[str, Any]) -> None:
"""Tags input parameters for a given LLMObs span.
Expand Down
3 changes: 3 additions & 0 deletions ddtrace/llmobs/_trace_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PARAMETERS
from ddtrace.llmobs._constants import INPUT_PROMPT
from ddtrace.llmobs._constants import INPUT_VALUE
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
Expand Down Expand Up @@ -85,6 +86,8 @@ def _llmobs_span_event(self, span: Span) -> Dict[str, Any]:
meta["output"]["value"] = span._meta.pop(OUTPUT_VALUE)
if span_kind == "retrieval" and span.get_tag(OUTPUT_DOCUMENTS) is not None:
meta["output"]["documents"] = json.loads(span._meta.pop(OUTPUT_DOCUMENTS))
if span_kind == "llm" and span.get_tag(INPUT_PROMPT) is not None:
meta["input"]["prompt"] = json.loads(span._meta.pop(INPUT_PROMPT))
if span.error:
meta[ERROR_MSG] = span.get_tag(ERROR_MSG)
meta[ERROR_STACK] = span.get_tag(ERROR_STACK)
Expand Down
31 changes: 31 additions & 0 deletions ddtrace/llmobs/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
from typing import Dict
from typing import Optional
from typing import Union

import ddtrace
from ddtrace import Span
Expand All @@ -18,6 +20,35 @@
log = get_logger(__name__)


def validate_prompt(prompt: dict) -> Dict[str, Union[str, dict]]:
    """Validate and normalize a user-supplied prompt annotation dictionary.

    Recognized keys (all optional): ``template`` (str), ``id`` (str),
    ``version`` (str), and ``variables`` (dict mapping str -> str).
    Unrecognized keys are silently dropped.

    :param prompt: the prompt dictionary passed to ``LLMObs.annotate(prompt=...)``.
    :return: a new dictionary containing only the validated, recognized keys.
    :raises TypeError: if ``prompt`` is not a dict, or any recognized key has
        a value of the wrong type.
    """
    if not isinstance(prompt, dict):
        raise TypeError("Prompt must be a dictionary")
    validated_prompt = {}  # type: Dict[str, Union[str, dict]]
    variables = prompt.get("variables")
    template = prompt.get("template")
    version = prompt.get("version")
    prompt_id = prompt.get("id")
    if variables is not None:
        if not isinstance(variables, dict):
            raise TypeError("Prompt variables must be a dictionary.")
        # BUG FIX: previously `any(isinstance(k, str) or isinstance(v, str) ...)`,
        # which accepted the dict as long as a single key OR value anywhere was a
        # string (e.g. {"a": 1} passed because the key is a string). Every key AND
        # every value must be a string.
        if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()):
            raise TypeError("Prompt variable keys and values must be strings.")
        validated_prompt["variables"] = variables
    if template is not None:
        if not isinstance(template, str):
            raise TypeError("Prompt template must be a string")
        validated_prompt["template"] = template
    if version is not None:
        if not isinstance(version, str):
            raise TypeError("Prompt version must be a string.")
        validated_prompt["version"] = version
    if prompt_id is not None:
        if not isinstance(prompt_id, str):
            raise TypeError("Prompt id must be a string.")
        validated_prompt["id"] = prompt_id
    return validated_prompt


class AnnotationContext:
def __init__(self, _tracer, _annotation_callback):
self._tracer = _tracer
Expand Down
1 change: 1 addition & 0 deletions ddtrace/llmobs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
ExportedLLMObsSpan = TypedDict("ExportedLLMObsSpan", {"span_id": str, "trace_id": str})
Document = TypedDict("Document", {"name": str, "id": str, "text": str, "score": float}, total=False)
Message = TypedDict("Message", {"content": str, "role": str}, total=False)
Prompt = TypedDict("Prompt", {"variables": Dict[str, str], "template": str, "id": str, "version": str}, total=False)


class Messages:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
features:
- |
LLM Observability: Introduces prompt template annotation, which can be passed as an argument to `LLMObs.annotate(prompt={...})` for LLM span kinds.
For more information on prompt annotations, see https://docs.datadoghq.com/llm_observability/setup/sdk/#annotating-a-span.
60 changes: 60 additions & 0 deletions tests/llmobs/test_llmobs_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PARAMETERS
from ddtrace.llmobs._constants import INPUT_PROMPT
from ddtrace.llmobs._constants import INPUT_VALUE
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
Expand All @@ -28,6 +29,7 @@
from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
from ddtrace.llmobs._constants import TAGS
from ddtrace.llmobs._llmobs import LLMObsTraceProcessor
from ddtrace.llmobs.utils import Prompt
from tests.llmobs._utils import _expected_llmobs_eval_metric_event
from tests.llmobs._utils import _expected_llmobs_llm_span_event
from tests.llmobs._utils import _expected_llmobs_non_llm_span_event
Expand Down Expand Up @@ -746,6 +748,64 @@ def test_annotate_metrics_unserializable_uses_placeholder(LLMObs, mock_logs):
assert "[Unserializable object: <object object at" in metrics["content"]


def test_annotate_prompt_dict(LLMObs):
    """A plain-dict prompt annotation is JSON-serialized onto the LLM span's prompt tag."""
    prompt_arg = {
        "template": "{var1} {var3}",
        "variables": {"var1": "var1", "var2": "var3"},
        "version": "1.0.0",
        "id": "test_prompt",
    }
    with LLMObs.llm(model_name="test_model") as span:
        LLMObs.annotate(span=span, prompt=prompt_arg)
        assert json.loads(span.get_tag(INPUT_PROMPT)) == {
            "template": "{var1} {var3}",
            "variables": {"var1": "var1", "var2": "var3"},
            "version": "1.0.0",
            "id": "test_prompt",
        }


def test_annotate_prompt_typed_dict(LLMObs):
    """A ``Prompt`` TypedDict annotation round-trips through the JSON prompt tag."""
    with LLMObs.llm(model_name="test_model") as span:
        typed_prompt = Prompt(
            template="{var1} {var3}",
            variables={"var1": "var1", "var2": "var3"},
            version="1.0.0",
            id="test_prompt",
        )
        LLMObs.annotate(span=span, prompt=typed_prompt)
        tagged = json.loads(span.get_tag(INPUT_PROMPT))
        assert tagged == {
            "template": "{var1} {var3}",
            "variables": {"var1": "var1", "var2": "var3"},
            "version": "1.0.0",
            "id": "test_prompt",
        }


def test_annotate_prompt_wrong_type(LLMObs, mock_logs):
    """Invalid prompts (non-dict, or wrongly-typed fields) warn and set no tag."""
    with LLMObs.llm(model_name="test_model") as span:
        # Non-dict prompt.
        LLMObs.annotate(span=span, prompt="prompt")
        assert span.get_tag(INPUT_PROMPT) is None
        mock_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True)
        mock_logs.reset_mock()
        # Dict prompt with a non-string template.
        LLMObs.annotate(span=span, prompt={"template": 1})
        mock_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True)
        mock_logs.reset_mock()


def test_annotate_prompt_wrong_kind(LLMObs, mock_logs):
    """Prompt annotations are rejected with a warning on non-LLM span kinds."""
    prompt_arg = {"variables": {"var1": "var1"}}
    with LLMObs.task(name="dummy") as span:
        # No span= argument: annotate() falls back to the active (task) span.
        LLMObs.annotate(prompt=prompt_arg)
        assert span.get_tag(INPUT_PROMPT) is None
        mock_logs.warning.assert_called_once_with("Annotating prompts are only supported for LLM span kinds.")
        mock_logs.reset_mock()


def test_span_error_sets_error(LLMObs, mock_llmobs_span_writer):
with pytest.raises(ValueError):
with LLMObs.llm(model_name="test_model", model_provider="test_model_provider") as span:
Expand Down
15 changes: 15 additions & 0 deletions tests/llmobs/test_llmobs_trace_processor.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import json

import mock
import pytest

from ddtrace._trace.span import Span
from ddtrace.ext import SpanTypes
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PARAMETERS
from ddtrace.llmobs._constants import INPUT_PROMPT
from ddtrace.llmobs._constants import INPUT_VALUE
from ddtrace.llmobs._constants import LANGCHAIN_APM_SPAN_NAME
from ddtrace.llmobs._constants import METADATA
Expand Down Expand Up @@ -326,6 +329,18 @@ def test_output_value_is_set():
assert tp._llmobs_span_event(llm_span)["meta"]["output"]["value"] == "value"


def test_prompt_is_set():
    """The span event's meta.input.prompt is populated from the span's prompt tag."""
    tracer = DummyTracer()
    span_writer = mock.MagicMock()
    prompt_payload = {"variables": {"var1": "var2"}}
    with override_global_config(dict(_llmobs_ml_app="unnamed-ml-app")):
        with tracer.trace("root_llm_span", span_type=SpanTypes.LLM) as llm_span:
            llm_span.set_tag(SPAN_KIND, "llm")
            llm_span.set_tag(INPUT_PROMPT, json.dumps(prompt_payload))
        processor = LLMObsTraceProcessor(llmobs_span_writer=span_writer)
        assert processor._llmobs_span_event(llm_span)["meta"]["input"]["prompt"] == prompt_payload


def test_metadata_is_set():
"""Test that metadata is set on the span event if it is present on the span."""
dummy_tracer = DummyTracer()
Expand Down

0 comments on commit 5924f49

Please sign in to comment.