diff --git a/.riot/requirements/76db01b.txt b/.riot/requirements/121ef70.txt similarity index 77% rename from .riot/requirements/76db01b.txt rename to .riot/requirements/121ef70.txt index fcb99744e02..04f719ea2bd 100644 --- a/.riot/requirements/76db01b.txt +++ b/.riot/requirements/121ef70.txt @@ -2,41 +2,45 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --no-annotate .riot/requirements/76db01b.in +# pip-compile --no-annotate .riot/requirements/121ef70.in # -ai21==2.4.0 +ai21==2.4.1 ai21-tokenizer==0.9.1 aiohttp==3.9.5 aiosignal==1.3.1 annotated-types==0.7.0 +anthropic==0.28.0 anyio==4.4.0 attrs==23.2.0 -boto3==1.34.114 -botocore==1.34.114 -certifi==2024.2.2 +boto3==1.34.120 +botocore==1.34.120 +certifi==2024.6.2 charset-normalizer==3.3.2 -cohere==5.5.3 +cohere==5.5.4 coverage[toml]==7.5.3 dataclasses-json==0.6.6 +defusedxml==0.7.1 distro==1.9.0 exceptiongroup==1.2.1 fastavro==1.9.4 filelock==3.14.0 frozenlist==1.4.1 -fsspec==2024.5.0 +fsspec==2024.6.0 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 httpx-sse==0.4.0 -huggingface-hub==0.23.2 +huggingface-hub==0.23.3 hypothesis==6.45.0 idna==3.7 iniconfig==2.0.0 +jiter==0.4.1 jmespath==1.0.1 jsonpatch==1.33 jsonpointer==2.4 langchain==0.1.20 +langchain-anthropic==0.1.11 langchain-aws==0.1.6 langchain-community==0.0.38 langchain-core==0.1.52 @@ -57,9 +61,9 @@ packaging==23.2 pinecone-client==3.2.2 pluggy==1.5.0 psutil==5.9.8 -pydantic==2.7.2 -pydantic-core==2.18.3 -pytest==8.2.1 +pydantic==2.7.3 +pydantic-core==2.18.4 +pytest==8.2.2 pytest-asyncio==0.21.1 pytest-cov==5.0.0 pytest-mock==3.14.0 @@ -76,10 +80,10 @@ sortedcontainers==2.4.0 sqlalchemy==2.0.30 tenacity==8.3.0 tiktoken==0.7.0 -tokenizers==0.19.1 +tokenizers==0.15.2 tqdm==4.66.4 -types-requests==2.32.0.20240523 -typing-extensions==4.12.0 +types-requests==2.32.0.20240602 +typing-extensions==4.12.1 typing-inspect==0.9.0 urllib3==2.2.1 vcrpy==6.0.1 diff --git a/.riot/requirements/e17f33e.txt b/.riot/requirements/144795f.txt similarity index 68% rename from .riot/requirements/e17f33e.txt rename to .riot/requirements/144795f.txt index b8340758464..20157a104fc 100644 --- a/.riot/requirements/e17f33e.txt +++ b/.riot/requirements/144795f.txt @@ -2,65 +2,69 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --no-annotate .riot/requirements/e17f33e.in +# pip-compile --no-annotate .riot/requirements/144795f.in # -ai21==2.4.0 +ai21==2.4.1 ai21-tokenizer==0.9.1 aiohttp==3.9.5 aiosignal==1.3.1 annotated-types==0.7.0 +anthropic==0.28.0 anyio==4.4.0 async-timeout==4.0.3 attrs==23.2.0 -boto3==1.34.114 -botocore==1.34.114 -certifi==2024.2.2 +boto3==1.34.120 +botocore==1.34.120 +certifi==2024.6.2 charset-normalizer==3.3.2 -cohere==5.5.3 +cohere==5.5.4 coverage[toml]==7.5.3 dataclasses-json==0.6.6 +defusedxml==0.7.1 distro==1.9.0 exceptiongroup==1.2.1 fastavro==1.9.4 filelock==3.14.0 frozenlist==1.4.1 -fsspec==2024.5.0 +fsspec==2024.6.0 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 httpx-sse==0.4.0 -huggingface-hub==0.23.2 +huggingface-hub==0.23.3 hypothesis==6.45.0 idna==3.7 iniconfig==2.0.0 +jiter==0.4.1 jmespath==1.0.1 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.1 +langchain==0.2.2 +langchain-anthropic==0.1.15 langchain-aws==0.1.6 -langchain-community==0.2.1 -langchain-core==0.2.1 -langchain-openai==0.1.7 +langchain-community==0.2.3 +langchain-core==0.2.4 +langchain-openai==0.1.8 langchain-pinecone==0.1.1 -langchain-text-splitters==0.2.0 -langsmith==0.1.63 +langchain-text-splitters==0.2.1 +langsmith==0.1.73 marshmallow==3.21.2 mock==5.1.0 multidict==6.0.5 mypy-extensions==1.0.0 numexpr==2.10.0 numpy==1.26.4 -openai==1.30.4 +openai==1.31.1 opentracing==2.4.0 orjson==3.10.3 packaging==23.2 pinecone-client==3.2.2 pluggy==1.5.0 psutil==5.9.8 -pydantic==2.7.2 -pydantic-core==2.18.3 -pytest==8.2.1 +pydantic==2.7.3 +pydantic-core==2.18.4 +pytest==8.2.2 pytest-asyncio==0.21.1 pytest-cov==5.0.0 pytest-mock==3.14.0 @@ -77,11 +81,11 @@ sortedcontainers==2.4.0 sqlalchemy==2.0.30 tenacity==8.3.0 tiktoken==0.7.0 -tokenizers==0.19.1 +tokenizers==0.15.2 tomli==2.0.1 tqdm==4.66.4 -types-requests==2.32.0.20240523 -typing-extensions==4.12.0 +types-requests==2.32.0.20240602 +typing-extensions==4.12.1 typing-inspect==0.9.0 urllib3==2.2.1 vcrpy==6.0.1 diff --git a/.riot/requirements/8297334.txt b/.riot/requirements/16311ec.txt similarity index 79% rename from .riot/requirements/8297334.txt rename to .riot/requirements/16311ec.txt index 2e1342e47e6..a8d1d520ad8 100644 --- a/.riot/requirements/8297334.txt +++ b/.riot/requirements/16311ec.txt @@ -2,43 +2,47 @@ # This file is autogenerated by pip-compile with Python 3.9 # by the following command: # -# pip-compile --no-annotate .riot/requirements/8297334.in +# pip-compile --no-annotate .riot/requirements/16311ec.in # -ai21==2.4.0 +ai21==2.4.1 ai21-tokenizer==0.9.1 aiohttp==3.9.5 aiosignal==1.3.1 annotated-types==0.7.0 +anthropic==0.28.0 anyio==4.4.0 async-timeout==4.0.3 attrs==23.2.0 -boto3==1.34.114 -botocore==1.34.114 -certifi==2024.2.2 +boto3==1.34.120 +botocore==1.34.120 +certifi==2024.6.2 charset-normalizer==3.3.2 -cohere==5.5.3 +cohere==5.5.4 coverage[toml]==7.5.3 dataclasses-json==0.6.6 +defusedxml==0.7.1 distro==1.9.0 exceptiongroup==1.2.1 fastavro==1.9.4 filelock==3.14.0 frozenlist==1.4.1 -fsspec==2024.5.0 +fsspec==2024.6.0 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 httpx-sse==0.4.0 -huggingface-hub==0.23.2 +huggingface-hub==0.23.3 hypothesis==6.45.0 idna==3.7 importlib-metadata==7.1.0 iniconfig==2.0.0 +jiter==0.4.1 jmespath==1.0.1 jsonpatch==1.33 jsonpointer==2.4 langchain==0.1.20 +langchain-anthropic==0.1.11 langchain-aws==0.1.6 langchain-community==0.0.38 langchain-core==0.1.52 @@ -59,9 +63,9 @@ packaging==23.2 pinecone-client==3.2.2 pluggy==1.5.0 psutil==5.9.8 -pydantic==2.7.2 -pydantic-core==2.18.3 -pytest==8.2.1 +pydantic==2.7.3 +pydantic-core==2.18.4 +pytest==8.2.2 pytest-asyncio==0.21.1 pytest-cov==5.0.0 pytest-mock==3.14.0 @@ -78,15 +82,15 @@ sortedcontainers==2.4.0 sqlalchemy==2.0.30 tenacity==8.3.0 tiktoken==0.7.0 -tokenizers==0.19.1 +tokenizers==0.15.2 tomli==2.0.1 tqdm==4.66.4 types-requests==2.31.0.6 types-urllib3==1.26.25.14 -typing-extensions==4.12.0 +typing-extensions==4.12.1 typing-inspect==0.9.0 urllib3==1.26.18 vcrpy==6.0.1 wrapt==1.16.0 yarl==1.9.4 -zipp==3.19.0 +zipp==3.19.2 diff --git a/.riot/requirements/17e8568.txt b/.riot/requirements/1bd8488.txt similarity index 71% rename from .riot/requirements/17e8568.txt rename to .riot/requirements/1bd8488.txt index 9c1a89830fe..7bde4c4488b 100644 --- a/.riot/requirements/17e8568.txt +++ b/.riot/requirements/1bd8488.txt @@ -2,66 +2,70 @@ # This file is autogenerated by pip-compile with Python 3.9 # by the following command: # -# pip-compile --no-annotate .riot/requirements/17e8568.in +# pip-compile --no-annotate .riot/requirements/1bd8488.in # -ai21==2.4.0 +ai21==2.4.1 ai21-tokenizer==0.9.1 aiohttp==3.9.5 aiosignal==1.3.1 annotated-types==0.7.0 +anthropic==0.28.0 anyio==4.4.0 async-timeout==4.0.3 attrs==23.2.0 -boto3==1.34.114 -botocore==1.34.114 -certifi==2024.2.2 +boto3==1.34.120 +botocore==1.34.120 +certifi==2024.6.2 charset-normalizer==3.3.2 -cohere==5.5.3 +cohere==5.5.4 coverage[toml]==7.5.3 dataclasses-json==0.6.6 +defusedxml==0.7.1 distro==1.9.0 exceptiongroup==1.2.1 fastavro==1.9.4 filelock==3.14.0 frozenlist==1.4.1 -fsspec==2024.5.0 +fsspec==2024.6.0 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 httpx-sse==0.4.0 -huggingface-hub==0.23.2 +huggingface-hub==0.23.3 hypothesis==6.45.0 idna==3.7 importlib-metadata==7.1.0 iniconfig==2.0.0 +jiter==0.4.1 jmespath==1.0.1 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.1 +langchain==0.2.2 +langchain-anthropic==0.1.15 langchain-aws==0.1.6 -langchain-community==0.2.1 -langchain-core==0.2.1 -langchain-openai==0.1.7 +langchain-community==0.2.3 +langchain-core==0.2.4 +langchain-openai==0.1.8 langchain-pinecone==0.1.1 -langchain-text-splitters==0.2.0 -langsmith==0.1.63 +langchain-text-splitters==0.2.1 +langsmith==0.1.73 marshmallow==3.21.2 mock==5.1.0 multidict==6.0.5 mypy-extensions==1.0.0 numexpr==2.10.0 numpy==1.26.4 -openai==1.30.4 +openai==1.31.1 opentracing==2.4.0 orjson==3.10.3 packaging==23.2 pinecone-client==3.2.2 pluggy==1.5.0 psutil==5.9.8 -pydantic==2.7.2 -pydantic-core==2.18.3 -pytest==8.2.1 +pydantic==2.7.3 +pydantic-core==2.18.4 +pytest==8.2.2 pytest-asyncio==0.21.1 pytest-cov==5.0.0 pytest-mock==3.14.0 @@ -78,15 +82,15 @@ sortedcontainers==2.4.0 sqlalchemy==2.0.30 tenacity==8.3.0 tiktoken==0.7.0 -tokenizers==0.19.1 +tokenizers==0.15.2 tomli==2.0.1 tqdm==4.66.4 types-requests==2.31.0.6 types-urllib3==1.26.25.14 -typing-extensions==4.12.0 +typing-extensions==4.12.1 typing-inspect==0.9.0 urllib3==1.26.18 vcrpy==6.0.1 wrapt==1.16.0 yarl==1.9.4 -zipp==3.19.0 +zipp==3.19.2 diff --git a/.riot/requirements/ee6f953.txt b/.riot/requirements/447443e.txt similarity index 68% rename from .riot/requirements/ee6f953.txt rename to .riot/requirements/447443e.txt index d2830024f5c..664a5354e77 100644 --- a/.riot/requirements/ee6f953.txt +++ b/.riot/requirements/447443e.txt @@ -2,64 +2,68 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --no-annotate .riot/requirements/ee6f953.in +# pip-compile --no-annotate .riot/requirements/447443e.in # -ai21==2.4.0 +ai21==2.4.1 ai21-tokenizer==0.9.1 aiohttp==3.9.5 aiosignal==1.3.1 annotated-types==0.7.0 +anthropic==0.28.0 anyio==4.4.0 attrs==23.2.0 -boto3==1.34.114 -botocore==1.34.114 -certifi==2024.2.2 +boto3==1.34.120 +botocore==1.34.120 +certifi==2024.6.2 charset-normalizer==3.3.2 -cohere==5.5.3 +cohere==5.5.4 coverage[toml]==7.5.3 dataclasses-json==0.6.6 +defusedxml==0.7.1 distro==1.9.0 exceptiongroup==1.2.1 fastavro==1.9.4 filelock==3.14.0 frozenlist==1.4.1 -fsspec==2024.5.0 +fsspec==2024.6.0 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 httpx-sse==0.4.0 -huggingface-hub==0.23.2 +huggingface-hub==0.23.3 hypothesis==6.45.0 idna==3.7 iniconfig==2.0.0 +jiter==0.4.1 jmespath==1.0.1 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.1 +langchain==0.2.2 +langchain-anthropic==0.1.15 langchain-aws==0.1.6 -langchain-community==0.2.1 -langchain-core==0.2.1 -langchain-openai==0.1.7 +langchain-community==0.2.3 +langchain-core==0.2.4 +langchain-openai==0.1.8 langchain-pinecone==0.1.1 -langchain-text-splitters==0.2.0 -langsmith==0.1.63 +langchain-text-splitters==0.2.1 +langsmith==0.1.73 marshmallow==3.21.2 mock==5.1.0 multidict==6.0.5 mypy-extensions==1.0.0 numexpr==2.10.0 numpy==1.26.4 -openai==1.30.4 +openai==1.31.1 opentracing==2.4.0 orjson==3.10.3 packaging==23.2 pinecone-client==3.2.2 pluggy==1.5.0 psutil==5.9.8 -pydantic==2.7.2 -pydantic-core==2.18.3 -pytest==8.2.1 +pydantic==2.7.3 +pydantic-core==2.18.4 +pytest==8.2.2 pytest-asyncio==0.21.1 pytest-cov==5.0.0 pytest-mock==3.14.0 @@ -76,10 +80,10 @@ sortedcontainers==2.4.0 sqlalchemy==2.0.30 tenacity==8.3.0 tiktoken==0.7.0 -tokenizers==0.19.1 +tokenizers==0.15.2 tqdm==4.66.4 -types-requests==2.32.0.20240523 -typing-extensions==4.12.0 +types-requests==2.32.0.20240602 +typing-extensions==4.12.1 typing-inspect==0.9.0 urllib3==2.2.1 vcrpy==6.0.1 diff --git a/.riot/requirements/1f6f978.txt b/.riot/requirements/b555672.txt similarity index 77% rename from .riot/requirements/1f6f978.txt rename to .riot/requirements/b555672.txt index bf44d1459ea..077a6765c09 100644 --- a/.riot/requirements/1f6f978.txt +++ b/.riot/requirements/b555672.txt @@ -2,42 +2,46 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --no-annotate .riot/requirements/1f6f978.in +# pip-compile --no-annotate .riot/requirements/b555672.in # -ai21==2.4.0 +ai21==2.4.1 ai21-tokenizer==0.9.1 aiohttp==3.9.5 aiosignal==1.3.1 annotated-types==0.7.0 +anthropic==0.28.0 anyio==4.4.0 async-timeout==4.0.3 attrs==23.2.0 -boto3==1.34.114 -botocore==1.34.114 -certifi==2024.2.2 +boto3==1.34.120 +botocore==1.34.120 +certifi==2024.6.2 charset-normalizer==3.3.2 -cohere==5.5.3 +cohere==5.5.4 coverage[toml]==7.5.3 dataclasses-json==0.6.6 +defusedxml==0.7.1 distro==1.9.0 exceptiongroup==1.2.1 fastavro==1.9.4 filelock==3.14.0 frozenlist==1.4.1 -fsspec==2024.5.0 +fsspec==2024.6.0 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 httpx-sse==0.4.0 -huggingface-hub==0.23.2 +huggingface-hub==0.23.3 hypothesis==6.45.0 idna==3.7 iniconfig==2.0.0 +jiter==0.4.1 jmespath==1.0.1 jsonpatch==1.33 jsonpointer==2.4 langchain==0.1.20 +langchain-anthropic==0.1.11 langchain-aws==0.1.6 langchain-community==0.0.38 langchain-core==0.1.52 @@ -58,9 +62,9 @@ packaging==23.2 pinecone-client==3.2.2 pluggy==1.5.0 psutil==5.9.8 -pydantic==2.7.2 -pydantic-core==2.18.3 -pytest==8.2.1 +pydantic==2.7.3 +pydantic-core==2.18.4 +pytest==8.2.2 pytest-asyncio==0.21.1 pytest-cov==5.0.0 pytest-mock==3.14.0 @@ -77,11 +81,11 @@ sortedcontainers==2.4.0 sqlalchemy==2.0.30 tenacity==8.3.0 tiktoken==0.7.0 -tokenizers==0.19.1 +tokenizers==0.15.2 tomli==2.0.1 tqdm==4.66.4 -types-requests==2.32.0.20240523 -typing-extensions==4.12.0 +types-requests==2.32.0.20240602 +typing-extensions==4.12.1 typing-inspect==0.9.0 urllib3==2.2.1 vcrpy==6.0.1 diff --git a/ddtrace/contrib/anthropic/patch.py b/ddtrace/contrib/anthropic/patch.py index b06ef78e188..632d2aa5235 100644 --- a/ddtrace/contrib/anthropic/patch.py +++ b/ddtrace/contrib/anthropic/patch.py @@ -103,6 +103,9 @@ def traced_chat_model_generate(anthropic, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span=span, resp=chat_completions, args=args, kwargs=kwargs) + span.finish() return chat_completions @@ -175,6 +178,9 @@ async def traced_async_chat_model_generate(anthropic, pin, func, instance, args, span.set_exc_info(*sys.exc_info()) raise finally: + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span=span, resp=chat_completions, args=args, kwargs=kwargs) + span.finish() return chat_completions diff --git a/ddtrace/contrib/anthropic/utils.py b/ddtrace/contrib/anthropic/utils.py index f9c7359d3a8..72ca1ec1d64 100644 --- a/ddtrace/contrib/anthropic/utils.py +++ b/ddtrace/contrib/anthropic/utils.py @@ -3,6 +3,7 @@ from typing import Optional from ddtrace.internal.logger import get_logger +from ddtrace.llmobs._integrations.anthropic import _get_attr log = get_logger(__name__) @@ -47,11 +48,3 @@ def _extract_api_key(instance: Any) -> Optional[str]: if client: return getattr(client, "api_key", None) return None - - -def _get_attr(o: Any, attr: str, default: Any): - # Since our response may be a dict or object, convenience method - if isinstance(o, dict): - return o.get(attr, default) - else: - return getattr(o, attr, default) diff --git a/ddtrace/llmobs/_integrations/anthropic.py b/ddtrace/llmobs/_integrations/anthropic.py index 5b18a43dd74..4e368e6de5c 100644 --- a/ddtrace/llmobs/_integrations/anthropic.py +++ b/ddtrace/llmobs/_integrations/anthropic.py @@ -1,10 +1,19 @@ +import json from typing import Any from typing import Dict +from typing import Iterable +from typing import List from typing import Optional from ddtrace._trace.span import Span -from ddtrace.contrib.anthropic.utils import _get_attr from ddtrace.internal.logger import get_logger +from ddtrace.llmobs._constants import INPUT_MESSAGES +from ddtrace.llmobs._constants import METADATA +from ddtrace.llmobs._constants import METRICS +from ddtrace.llmobs._constants import MODEL_NAME +from ddtrace.llmobs._constants import MODEL_PROVIDER +from ddtrace.llmobs._constants import OUTPUT_MESSAGES +from ddtrace.llmobs._constants import SPAN_KIND from .base import BaseLLMIntegration @@ -35,14 +44,125 @@ def _set_base_span_tags( else: span.set_tag_str(API_KEY, api_key) + def llmobs_set_tags( + self, + resp: Any, + span: Span, + args: List[Any], + kwargs: Dict[str, Any], + err: Optional[Any] = None, + ) -> None: + """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.*" tags.""" + if not self.llmobs_enabled: + return + + parameters = { + "temperature": float(kwargs.get("temperature", 1.0)), + "max_tokens": float(kwargs.get("max_tokens", 0)), + } + messages = kwargs.get("messages") + system_prompt = kwargs.get("system") + input_messages = self._extract_input_message(messages, system_prompt) + + span.set_tag_str(SPAN_KIND, "llm") + span.set_tag_str(MODEL_NAME, span.get_tag("anthropic.request.model") or "") + span.set_tag_str(INPUT_MESSAGES, json.dumps(input_messages)) + span.set_tag_str(METADATA, json.dumps(parameters)) + span.set_tag_str(MODEL_PROVIDER, "anthropic") + if err or resp is None: + span.set_tag_str(OUTPUT_MESSAGES, json.dumps([{"content": ""}])) + else: + output_messages = self._extract_output_message(resp) + span.set_tag_str(OUTPUT_MESSAGES, json.dumps(output_messages)) + + usage = self._get_llmobs_metrics_tags(span) + if usage != {}: + span.set_tag_str(METRICS, json.dumps(usage)) + + def _extract_input_message(self, messages, system_prompt=None): + """Extract input messages from the stored prompt. + Anthropic allows for messages and multiple texts in a message, which requires some special casing. + """ + if not isinstance(messages, Iterable): + log.warning("Anthropic input must be a list of messages.") + + input_messages = [] + if system_prompt is not None: + input_messages.append({"content": system_prompt, "role": "system"}) + for message in messages: + if not isinstance(message, dict): + log.warning("Anthropic message input must be a list of message param dicts.") + continue + + content = message.get("content", None) + role = message.get("role", None) + + if role is None or content is None: + log.warning("Anthropic input message must have content and role.") + + if isinstance(content, str): + input_messages.append({"content": content, "role": role}) + + elif isinstance(content, list): + for block in content: + if block.get("type") == "text": + input_messages.append({"content": block.get("text", ""), "role": role}) + elif block.get("type") == "image": + # Store a placeholder for potentially enormous binary image data. + input_messages.append({"content": "([IMAGE DETECTED])", "role": role}) + else: + input_messages.append({"content": str(block), "role": role}) + + return input_messages + + def _extract_output_message(self, response): + """Extract output messages from the stored response.""" + output_messages = [] + content = _get_attr(response, "content", None) + role = _get_attr(response, "role", "") + + if isinstance(content, str): + return [{"content": content, "role": role}] + + elif isinstance(content, list): + for completion in content: + text = _get_attr(completion, "text", None) + if isinstance(text, str): + output_messages.append({"content": text, "role": role}) + return output_messages + def record_usage(self, span: Span, usage: Dict[str, Any]) -> None: if not usage: return input_tokens = _get_attr(usage, "input_tokens", None) output_tokens = _get_attr(usage, "output_tokens", None) - span.set_metric("anthropic.response.usage.input_tokens", input_tokens) - span.set_metric("anthropic.response.usage.output_tokens", output_tokens) - + if input_tokens is not None: + span.set_metric("anthropic.response.usage.input_tokens", input_tokens) + if output_tokens is not None: + span.set_metric("anthropic.response.usage.output_tokens", output_tokens) if input_tokens is not None and output_tokens is not None: span.set_metric("anthropic.response.usage.total_tokens", input_tokens + output_tokens) + + @staticmethod + def _get_llmobs_metrics_tags(span): + usage = {} + prompt_tokens = span.get_metric("anthropic.response.usage.input_tokens") + completion_tokens = span.get_metric("anthropic.response.usage.output_tokens") + total_tokens = span.get_metric("anthropic.response.usage.total_tokens") + + if prompt_tokens is not None: + usage["prompt_tokens"] = prompt_tokens + if completion_tokens is not None: + usage["completion_tokens"] = completion_tokens + if total_tokens is not None: + usage["total_tokens"] = total_tokens + return usage + + +def _get_attr(o: Any, attr: str, default: Any): + # Since our response may be a dict or object, convenience method + if isinstance(o, dict): + return o.get(attr, default) + else: + return getattr(o, attr, default) diff --git a/ddtrace/llmobs/_integrations/langchain.py b/ddtrace/llmobs/_integrations/langchain.py index c55e5d6085b..c802b7737ef 100644 --- a/ddtrace/llmobs/_integrations/langchain.py +++ b/ddtrace/llmobs/_integrations/langchain.py @@ -32,6 +32,7 @@ TOTAL_COST = "langchain.tokens.total_cost" TYPE = "langchain.request.type" +ANTHROPIC_PROVIDER_NAME = "anthropic" BEDROCK_PROVIDER_NAME = "amazon_bedrock" OPENAI_PROVIDER_NAME = "openai" @@ -67,6 +68,8 @@ def llmobs_set_tags( llmobs_integration = "bedrock" elif model_provider.startswith(OPENAI_PROVIDER_NAME): llmobs_integration = "openai" + elif operation == "chat" and model_provider.startswith(ANTHROPIC_PROVIDER_NAME): + llmobs_integration = "anthropic" is_workflow = LLMObs._integration_is_enabled(llmobs_integration) @@ -92,9 +95,9 @@ def _llmobs_set_metadata(self, span: Span, model_provider: Optional[str] = None) or span.get_tag(f"langchain.request.{model_provider}.parameters.model_kwargs.max_tokens") # huggingface ) - if temperature is not None: + if temperature is not None and temperature != "None": metadata["temperature"] = float(temperature) - if max_tokens is not None: + if max_tokens is not None and max_tokens != "None": metadata["max_tokens"] = int(max_tokens) if metadata: span.set_tag_str(METADATA, json.dumps(metadata)) diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 252a701776c..3fd2139ff6d 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -52,7 +52,12 @@ log = get_logger(__name__) -SUPPORTED_LLMOBS_INTEGRATIONS = {"bedrock": "botocore", "openai": "openai", "langchain": "langchain"} +SUPPORTED_LLMOBS_INTEGRATIONS = { + "anthropic": "anthropic", + "bedrock": "botocore", + "openai": "openai", + "langchain": "langchain", +} class LLMObs(Service): diff --git a/releasenotes/notes/add-anthropic-llm-observability-27e914a3a23b5001.yaml b/releasenotes/notes/add-anthropic-llm-observability-27e914a3a23b5001.yaml new file mode 100644 index 00000000000..2332d92a3e9 --- /dev/null +++ b/releasenotes/notes/add-anthropic-llm-observability-27e914a3a23b5001.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + LLM Observability: Adds support to automatically submit Anthropic chat messages to LLM Observability. diff --git a/riotfile.py b/riotfile.py index a376a7b2a7b..9b7446fdcf0 100644 --- a/riotfile.py +++ b/riotfile.py @@ -2507,6 +2507,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION): "langchain-community": "==0.0.38", "langchain-core": "==0.1.52", "langchain-openai": "==0.1.6", + "langchain-anthropic": "==0.1.11", "langchain-pinecone": "==0.1.0", "langsmith": "==0.1.58", "openai": "==1.30.3", @@ -2523,6 +2524,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION): "langchain-core": latest, "langchain-openai": latest, "langchain-pinecone": latest, + "langchain-anthropic": latest, "langsmith": latest, "openai": latest, "pinecone-client": latest, diff --git a/tests/contrib/anthropic/cassettes/anthropic_completion_invalid_api_key.yaml b/tests/contrib/anthropic/cassettes/anthropic_completion_invalid_api_key.yaml new file mode 100644 index 00000000000..1723c1368a4 --- /dev/null +++ b/tests/contrib/anthropic/cassettes/anthropic_completion_invalid_api_key.yaml @@ -0,0 +1,70 @@ +interactions: +- request: + body: '{"max_tokens": 15, "messages": [{"role": "user", "content": [{"type": "text", + "text": "Hello, I am looking for information about some books!"}, {"type": "text", + "text": "What is the best selling book?"}]}], "model": "claude-3-opus-20240229", + "system": "Respond only in all caps.", "temperature": 0.8}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '300' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.28.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.28.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.9 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: '{"type":"error","error":{"type":"authentication_error","message":"invalid + x-api-key"}}' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 88f189dac80ac32b-EWR + Connection: + - keep-alive + Content-Length: + - '86' + Content-Type: + - application/json + Date: + - Wed, 05 Jun 2024 16:28:54 GMT + Server: + - cloudflare + request-id: + - req_013JyjhVcnhy8mfJkvBTqMNB + via: + - 1.1 google + x-cloud-trace-context: + - 93ac98996f397cc0399d31159d38f4bb + x-should-retry: + - 'false' + status: + code: 401 + message: Unauthorized +version: 1 diff --git a/tests/contrib/anthropic/cassettes/anthropic_completion_multi_prompt.yaml b/tests/contrib/anthropic/cassettes/anthropic_completion_multi_prompt.yaml index c79af1d1917..fa9b49e5396 100644 --- a/tests/contrib/anthropic/cassettes/anthropic_completion_multi_prompt.yaml +++ b/tests/contrib/anthropic/cassettes/anthropic_completion_multi_prompt.yaml @@ -2,8 +2,8 @@ interactions: - request: body: '{"max_tokens": 15, "messages": [{"role": "user", "content": [{"type": "text", "text": "Hello, I am looking for information about some books!"}, {"type": "text", - "text": "Can you explain what Descartes meant by ''I think, therefore I am''?"}]}], - "model": "claude-3-opus-20240229", "system": "Respond only in all caps."}' + "text": "What is the best selling book?"}]}], "model": "claude-3-opus-20240229", + "system": "Respond only in all caps.", "temperature": 0.8}' headers: accept: - application/json @@ -14,13 +14,13 @@ interactions: connection: - keep-alive content-length: - - '316' + - '300' content-type: - application/json host: - api.anthropic.com user-agent: - - Anthropic/Python 0.26.1 + - Anthropic/Python 0.28.0 x-stainless-arch: - arm64 x-stainless-async: @@ -30,26 +30,26 @@ interactions: x-stainless-os: - MacOS x-stainless-package-version: - - 0.26.1 + - 0.28.0 x-stainless-runtime: - CPython x-stainless-runtime-version: - - 3.10.13 + - 3.10.9 method: POST uri: https://api.anthropic.com/v1/messages response: body: string: !!binary | - H4sIAAAAAAAAA0yOQUvDQBSE/0qYi5cNpLFR3FvUSEuphqb1UJWwJM8a3OzG7ltpCfnvktKCp4GZ - b4bp0dSQaN2ujCbJ/etxc7NVy23++ftVL27znNsEAnzsaKTIObUjCOytHg3lXONYGYZAa2vSkKi0 - 8jWF16HtvAvjKJ5GcXwHgcoaJsOQb/1lkOkwVk8i8ZgVD+lqnRVXwVO6fNkUQT5bpUUWvGMerGfz - 54XA8CHg2HblnpSzZjylDiXbbzIO58jRjydTEaTxWgv402nZozGd5wssp4mA9fzfmiTD8AcAAP// - AwCozOzqEgEAAA== + H4sIAAAAAAAAA0yOYUuEQBiE/8oyn1fw9ipovyXZZWdKKBFUiOnbIemud+8ueMj99/DooE8DM88M + M6NroTHwrgpXZfTinqJvNWz3X69vPBXHbJPdQMIdR1ooYq53BImD7RejZu7Y1cZBYrAt9dBo+tq3 + FKwDO3oOVKiuQqVuIdFY48g46Pf5MuhoWqpn0SgfYxHFRRkUcZom2UZEeb4V+YO4S1NRJs+xSArx + gfs8w+lTgp0dqwPVbM3yrZ4qZ3/IMP4ipr0n0xC08X0v4c/f9YzOjN5dYL1WEta7/9bq+nT6BQAA + //8DAMSYrqgZAQAA headers: CF-Cache-Status: - DYNAMIC CF-RAY: - - 88ea9acec90172b7-EWR + - 88f16344983e1861-EWR Connection: - keep-alive Content-Encoding: @@ -57,7 +57,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 04 Jun 2024 20:17:11 GMT + - Wed, 05 Jun 2024 16:02:36 GMT Server: - cloudflare Transfer-Encoding: @@ -67,19 +67,19 @@ interactions: anthropic-ratelimit-requests-remaining: - '4' anthropic-ratelimit-requests-reset: - - '2024-06-04T20:17:57Z' + - '2024-06-05T16:02:57Z' anthropic-ratelimit-tokens-limit: - '10000' anthropic-ratelimit-tokens-remaining: - '10000' anthropic-ratelimit-tokens-reset: - - '2024-06-04T20:17:57Z' + - '2024-06-05T16:02:57Z' request-id: - - req_01PDCp5gcfpzQ4P5NAdtXfrU + - req_01Bd7D9NJM29LYXW5VTm99rv via: - 1.1 google x-cloud-trace-context: - - 609af05f60c212e11bbb86f767f6f1b0 + - 07d6532b3235336a58f4f8d0baffd032 status: code: 200 message: OK diff --git a/tests/contrib/anthropic/conftest.py b/tests/contrib/anthropic/conftest.py index d5307714849..9784b5e647a 100644 --- a/tests/contrib/anthropic/conftest.py +++ b/tests/contrib/anthropic/conftest.py @@ -1,10 +1,12 @@ import os +import mock import pytest from ddtrace import Pin from ddtrace.contrib.anthropic.patch import patch from ddtrace.contrib.anthropic.patch import unpatch +from ddtrace.llmobs import LLMObs from tests.contrib.anthropic.utils import get_request_vcr from tests.utils import DummyTracer from tests.utils import DummyWriter @@ -18,6 +20,11 @@ def ddtrace_config_anthropic(): return {} +@pytest.fixture +def ddtrace_global_config(): + return {} + + @pytest.fixture def snapshot_tracer(anthropic): pin = Pin.get_from(anthropic) @@ -25,17 +32,39 @@ def snapshot_tracer(anthropic): @pytest.fixture -def mock_tracer(anthropic): +def mock_tracer(ddtrace_global_config, anthropic): pin = Pin.get_from(anthropic) mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) pin.override(anthropic, tracer=mock_tracer) pin.tracer.configure() + if ddtrace_global_config.get("_llmobs_enabled", False): + # Have to disable and re-enable LLMObs to use to mock tracer. + LLMObs.disable() + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) yield mock_tracer @pytest.fixture -def anthropic(ddtrace_config_anthropic): - with override_global_config({"_dd_api_key": ""}): +def mock_llmobs_writer(scope="session"): + patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") + try: + LLMObsSpanWriterMock = patcher.start() + m = mock.MagicMock() + LLMObsSpanWriterMock.return_value = m + yield m + finally: + patcher.stop() + + +def default_global_config(): + return {"_dd_api_key": ""} + + +@pytest.fixture +def anthropic(ddtrace_global_config, ddtrace_config_anthropic): + global_config = default_global_config() + global_config.update(ddtrace_global_config) + with override_global_config(global_config): with override_config("anthropic", ddtrace_config_anthropic): with override_env( dict( diff --git a/tests/contrib/anthropic/test_anthropic.py b/tests/contrib/anthropic/test_anthropic.py index 6de89c87e3a..a619c6a3cf0 100644 --- a/tests/contrib/anthropic/test_anthropic.py +++ b/tests/contrib/anthropic/test_anthropic.py @@ -67,12 +67,13 @@ def test_anthropic_llm_sync_multiple_prompts(anthropic, request_vcr): model="claude-3-opus-20240229", max_tokens=15, system="Respond only in all caps.", + temperature=0.8, messages=[ { "role": "user", "content": [ {"type": "text", "text": "Hello, I am looking for information about some books!"}, - {"type": "text", "text": "Can you explain what Descartes meant by 'I think, therefore I am'?"}, + {"type": "text", "text": "What is the best selling book?"}, ], } ], @@ -227,6 +228,7 @@ async def test_anthropic_llm_async_multiple_prompts(anthropic, request_vcr, snap model="claude-3-opus-20240229", max_tokens=15, system="Respond only in all caps.", + temperature=0.8, messages=[ { "role": "user", @@ -234,7 +236,7 @@ async def test_anthropic_llm_async_multiple_prompts(anthropic, request_vcr, snap {"type": "text", "text": "Hello, I am looking for information about some books!"}, { "type": "text", - "text": "Can you explain what Descartes meant by 'I think, therefore I am'?", + "text": "What is the best selling book?", }, ], } diff --git a/tests/contrib/anthropic/test_anthropic_llmobs.py b/tests/contrib/anthropic/test_anthropic_llmobs.py new file mode 100644 index 00000000000..84a9d865c35 --- /dev/null +++ b/tests/contrib/anthropic/test_anthropic_llmobs.py @@ -0,0 +1,94 @@ +import pytest + +from tests.llmobs._utils import _expected_llmobs_llm_span_event + + +@pytest.mark.parametrize( + "ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] +) +class TestLLMObsAnthropic: + def test_completion(self, anthropic, ddtrace_global_config, mock_llmobs_writer, mock_tracer, request_vcr): + """Ensure llmobs records are emitted for completion endpoints when configured. + + Also ensure the llmobs records have the correct tagging including trace/span ID for trace correlation. + """ + llm = anthropic.Anthropic() + with request_vcr.use_cassette("anthropic_completion_multi_prompt.yaml"): + llm.messages.create( + model="claude-3-opus-20240229", + max_tokens=15, + system="Respond only in all caps.", + temperature=0.8, + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "Hello, I am looking for information about some books!"}, + {"type": "text", "text": "What is the best selling book?"}, + ], + } + ], + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="claude-3-opus-20240229", + model_provider="anthropic", + input_messages=[ + {"content": "Respond only in all caps.", "role": "system"}, + {"content": "Hello, I am looking for information about some books!", "role": "user"}, + {"content": "What is the best selling book?", "role": "user"}, + ], + output_messages=[{"content": 'THE BEST-SELLING BOOK OF ALL TIME IS "DON', "role": "assistant"}], + metadata={"temperature": 0.8, "max_tokens": 15.0}, + token_metrics={"prompt_tokens": 32, "completion_tokens": 15, "total_tokens": 47}, + tags={"ml_app": ""}, + ) + ) + + def test_error(self, anthropic, ddtrace_global_config, mock_llmobs_writer, mock_tracer, request_vcr): + """Ensure llmobs records are emitted for completion endpoints when configured and there is an error. + + Also ensure the llmobs records have the correct tagging including trace/span ID for trace correlation. + """ + llm = anthropic.Anthropic(api_key="invalid_api_key") + with request_vcr.use_cassette("anthropic_completion_invalid_api_key.yaml"): + with pytest.raises(anthropic.AuthenticationError): + llm.messages.create( + model="claude-3-opus-20240229", + max_tokens=15, + system="Respond only in all caps.", + temperature=0.8, + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "Hello, I am looking for information about some books!"}, + {"type": "text", "text": "What is the best selling book?"}, + ], + } + ], + ) + + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="claude-3-opus-20240229", + model_provider="anthropic", + input_messages=[ + {"content": "Respond only in all caps.", "role": "system"}, + {"content": "Hello, I am looking for information about some books!", "role": "user"}, + {"content": "What is the best selling book?", "role": "user"}, + ], + output_messages=[{"content": ""}], + error="anthropic.AuthenticationError", + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), + metadata={"temperature": 0.8, "max_tokens": 15.0}, + tags={"ml_app": ""}, + ) + ) diff --git a/tests/contrib/langchain/cassettes/langchain_community/anthropic_chat_completion_sync.yaml b/tests/contrib/langchain/cassettes/langchain_community/anthropic_chat_completion_sync.yaml new file mode 100644 index 00000000000..283af60ac3b --- /dev/null +++ b/tests/contrib/langchain/cassettes/langchain_community/anthropic_chat_completion_sync.yaml @@ -0,0 +1,85 @@ +interactions: +- request: + body: '{"max_tokens": 15, "messages": [{"role": "user", "content": "When do you + use ''whom'' instead of ''who''?"}], "model": "claude-3-opus-20240229", "temperature": + 0.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '160' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.28.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.28.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.9 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA0xPTUvDQBT8K8ucN5Ck9tA9ijePpVhrJWySZ7ua7It5b7US8t8lxYKngfliZkJo + 4dDLqcqL/W7PtDrUu3r73H4/HOL9Zl08wkJ/BlpcJOJPBIuRu4XwIkHUR4VFzy11cGg6n1rKVhkP + SbIyL+/ystzAouGoFBXuZboVKl2W6BUcjng6c3+ECWKSUGu8GD2T4fqdGjX8Zrz5orE2PJphpIEl + aOCI+dVClIdqJC8cl6H+Uil/UBT8SUKfiWJDcDF1nUW6HnETQhyS3syuLC046X+qWM/zLwAAAP// + AwCVZ1cpJgEAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 88f280bf1ca11811-EWR + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 05 Jun 2024 19:17:30 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + anthropic-ratelimit-requests-limit: + - '5' + anthropic-ratelimit-requests-remaining: + - '4' + anthropic-ratelimit-requests-reset: + - '2024-06-05T19:17:57Z' + anthropic-ratelimit-tokens-limit: + - '10000' + anthropic-ratelimit-tokens-remaining: + - '10000' + anthropic-ratelimit-tokens-reset: + - '2024-06-05T19:17:57Z' + request-id: + - req_01Wtyi2DFVCLRToeZc2tHttk + via: + - 1.1 google + x-cloud-trace-context: + - fc184fcf99f97f1199b087e4ddc2aee5 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/langchain/conftest.py b/tests/contrib/langchain/conftest.py index 5d7a9db0b4e..790f878123a 100644 --- a/tests/contrib/langchain/conftest.py +++ b/tests/contrib/langchain/conftest.py @@ -92,6 +92,7 @@ def langchain(ddtrace_config_langchain, mock_logs, mock_metrics): dict( OPENAI_API_KEY=os.getenv("OPENAI_API_KEY", ""), COHERE_API_KEY=os.getenv("COHERE_API_KEY", ""), + ANTHROPIC_API_KEY=os.getenv("ANTHROPIC_API_KEY", ""), HUGGINGFACEHUB_API_TOKEN=os.getenv("HUGGINGFACEHUB_API_TOKEN", ""), AI21_API_KEY=os.getenv("AI21_API_KEY", ""), ) @@ -106,6 +107,25 @@ def langchain(ddtrace_config_langchain, mock_logs, mock_metrics): unpatch() +@pytest.fixture +def langchain_anthropic(ddtrace_config_langchain, mock_logs, mock_metrics): + with override_global_config(default_global_config()): + with override_config("langchain", ddtrace_config_langchain): + with override_env( + dict( + ANTHROPIC_API_KEY=os.getenv("ANTHROPIC_API_KEY", ""), + ) + ): + patch() + import langchain_anthropic + + mock_logs.reset_mock() + mock_metrics.reset_mock() + + yield langchain_anthropic + unpatch() + + @pytest.fixture def langchain_community(ddtrace_config_langchain, mock_logs, mock_metrics, langchain): import langchain_community diff --git a/tests/contrib/langchain/test_langchain_llmobs.py b/tests/contrib/langchain/test_langchain_llmobs.py index c8cb72009b0..13e99153433 100644 --- a/tests/contrib/langchain/test_langchain_llmobs.py +++ b/tests/contrib/langchain/test_langchain_llmobs.py @@ -502,6 +502,17 @@ def test_llmobs_chain_schema_io(self, langchain_core, langchain_openai, mock_llm ) _assert_expected_llmobs_llm_span(trace[1], mock_llmobs_span_writer, mock_io=True) + def test_llmobs_anthropic_chat_model(self, langchain_anthropic, mock_llmobs_span_writer, mock_tracer): + chat = langchain_anthropic.ChatAnthropic(temperature=0, model="claude-3-opus-20240229", max_tokens=15) + span = self._invoke_chat( + chat_model=chat, + prompt="When do you use 'whom' instead of 'who'?", + mock_tracer=mock_tracer, + cassette_name="anthropic_chat_completion_sync.yaml", + ) + assert mock_llmobs_span_writer.enqueue.call_count == 1 + _assert_expected_llmobs_llm_span(span, mock_llmobs_span_writer, input_role="user") + @pytest.mark.skipif(not SHOULD_PATCH_LANGCHAIN_COMMUNITY, reason="These tests are for langchain >= 0.1.0") class TestLangchainTraceStructureWithLlmIntegrations(SubprocessTestCase): @@ -520,6 +531,11 @@ class TestLangchainTraceStructureWithLlmIntegrations(SubprocessTestCase): DD_API_KEY="", ) + anthropic_env_config = dict( + ANTHROPIC_API_KEY="testing", + DD_API_KEY="", + ) + def setUp(self): patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") LLMObsSpanWriterMock = patcher.start() @@ -578,6 +594,14 @@ def _call_openai_llm(OpenAI): with get_request_vcr(subdirectory_name="langchain_community").use_cassette("openai_completion_sync.yaml"): llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?") + @staticmethod + def _call_anthropic_chat(Anthropic): + llm = Anthropic(model="claude-3-opus-20240229", max_tokens=15) + with get_request_vcr(subdirectory_name="langchain_community").use_cassette( + "anthropic_chat_completion_sync.yaml" + ): + llm.invoke("When do you use 'whom' instead of 'who'?") + @run_in_subprocess(env_overrides=bedrock_env_config) def test_llmobs_with_chat_model_bedrock_enabled(self): from langchain_aws import ChatBedrock @@ -642,3 +666,24 @@ def test_llmobs_langchain_with_openai_disabled(self): LLMObs.enable(ml_app="", integrations_enabled=False, agentless_enabled=True) self._call_openai_llm(OpenAI) self._assert_trace_structure_from_writer_call_args(["llm"]) + + @run_in_subprocess(env_overrides=anthropic_env_config) + def test_llmobs_langchain_with_anthropic_enabled(self): + from langchain_anthropic import ChatAnthropic + + patch(langchain=True, anthropic=True) + + LLMObs.enable(ml_app="", integrations_enabled=False, agentless_enabled=True) + self._call_anthropic_chat(ChatAnthropic) + self._assert_trace_structure_from_writer_call_args(["workflow", "llm"]) + + @run_in_subprocess(env_overrides=anthropic_env_config) + def test_llmobs_langchain_with_anthropic_disabled(self): + from langchain_anthropic import ChatAnthropic + + patch(langchain=True) + + LLMObs.enable(ml_app="", integrations_enabled=False, agentless_enabled=True) + + self._call_anthropic_chat(ChatAnthropic) + self._assert_trace_structure_from_writer_call_args(["llm"]) diff --git a/tests/contrib/langchain/utils.py b/tests/contrib/langchain/utils.py index 629fca145d6..783701deec7 100644 --- a/tests/contrib/langchain/utils.py +++ b/tests/contrib/langchain/utils.py @@ -31,7 +31,7 @@ def get_request_vcr(subdirectory_name=""): cassette_library_dir=os.path.join(os.path.dirname(__file__), "cassettes/%s" % subdirectory_name), record_mode="once", match_on=["path"], - filter_headers=["authorization", "OpenAI-Organization", "api-key"], + filter_headers=["authorization", "OpenAI-Organization", "api-key", "x-api-key"], # Ignore requests to the agent ignore_localhost=True, ) diff --git a/tests/snapshots/tests.contrib.anthropic.test_anthropic.test_anthropic_llm_multiple_prompts.json b/tests/snapshots/tests.contrib.anthropic.test_anthropic.test_anthropic_llm_multiple_prompts.json index c270e7a2473..aebc2405be8 100644 --- a/tests/snapshots/tests.contrib.anthropic.test_anthropic.test_anthropic_llm_multiple_prompts.json +++ b/tests/snapshots/tests.contrib.anthropic.test_anthropic.test_anthropic_llm_multiple_prompts.json @@ -14,13 +14,13 @@ "anthropic.request.api_key": "sk-...key>", "anthropic.request.messages.0.content.0.text": "Hello, I am looking for information about some books!", "anthropic.request.messages.0.content.0.type": "text", - "anthropic.request.messages.0.content.1.text": "Can you explain what Descartes meant by 'I think, therefore I am'?", + "anthropic.request.messages.0.content.1.text": "What is the best selling book?", "anthropic.request.messages.0.content.1.type": "text", "anthropic.request.messages.0.role": "user", "anthropic.request.model": "claude-3-opus-20240229", - "anthropic.request.parameters": "{\"max_tokens\": 15}", + "anthropic.request.parameters": "{\"max_tokens\": 15, \"temperature\": 0.8}", "anthropic.request.system": "Respond only in all caps.", - "anthropic.response.completions.content.0.text": "DESCARTES' FAMOUS PHRASE \"I THINK,", + "anthropic.response.completions.content.0.text": "THE BEST-SELLING BOOK OF ALL TIME IS \"DON", "anthropic.response.completions.content.0.type": "text", "anthropic.response.completions.finish_reason": "max_tokens", "anthropic.response.completions.role": "assistant", @@ -32,9 +32,9 @@ "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "anthropic.response.usage.input_tokens": 45, + "anthropic.response.usage.input_tokens": 32, "anthropic.response.usage.output_tokens": 15, - "anthropic.response.usage.total_tokens": 60, + "anthropic.response.usage.total_tokens": 47, "process_id": 98153 }, "duration": 24102000,