diff --git a/ddtrace/contrib/langchain/patch.py b/ddtrace/contrib/langchain/patch.py
index d38f389bdf..3bb94d0300 100644
--- a/ddtrace/contrib/langchain/patch.py
+++ b/ddtrace/contrib/langchain/patch.py
@@ -626,11 +626,13 @@ def traced_embedding(langchain, pin, func, instance, args, kwargs):
     span = integration.trace(
         pin,
         "%s.%s" % (instance.__module__, instance.__class__.__name__),
+        submit_to_llmobs=True,
         interface_type="embedding",
         provider=provider,
         model=_extract_model_name(instance),
         api_key=_extract_api_key(instance),
     )
+    embeddings = None
     try:
         if isinstance(input_texts, str):
             if integration.is_pc_sampled_span(span):
@@ -654,6 +656,14 @@ def traced_embedding(langchain, pin, func, instance, args, kwargs):
         integration.metric(span, "incr", "request.error", 1)
         raise
     finally:
+        if integration.is_pc_sampled_llmobs(span):
+            integration.llmobs_set_tags(
+                "embedding",
+                span,
+                input_texts,
+                embeddings,
+                error=bool(span.error),
+            )
         span.finish()
         integration.metric(span, "dist", "request.duration", span.duration_ns)
         if integration.is_pc_sampled_log(span):
diff --git a/ddtrace/llmobs/_integrations/langchain.py b/ddtrace/llmobs/_integrations/langchain.py
index c802b7737e..e50db2a1a1 100644
--- a/ddtrace/llmobs/_integrations/langchain.py
+++ b/ddtrace/llmobs/_integrations/langchain.py
@@ -10,6 +10,7 @@
 from ddtrace.constants import ERROR_TYPE
 from ddtrace.internal.logger import get_logger
 from ddtrace.llmobs import LLMObs
+from ddtrace.llmobs._constants import INPUT_DOCUMENTS
 from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_VALUE
 from ddtrace.llmobs._constants import METADATA
@@ -20,6 +21,7 @@
 from ddtrace.llmobs._constants import OUTPUT_VALUE
 from ddtrace.llmobs._constants import SPAN_KIND
 
+from ..utils import Document
 from .base import BaseLLMIntegration
 
 
@@ -42,13 +44,15 @@
     "system": "system",
 }
 
+SUPPORTED_OPERATIONS = ["llm", "chat", "chain", "embedding"]
+
 
 class LangChainIntegration(BaseLLMIntegration):
     _integration_name = "langchain"
 
     def llmobs_set_tags(
         self,
-        operation: str,  # oneof "llm","chat","chain"
+        operation: str,  # oneof "llm","chat","chain","embedding"
         span: Span,
         inputs: Any,
         response: Any = None,
@@ -57,6 +61,10 @@ def llmobs_set_tags(
         """Sets meta tags and metrics for span events to be sent to LLMObs."""
         if not self.llmobs_enabled:
             return
+        if operation not in SUPPORTED_OPERATIONS:
+            log.warning("Unsupported operation : %s", operation)
+            return
+
         model_provider = span.get_tag(PROVIDER)
         self._llmobs_set_metadata(span, model_provider)
 
@@ -79,6 +87,8 @@ def llmobs_set_tags(
             self._llmobs_set_meta_tags_from_chat_model(span, inputs, response, error, is_workflow=is_workflow)
         elif operation == "chain":
             self._llmobs_set_meta_tags_from_chain(span, inputs, response, error)
+        elif operation == "embedding":
+            self._llmobs_set_meta_tags_from_embedding(span, inputs, response, error, is_workflow=is_workflow)
         span.set_tag_str(METRICS, json.dumps({}))
 
     def _llmobs_set_metadata(self, span: Span, model_provider: Optional[str] = None) -> None:
@@ -194,6 +204,68 @@ def _llmobs_set_meta_tags_from_chain(
             except TypeError:
                 log.warning("Failed to serialize chain output data to JSON")
 
+    def _llmobs_set_meta_tags_from_embedding(
+        self,
+        span: Span,
+        input_texts: Union[str, List[str]],
+        output_embedding: Union[List[float], List[List[float]], None],
+        error: bool = False,
+        is_workflow: bool = False,
+    ) -> None:
+        """Set LLMObs span kind, model, input and output tags for an embedding call.
+
+        String inputs are tagged as documents (or as a single input value for workflow spans).
+        The output tag is a summary of how many vectors came back and their size, not the vectors;
+        it is set to an empty string when ``error`` is true.
+        """
+        span.set_tag_str(SPAN_KIND, "workflow" if is_workflow else "embedding")
+        span.set_tag_str(MODEL_NAME, span.get_tag(MODEL) or "")
+        span.set_tag_str(MODEL_PROVIDER, span.get_tag(PROVIDER) or "")
+
+        input_tag_key = INPUT_VALUE if is_workflow else INPUT_DOCUMENTS
+        output_tag_key = OUTPUT_VALUE
+
+        output_values: Any
+
+        try:
+            if isinstance(input_texts, str) or (
+                isinstance(input_texts, list) and all(isinstance(text, str) for text in input_texts)
+            ):
+                if is_workflow:
+                    formatted_inputs = self.format_io(input_texts)
+                    formatted_str = (
+                        formatted_inputs
+                        if isinstance(formatted_inputs, str)
+                        else json.dumps(formatted_inputs)
+                    )
+                    span.set_tag_str(input_tag_key, formatted_str)
+                else:
+                    if isinstance(input_texts, str):
+                        input_texts = [input_texts]
+                    input_documents = [Document(text=str(doc)) for doc in input_texts]
+                    span.set_tag_str(input_tag_key, json.dumps(input_documents))
+        except TypeError:
+            log.warning("Failed to serialize embedding input data to JSON")
+        if error:
+            span.set_tag_str(output_tag_key, "")
+        elif output_embedding is not None:
+            try:
+                if isinstance(output_embedding[0], float):
+                    # single embedding through embed_query
+                    output_values = [output_embedding]
+                    embeddings_count = 1
+                else:
+                    # multiple embeddings through embed_documents
+                    output_values = output_embedding
+                    embeddings_count = len(output_embedding)
+                embedding_dim = len(output_values[0])
+                span.set_tag_str(
+                    output_tag_key,
+                    "[{} embedding(s) returned with size {}]".format(embeddings_count, embedding_dim),
+                )
+            except (TypeError, IndexError):
+                log.warning("Failed to write output vectors: %s", output_embedding)
+
     def _set_base_span_tags(  # type: ignore[override]
         self,
         span: Span,
diff --git a/releasenotes/notes/feat-llmobs-submit-langchain-embedding-spans-89c8704ef41cfee3.yaml b/releasenotes/notes/feat-llmobs-submit-langchain-embedding-spans-89c8704ef41cfee3.yaml
new file mode 100644
index 0000000000..d1e3219fed
--- /dev/null
+++ b/releasenotes/notes/feat-llmobs-submit-langchain-embedding-spans-89c8704ef41cfee3.yaml
@@ -0,0 +1,4 @@
+---
+features:
+  - |
+    LLM Observability: The Langchain integration now submits embedding spans to LLM Observability.
diff --git a/tests/contrib/langchain/cassettes/langchain_community/openai_embedding_query_integration.yaml b/tests/contrib/langchain/cassettes/langchain_community/openai_embedding_query_integration.yaml new file mode 100644 index 0000000000..102194b5d3 --- /dev/null +++ b/tests/contrib/langchain/cassettes/langchain_community/openai_embedding_query_integration.yaml @@ -0,0 +1,89 @@ +interactions: +- request: + body: '{"input": "", "model": "text-embedding-ada-002", "encoding_format": "base64"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '77' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.30.3 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.30.3 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.5 + method: POST + uri: https://api.openai.com/v1/embeddings + response: + content: "{\n \"object\": \"list\",\n \"data\": [\n {\n \"object\": + \"embedding\",\n \"index\": 0,\n \"embedding\": 
\"WQDMOlodi7xZNqG6PBYyO3hTJDsxNGM8CtSkurERLjrioqY8zsyHugc8pjvwl7O6N0tKu73p+7uW7lK2eB1PvPZCcDxBq0S8nKCkO5PblLzNYF08Io7Bu99vPbzCT067ivZZvFXmeDqh/+E8M1EivdKQ2jyILxu9966auy21+juJFoW8BT+SO/XHsLsdLwS8+sFYOoFHdDslDSo8A6cTPFIJkDvENri8XZVeO4d+BjxJvz+8YC3dO6Dptzs+3fC8HOOEvCw6uzusfBs9F32yO1eeoryqX9y7LDo7PW3W6rvQ+Fs8NJ0hPGbfLrx0nua7eOd5vOXVj7x1CpG9v9DlPAZVPDwzUSK7QlxZPDBUjjt3NmU8yugJO90NlDz69y07xjPMO4X8MTzy4zK8MB65u4gvGzyVPb68pH7KvNAuMbulT4o8+UYZusOFIzt3og88jyZXPCsd/DuOq5e8UIc7PHS+ETtHXZa8oTU3vGxbK7xIRAA8XgGJu20MQLx01Ds9nbbOuvESczzUKFm7xABjvNaqrbqoYkg4FfvdvIrHGT3BaOS8Vu2NvBiTXDzENjg7odAhu7Z3AL3t6Yq8cVVTvOdtDryox928MTTjOy1wEDxlXdo8Tzu8Ou2zNbwnWak8q/pGu73pe7sQ6ws8mFB8uyKk6zsbEkW8JFyVvK0tMLwcSBq86QWNvNbZ7TwipOs7tPWrvBvc77wP5PY7/hEBPXw3IrwTNJ888vlcPGR9hbukSHU86QUNPWJgRrsP5Ha8AKPqPArUJLy61r08of/hvKUZNbwIUtC8lFZUPFVSozvVXq47GUTxvGP7MDvu/7Q7yLUgvNaqrbp3NuW87+aevIFH9DyIL5s6f6/1u8SbTTwQy+C6Qi2ZPA5pNzzZeAE9rS0wPFGd5bwaxsW8OrQIPQUJPby+VSY8f4A1vG0MwDxNbWg5jquXPDEFo7xTVY+8GRWxvApvD7ubuTq62o6rPEcnwTrGM8w8c7f8PB9b2LoFPxI9OwCIPB57Azx9F/c8Th79PPJ+nTo+rjC/b/OpvPX9BTyDZLO8qTMItxor2zyBAgq7wk/OPLqg6Lu7h9I8WWVhuoECijxb/V87kiqAO4wTGTqX1by8ouZLvNJhGr2kA4s7G9xvOusx4bvNYF08A+x9vIheW7zCGXm8ZcmEvO2ztTsfLJg8eOd5u64UmjteMMk8PSxcPAyb4zwxBSM9nocOu+G7PLu/0GW7GGScPPES8zvyGYg8pbQfvWOWm7wwVI47HV7EvOmZ4ruIlDC7QavEPARYKLzHf0s7B2tmvF9NCLxCXNk61ZSDvAB0qjta57W6ssLCPFBxETz3KVo8nmdjPCLEljwBwCm8R10WvG691Dut91o8qzAcPdD42zw5aAk7yFALvCBCwjuAtgq69hOwvJptOzwTz4k7dL4RPJUHaTwMbCO8iiyvPLVw67phShy7/turvBv8mjuR9Kq78hkIvB/2QrwaK9s8XxezuDmt8zu6cag8nuwjPQWkJ7w8sZw81EgEPaxGxrl+zyC8YGMyvMYzTDuH4xu818DXuwTzkjtXfne7a0UBPXJykrx7hg08e+siPEipFT3zypw7DUz4O/IZiDwZsJs8ODI0vaabibtoxhi9nmfjvBLon7sHa+Y6p+eIvIVhRzz1/YU5vlWmPKvLhrwAo+o831kTPB6qQzy7UX28yZyKPOydCzwBwKk7bdZquzxFcrrbdRW7U1WPO0CVmrx/r3U73zlovNwmqrxgLd07OWiJO1Q1ZLzqSnc8nM/kvIrHmTwWMbO89ZHbuz0s3DwaYbC8NmTgvKAfDTyXn+c48GHePC9tpLsRZks8gDHKu06KJ7znNzk8LiElu3vrIrzNYF08yugJPBKDirzg6ny8+lzDu0NDwzwU5TO9xNEivDY1oLx6tU28c+3RPKpf3DvYcew5AVT/vGBjMryYUHy7ppuJu/NlBzy7Uf271EiEPA+fDLwbEkW8UZ1lvLnv07ywQO
675gFkvBP+SbwYZBy8yZyKPD7d8DszzOE7rffaO9Jhmrvc8FQ8Ds7Mu3gdT7tjlps7CIilPNdbQrwFpKe77v+0PCVyPzswg866PSzcPJKPFTyeZ+M7fei2PKsQ8bt6f3g7q8sGvaiYHbyu3kQ71V4uvKp/hzxpjVc8ghi0uiiF/TsJA+W7Zt8uPI8m17vv5h45/turO1iFjLw4HAq9xYK3O633WjzfOei6nxh4PBorW7zoHiM9M1Eiupse0Ls20Io8HkWuunjn+bzpaiI8RBQDvAFblDz0e7G7nDsPO3/lSrvBiA+9rS0wPQ4z4jumykm8YMjHPEFGrztzIye8//HVu6iYnTsohf08PcfGOor2WTxcrnS75gFkvNBkhrzV+Ri9al6XOwa6Ubo1hIu8MLmjPCm7UrtFYII8I6uAPE7vvLvsAqE8jnVCPLrWPTwuhjo8oWuMvN9vPTxtDEA7HK0vPJlXETwD7P28PhNGvOUa+rsh81a8MB45PGwlVjzDIA66y5kePKLmyzsqPSe8ldiou4he27sz7Aw9S6apPI1fGDtE9Nc7SA4ru0FGr7yVB+m7lu7SPICWX7wriSY8b25pvLpxKDvQZAa8q5UxvFNVjzyyJ1i8OrSIPMcEjLsKb488Il8BvawXhrwNgs07AyJTO3YgOzzsAiG7StXpvD9Jm7vUKFm84KWSOx7gGL2ILxs8dL4RPPkQxLwzUSI8W/1fvMdJdjwlPOo80bAFPEipFbzy4zK69KrxvIt4rrxQcZE9FOUzPaOXYDvR30W8O5TdOwRYqLvPGAe7ieCvPB8smLzaCWu7wO2kvB57A7pcaYq6VKEOPPL53DvOEfI861EMvIiUsLyjl2A7K1PRu9RIBLz9xQG8RqyBPI8mVzptpyo9oWuMO9u6fzzzyhw9xmkhvMQAYztUNWQ8+vctO1oW9rr+dhY8Oc0evCRcFbppEhg8GGScPLy9pzx+ags8oWsMu2UuGrxHJ8E7WoIgvbm5/jsQy+C8nmdju/PKHD19Hoy81g9DvDaatTxBRi85Sb+/vDoZHr05zZ481K0ZPEkkVTyUJ5S7e+uivL2kEbzrzEs7ooE2vCtT0byr+ka8qGJIO61jBTv54QM8CIglvMC3T7zPGIe7KIV9vIJOibyQDUG6GJPcu4QVSLweewM9+UYZPEtBFLxZZWE7ZV3aOypsZ7ubuTq9Qw3uO5vvD73J4fS7GP+Gu0/WJjw7ZR081BKvvEhEALt3NuU8AA+VPOClkruxdsM7PEVyu7+hJTz2eEW4WQDMO6fgc7wjdSs7zxgHPNoplrw3gR+9L5zku4/3ljvGaSG8n9MNPIZ3cTy5JSm8qTMIPJUH6TvqtiE7bCXWO9VerjudgPm7/cWBPBKDijy/PBA8pAOLvIl7mrrSK0W84fGRuHO3fDyomB06al4XPFv9XzzaCes7A6cTvbm5/rvlUE88asMsO+NTuzzDIA680mGau2l3LbvUEq88h+MbvMbOtrph3vG7aXetvLRawbymm4m5+i0DvJRWVDyeh468YGMyvdLGr7wD7H28GGQcO9aqrTsEWKi82ikWOytTUbu/0OW6FmeIPEu8UztAMAW9pxbJO/Bh3jyJRcU8hN/yOp6Hjjl0nmY80savOwGKVLwuvA+8jT/tPJQnFLto3MK8+6jCPE1taDwZeka8wyCOuuiDuLzwMp65Yd5xOnkEuTvD6rg7RI/CvITf8rvxzQi8yugJunc2ZTxuWD88iXuavFbtjby5wBO8rqjvPAElvzyMKUM8NYQLPJVzE71Lpqk8S4Z+O8qytDzTQe+7EuifvEPerTvPR0e8b46UPMMgDjzJZrU75zc5POgeIzxXfnc8nKAkPEasAbrzyhy79HuxPBQU9LxIPes7StVpvFnRiztdtYm67v80O8XnTLxE9Fe8K+67PFhPN7qpePK8Fsydu40/bTwaxkW8r68EvB9b2DyDmgg9Th79OvqSGDzPfRw9tXBruknu/7vBaO
Q8wO2kvLh0lLsIUlA8MuV3vFuYyjx+agu9lok9vUg96zw/5IU8KIV9vOFWJ7sCcb68feg2vHIGaLxAlZq8F32yOTXpIDpnKy686IO4vB+RrTx9F/e7ven7PEXFF7vQyZu8TW1oPF21CbyTQCq88vlcvOEg0ry5wJM8SfUUvO7/NDyrlTG8yzSJPMxKs7tP1qa8kfQqPEB1bzzEm028c7f8u0ItmbzYcew7GURxO1rnNTxIDqu7mm27vOfSI7hgY7K7/FlXO0Vggjz3Kdo8kNfrOwB0qrzXwFc8QeGZvLERrruhmkw8dzZlvNhxbLu09Su6c+3Ru/AyHrsBVP+8BzwmPI8m1zseqsM8LbV6O3c2ZTyry4Y8Xsuzu7eNqrsxaji8rqjvutzBFL0FhPw7+sHYuzY1IL3NljK8rZLFO3EmEzwtcJC7D+T2uwIMKT1FxZc8rqhvvFRrObxAlZq8G5cFPTXpoDsTY1885dWPvJef57x3bLo785THuxEBtrvZ80C7XjDJO1G9EL2sF4a86hu3PHUKkbwfLBi8KKWovHa7pbsy5Xe8CBz7u68qRL16f/i6SKmVvKbKyTzuyd88K4kmvCiF/bs0OAw8QeEZuxKDirxbzp+83ohTvLnvU72sRkY7aMYYvH+vdTwqbOe8GGScO0oLv7wJA2W6pi9fvIwTGTwMBw68wFK6uTloibxRvZA8KbtSu12V3jvTQW+8dU97u96I0zp0nma8oB8NvIQVSLuP9xa83Q0Uu1OETzu38r88zZYyPLnAkzyz34G8hOYHPWjGmLuXn+e5y/4zPCnxJzxh3nG80MkbPBpLBryh0CE8QXyEu0QUAzz89MG7vlUmPPXHsDs+E0a7TW3ouW5YPztTH7o6/hGBvHkEuTtgmYc70Xowuni4uTwLu468pgAfOw1TDb2fGPi8F7OHu5RWVDvuZMo66hs3OqvLhjzhVqc8IxAWPHSe5ryIlLA8Vu2NPP8nK7ySb+q8tlfVuoYyh7zRFZu6ErJKPJaJvbvUEq8898TEPMSbzbxdlV48xR2iu198SDx6tU04jfoCPCskkbxeAQk8az7sOl5mnjxLvFO8Qw1uPt6IU7yZIby8K1NRPGyRAL26oGg8nYD5PJ22TjzmIQ880savOy4hJbwRnKC8bdZqu2+OFL30Fhw86LkNvHCkPrzMefO8xxo2vBGcoLz54YO83iM+PKOX4DvoHqO7tPUrPJPblDzl1Q+8aw+sPDq0CDw/5IW7T9YmvIJ9STwj2sA6yDDgvH0Xd7znbQ47RnYsu4osrzw7lN08ue/TPFW3uLxDQ8M8xwQMvT9Jm7oMbCM9AHQqvND427x6f/i75oakO6BOzbyuqO+80PjbPH2DoTtwH348oWsMPd856Lpjlps88GFeO1JupbxychI8XGmKvDoZnjvmhqS84VanPMv+s7vqtqE8RcWXO9oplju+H1E8Vs3ivIl7mrqb74+8F32yPPiVBDyzDkK93PBUvFodizyywsK5+vetPEf4AD0oCj48Ds7MO655L7yXn2e7gBsgu1C2+7w1s0u7oZpMuxP+yTznsni8EOuLurnAk7yZV5E8sREuPBd9sjs640i7GxJFvIvdwzyPwcG8+UYZu30eDLzAt0881ywCvBDL4DxuvVS8tL9WvAvqzrnJ4XS89BYcPGkSGLx2hdA8XgGJu5ygpDu4Pj88M1EivC+c5DwriaY8of9huefSI7ye7CM7TlRSu9KQ2rxPBee5DR04PKwXhjpRvRC9U4TPvHjujjwMBw48MuX3vDAeuTyPkgE8nDsPPN6I07l96DY7br3UuhE3Cz3zypy8WkxLu837Rzw5aIm7GMkxvHA/KTwJA+U7Um4lPMhQi7wjEBY8rOGwvAo5urzW4AK8sviXvMMgjjyaCCa7twjqOhLoHzsfka285gHkvNKQWry2d4C7ec5jPBP+ybv/wpW8x3/LPKE1t7xmepk8M1EivF+yHb7bP8A8s9+BO198SDzlUM
+6pH5KvGt0QTxFYIK8QXwEOgWkp7v5EMQ7SEQAPK3Imrxq+QG8/10AvMs0CTyYUHy8cNqTO23WajzmIQ+8RPTXObSQlrrkn7o7ZysuO05UUruaCKY6i6duvFEipjx+mcu8FjEzPBLon7zX9iy8zxgHPbKTArszUSI7cKQ+PM7Mhzt2hdC6OwAIvW/zqTwv0rk8X02IPOfSozyMjtg7x0n2vKUZtTyb74+7+JWEvJptOzw1Tja9ppuJvJ7sI7xB4Rk75gFkPCF4lzzfOeg8Y5abOzcVdTxGEZc6of/hOZQnFLyHfoa8U4TPO7nAkzv+dhY8WdELPFDsULx4UyQ9RqyBvK+P2TuIyoU7zhHyO3hTpLtQcRG9KAo+O5lXkTzUSAS73aFpPBTlMzySKgA8cB/+vM7MBz0mI9Q72imWPB0vhDy1cOs8SA6rvHc25bvLyN68yWa1vJ/TDTyeZ2O8+sHYOlNVj7xG28G7lFbUPL/QZTxQh7s8NDiMuxguR7z7eQI8E5k0vIJ9ybx9F3e8MFSOPEtBFLpEj0K8wFK6PK33Wjz9Cuy8xmmhO7h0FDzxzYg86LmNvHc25bqcz+Q7v9DlO30eDLugH428oWuMPJOlPz2tYwW78hkIPPt5gjzbdZW8kb7VuxEBtr3XLAK8HnsDPSmMEjwBVP+8ohwhPYrHmTyBzLS7YRRHOrBgGTy8vae7grMeOrZXVTwq2BG87+YePfiVhDw0nSE6+94XvdZFGLwjqwA9kyB/uwjtursGutG83XIpvORp5bzPfRy86TTNvPj6mTxmRMQ6nocOPKfniLosBOa6aa2COwhS0Lx1b6a7z+KxPKK3i7zpzze8IN0sPO41CrzeIz67KIX9uxj/BryqX9y8+PqZu66ob7tQ7NC8lXMTPa2SxTxRnWW8IELCvAI76TsSfPU7Y/swvI5GAj2WuP27bCXWO6KBNrzvevQ6asOsO3YgO7yT25Q8yzSJvI8mVzxbM7W8+sHYOz39m7xGrAG8aihCOm5CFb1ThE88Vs1ivHPtUbyOdUI8RPTXvDLl9zwSskq8DNE4vB9bWDwQtTa8B2tmvI0/7TcfLJg8oreLPDY1IDx8NyI8VGu5vK8qRDzbuv864fGRPKx8m7vR30W8HS+EPJseULxXOQ28b46UvBM0n7znsni7Pt3wPLuH0jvXLAK8ko8VvDbQir0MBw49eB1PvJOlPzxEFIM4gk4JPKIcITyTQKq8zHlzvFLTujzHSXa8W86fPMGID72NxK08YzEGvVoWdrz+dhY8iiwvPHTUu7wzzOE7UOzQOhuXhTwKtHm7TT6oPDBUDjxYT7c8RWACOjdLSjzENri8S0EUvNs/QDzdoem8OwAIPGuqljzQLjG6bkIVvKfniLp7ho08LetPu8xKM7wdXsS8HEgavWYObzzkaWW8TT6ou6H/YTwLIKQ5fRd3PAElPzwttfo7BouRuzE047tYhQw8a0WBOgehuzsaS4Y7hBXIOxj/Bjwt1aU8ZV1avHtmYrs0OIw7fYOhPOYBZLzTd0Q8kVlAPB+RrbwxNOO8W86fO2LF27wWzB29QJUaO087PDpuWL+7tSsBvTFqODuHfoY7aihCPDQ4jLyIXts8LevPundsOrxEj8K8D+T2PLS/1jy5wBM9RydBO198yDyDZLO6/yerO2TiGr29bry7kb7VPFxpijwvnOQ8GxJFOdcsgjswuaM8GbCbO87MBz0Xswe8HZSZOxguR7zpaqK8+JUEvDj83rxsWyu8XGkKvF9NiDtIPes85DqlPCLElrmGMge9C4U5PYqRxLtaFna8liQoPJef57waK9u8mqMQPNrEgLw/SZs8odAhvIun7jqXcKe73lJ+PPxZ17xl+MS8CFLQvJbuUjywYJm7f6/1vBFmy7wh81a8ZZMvOy1wkDu2V9U8p0yeO2mN17tRIiY9SKmVO4aXHDuWv5I8FmcIvGwl1rtVHE487U4gPTZkYDyuFJq782WHvOHxkbw6tI
g8SVoqvLERLrzQLrE8XRofuwDZPzxx8L27ZnqZPPOURzvwMp47PSzcuwWkJ7xHjFa7P0mbvHq1TTxaTMs8BT+Su3AffryWib08MWo4vCilKL15zmO7JiPUPCBCQrxy16c78y+yO31NTDqr+sY7USImO+3pCryrEHG80GQGvQWkpzwNU408sREuvB+Rrbwe4Ji7\"\n + \ }\n ],\n \"model\": \"text-embedding-ada-002\",\n \"usage\": {\n \"prompt_tokens\": + 1,\n \"total_tokens\": 1\n }\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ab773766b878298-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 30 Jul 2024 18:35:52 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=ggWVHFgioAT1pC5qtmqPKsAx5EYcmz03sJ14ffLSumE-1722364552-1.0.1.1-hQLT0WbxlShI3_4cRRp3AsHQfKlVmKcrGyAUki5OMG5ABAx3zlUdkqomhbmJtS9T8DH0T5fx8MKpn0kYv1nF.w; + path=/; expires=Tue, 30-Jul-24 19:05:52 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=65Hy9XC0Yv6_M0E8DlThaqw38AI.X7VeIW6BvODc3Ic-1722364552959-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-allow-origin: + - '*' + alt-svc: + - h3=":443"; ma=86400 + openai-model: + - text-embedding-ada-002 + openai-organization: + - datadog-4 + openai-processing-ms: + - '21' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=15552000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-reset-requests: + - 6ms + x-request-id: + - req_59eefe40e0302d1cd8eca8b6e780227c + http_version: HTTP/1.1 + status_code: 200 +version: 1 diff --git a/tests/contrib/langchain/test_langchain_llmobs.py b/tests/contrib/langchain/test_langchain_llmobs.py index 8c82318fbb..13bc4ae86d 100644 --- a/tests/contrib/langchain/test_langchain_llmobs.py +++ b/tests/contrib/langchain/test_langchain_llmobs.py @@ -127,6 +127,26 @@ def _invoke_chain(cls, chain, prompt, mock_tracer, cassette_name, batch=False): LLMObs.disable() return 
mock_tracer.pop_traces()[0]
 
+    def _embed_query(cls, embedding_model, query, mock_tracer, cassette_name):  # returns the first popped trace
+        LLMObs.enable(ml_app=cls.ml_app, integrations_enabled=False, _tracer=mock_tracer)
+        if cassette_name is not None:
+            with get_request_vcr(subdirectory_name=cls.cassette_subdirectory_name).use_cassette(cassette_name):
+                embedding_model.embed_query(query)
+        else:  # FakeEmbeddings does not need a cassette
+            embedding_model.embed_query(query)
+        LLMObs.disable()
+        return mock_tracer.pop_traces()[0]
+
+    def _embed_documents(cls, embedding_model, documents, mock_tracer, cassette_name):  # returns first popped trace
+        LLMObs.enable(ml_app=cls.ml_app, integrations_enabled=False, _tracer=mock_tracer)
+        if cassette_name is not None:
+            with get_request_vcr(subdirectory_name=cls.cassette_subdirectory_name).use_cassette(cassette_name):
+                embedding_model.embed_documents(documents)
+        else:  # FakeEmbeddings does not need a cassette
+            embedding_model.embed_documents(documents)
+        LLMObs.disable()
+        return mock_tracer.pop_traces()[0]
+
 
 @pytest.mark.skipif(LANGCHAIN_VERSION >= (0, 1), reason="These tests are for langchain < 0.1.0")
 class TestLLMObsLangchain(BaseTestLLMObsLangchain):
@@ -315,6 +335,56 @@ def test_llmobs_chain_schema_io(self, langchain, mock_llmobs_span_writer, mock_t
         )
         _assert_expected_llmobs_llm_span(trace[1], mock_llmobs_span_writer, mock_io=True)
 
+    def test_llmobs_embedding_query(self, langchain, mock_llmobs_span_writer, mock_tracer):
+        embedding_model = langchain.embeddings.OpenAIEmbeddings()
+        with mock.patch("langchain.embeddings.OpenAIEmbeddings._get_len_safe_embeddings", return_value=[0.0] * 1536):
+            trace = self._embed_query(
+                embedding_model=embedding_model,
+                query="hello world",
+                mock_tracer=mock_tracer,
+                cassette_name="openai_embedding_query_39.yaml" if PY39 else "openai_embedding_query.yaml",
+            )
+        assert mock_llmobs_span_writer.enqueue.call_count == 1  # exactly one span event was enqueued
+        span = trace[0] if isinstance(trace, list) else trace  # accept either a list of spans or a single span
+        mock_llmobs_span_writer.enqueue.assert_called_with(
+            _expected_llmobs_llm_span_event(
+                span,
+                span_kind="embedding",
+                model_name=embedding_model.model,
+                model_provider="openai",
+                input_documents=[{"text": "hello world"}],
+                output_value="[1 embedding(s) returned with size 1536]",
+                tags={"ml_app": "langchain_test"},
+                integration="langchain",
+            )
+        )
+
+    def test_llmobs_embedding_documents(self, langchain, mock_llmobs_span_writer, mock_tracer):
+        embedding_model = langchain.embeddings.OpenAIEmbeddings()
+        with mock.patch(
+            "langchain.embeddings.OpenAIEmbeddings._get_len_safe_embeddings", return_value=[[0.0] * 1536] * 2
+        ):
+            trace = self._embed_documents(
+                embedding_model=embedding_model,
+                documents=["hello world", "goodbye world"],
+                mock_tracer=mock_tracer,
+                cassette_name="openai_embedding_document_39.yaml" if PY39 else "openai_embedding_document.yaml",
+            )
+        assert mock_llmobs_span_writer.enqueue.call_count == 1  # exactly one span event was enqueued
+        span = trace[0] if isinstance(trace, list) else trace  # accept either a list of spans or a single span
+        mock_llmobs_span_writer.enqueue.assert_called_with(
+            _expected_llmobs_llm_span_event(
+                span,
+                span_kind="embedding",
+                model_name=embedding_model.model,
+                model_provider="openai",
+                input_documents=[{"text": "hello world"}, {"text": "goodbye world"}],
+                output_value="[2 embedding(s) returned with size 1536]",
+                tags={"ml_app": "langchain_test"},
+                integration="langchain",
+            )
+        )
+
 
 @pytest.mark.skipif(LANGCHAIN_VERSION < (0, 1), reason="These tests are for langchain >= 0.1.0")
 class TestLLMObsLangchainCommunity(BaseTestLLMObsLangchain):
@@ -499,6 +569,59 @@ def test_llmobs_anthropic_chat_model(self, langchain_anthropic, mock_llmobs_span
         assert mock_llmobs_span_writer.enqueue.call_count == 1
         _assert_expected_llmobs_llm_span(span, mock_llmobs_span_writer, input_role="user")
 
+    def test_llmobs_embedding_query(self, langchain_community, langchain_openai, mock_llmobs_span_writer, mock_tracer):
+        if langchain_openai is None:
+            pytest.skip("langchain_openai not installed which is required for this test.")
+        embedding_model = langchain_openai.embeddings.OpenAIEmbeddings()
+        with mock.patch("langchain_openai.OpenAIEmbeddings._get_len_safe_embeddings", return_value=[0.0] * 1536):
+            trace = self._embed_query(
+                embedding_model=embedding_model,
+                query="hello world",
+                mock_tracer=mock_tracer,
+                cassette_name="openai_embedding_query.yaml",
+            )
+        assert mock_llmobs_span_writer.enqueue.call_count == 1  # exactly one span event was enqueued
+        span = trace[0] if isinstance(trace, list) else trace  # accept either a list of spans or a single span
+        mock_llmobs_span_writer.enqueue.assert_called_with(
+            _expected_llmobs_llm_span_event(
+                span,
+                span_kind="embedding",
+                model_name=embedding_model.model,
+                model_provider="openai",
+                input_documents=[{"text": "hello world"}],
+                output_value="[1 embedding(s) returned with size 1536]",
+                tags={"ml_app": "langchain_test"},
+                integration="langchain",
+            )
+        )
+
+    def test_llmobs_embedding_documents(
+        self, langchain_community, langchain_openai, mock_llmobs_span_writer, mock_tracer
+    ):
+        if langchain_community is None:
+            pytest.skip("langchain-community not installed which is required for this test.")
+        embedding_model = langchain_community.embeddings.FakeEmbeddings(size=1536)
+        trace = self._embed_documents(
+            embedding_model=embedding_model,
+            documents=["hello world", "goodbye world"],
+            mock_tracer=mock_tracer,
+            cassette_name=None,  # FakeEmbeddings does not need a cassette
+        )
+        assert mock_llmobs_span_writer.enqueue.call_count == 1  # exactly one span event was enqueued
+        span = trace[0] if isinstance(trace, list) else trace  # accept either a list of spans or a single span
+        mock_llmobs_span_writer.enqueue.assert_called_with(
+            _expected_llmobs_llm_span_event(
+                span,
+                span_kind="embedding",
+                model_name="",
+                model_provider="fake",
+                input_documents=[{"text": "hello world"}, {"text": "goodbye world"}],
+                output_value="[2 embedding(s) returned with size 1536]",
+                tags={"ml_app": "langchain_test"},
+                integration="langchain",
+            )
+        )
+
 
 @pytest.mark.skipif(LANGCHAIN_VERSION < (0, 1), reason="These tests are for langchain >= 0.1.0")
 class TestTraceStructureWithLLMIntegrations(SubprocessTestCase):
@@ -550,6 +673,9 @@ def _assert_trace_structure_from_writer_call_args(self, span_kinds):
             elif span_kind == "llm":
                 assert len(call_args["meta"]["input"]["messages"]) > 0
                 assert len(call_args["meta"]["output"]["messages"]) > 0
+            elif span_kind == "embedding":  # embedding spans must carry input documents and an output value
+                assert len(call_args["meta"]["input"]["documents"]) > 0
+                assert len(call_args["meta"]["output"]["value"]) > 0
 
     @staticmethod
     def _call_bedrock_chat_model(ChatBedrock, HumanMessage):
@@ -578,6 +704,18 @@ def _call_openai_llm(OpenAI):
         with get_request_vcr(subdirectory_name="langchain_community").use_cassette("openai_completion_sync.yaml"):
             llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")
 
+    @staticmethod
+    def _call_openai_embedding(OpenAIEmbeddings):
+        embedding = OpenAIEmbeddings()
+        with mock.patch("langchain_openai.embeddings.base.tiktoken.encoding_for_model") as mock_encoding_for_model:
+            mock_encoding = mock.MagicMock()
+            mock_encoding_for_model.return_value = mock_encoding
+            mock_encoding.encode.return_value = [0.0] * 1536  # canned return value for the patched encoder
+            with get_request_vcr(subdirectory_name="langchain_community").use_cassette(
+                "openai_embedding_query_integration.yaml"
+            ):
+                embedding.embed_query("hello world")
+
     @staticmethod
     def _call_anthropic_chat(Anthropic):
         llm = Anthropic(model="claude-3-opus-20240229", max_tokens=15)
@@ -647,6 +785,24 @@ def test_llmobs_with_openai_disabled(self):
         self._call_openai_llm(OpenAI)
         self._assert_trace_structure_from_writer_call_args(["llm"])
 
+    @run_in_subprocess(env_overrides=openai_env_config)
+    def test_llmobs_langchain_with_embedding_model_openai_enabled(self):
+        from langchain_openai import OpenAIEmbeddings
+
+        patch(langchain=True, openai=True)  # both the langchain and openai integrations are patched
+        LLMObs.enable(ml_app="", integrations_enabled=False)
+        self._call_openai_embedding(OpenAIEmbeddings)
+        self._assert_trace_structure_from_writer_call_args(["workflow", "embedding"])
+
+    @run_in_subprocess(env_overrides=openai_env_config)
+    def test_llmobs_langchain_with_embedding_model_openai_disabled(self):
+        from langchain_openai import OpenAIEmbeddings
+
+        patch(langchain=True)  # only the langchain integration is patched
+        LLMObs.enable(ml_app="", integrations_enabled=False)
+        self._call_openai_embedding(OpenAIEmbeddings)
+        self._assert_trace_structure_from_writer_call_args(["embedding"])
+
     @run_in_subprocess(env_overrides=anthropic_env_config)
     def test_llmobs_with_anthropic_enabled(self):
         from langchain_anthropic import ChatAnthropic