diff --git a/docs/my-website/docs/observability/opik_integration.md b/docs/my-website/docs/observability/opik_integration.md
new file mode 100644
index 000000000000..de4a8a06622e
--- /dev/null
+++ b/docs/my-website/docs/observability/opik_integration.md
@@ -0,0 +1,93 @@
+import Image from '@theme/IdealImage';
+
+# Comet Opik - Logging + Evals
+Opik is an open-source, end-to-end [LLM Evaluation Platform](https://www.comet.com/site/products/opik/?utm_source=litelllm&utm_medium=docs&utm_content=intro_paragraph) that helps developers track their LLM prompts and responses during both development and production. Users can define and run evaluations to test their LLM apps before deployment, checking for hallucinations, accuracy, context retrieval, and more!
+
+<Image img={require('../../img/opik.png')} />
+
+:::info
+We want to learn how we can make the callbacks better! Meet the LiteLLM [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
+join our [discord](https://discord.gg/wuPM9dRgDw)
+:::
+
+## Pre-Requisites
+
+Ensure you have run `pip install opik` for this integration:
+
+```shell
+pip install opik litellm
+```
+
+## Quick Start
+Use just 2 lines of code to instantly log your responses **across all providers** with Opik.
+
+Get your Opik API Key by signing up [here](https://www.comet.com/signup?utm_source=litelllm&utm_medium=docs&utm_content=api_key_cell)!
+
+```python
+import litellm
+litellm.success_callback = ["opik"]
+```
+
+Full example:
+
+```python
+# pip install opik
+import litellm
+import os
+
+os.environ["OPIK_API_KEY"] = ""
+
+# LLM provider API Keys:
+os.environ["OPENAI_API_KEY"] = ""
+
+# set "opik" as a callback, litellm will send the data to an Opik server (such as comet.com)
+litellm.success_callback = ["opik"]
+
+# openai call
+response = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "user", "content": "Why is tracking and evaluation of LLMs important?"}
+    ]
+)
+```
+
+If you are using a streaming response, you need to wrap the call with Opik's `@track` decorator and provide `current_span_data` and `current_trace_data` in the metadata:
+
+```python
+from opik import track
+from opik.opik_context import get_current_trace_data, get_current_span_data
+
+litellm.success_callback = ["opik"]
+
+@track()
+def streaming_function(input):
+    messages = [{"role": "user", "content": input}]
+    response = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        metadata={
+            "opik": {
+                "current_span_data": get_current_span_data(),
+                "current_trace_data": get_current_trace_data(),
+                "tags": ["streaming-test"],
+            },
+        },
+        stream=True,
+    )
+    return response
+
+response = streaming_function("Why is tracking and evaluation of LLMs important?")
+chunks = list(response)
+```
+
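+## Opik-specific settings
+
+Besides the trace context shown above, the callback also reads a few optional Opik settings from the `opik` key of the request `metadata`: `project_name`, `workspace`, `host`, extra `metadata`, and `tags`. Below is a minimal sketch of passing them on a single call; the project, workspace, and host values are placeholders for your own:
+
+```python
+import litellm
+
+litellm.success_callback = ["opik"]
+
+response = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "user", "content": "Why is tracking and evaluation of LLMs important?"}
+    ],
+    metadata={
+        "opik": {
+            "project_name": "litellm-demo",        # placeholder project name
+            "workspace": "my-workspace",           # placeholder workspace name
+            "host": "https://my-opik-server.example.com/api",  # placeholder, e.g. a self-hosted Opik server
+            "metadata": {"environment": "dev"},    # extra metadata attached to the trace and span
+            "tags": ["litellm-demo"],
+        },
+    },
+)
+```
+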
+## Support & Talk to Founders
+
+- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
+- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
+- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
+- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
diff --git a/docs/my-website/img/opik.png b/docs/my-website/img/opik.png
new file mode 100644
index 000000000000..d56195c5d5f9
Binary files /dev/null and b/docs/my-website/img/opik.png differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 5920b8841b3f..b11778750dc3 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -278,6 +278,7 @@ const sidebars = {
         "observability/greenscale_integration",
         "observability/supabase_integration",
         "observability/telemetry",
+        "observability/opik_integration",
       ],
     },
     {
diff --git a/litellm/__init__.py b/litellm/__init__.py
index abc336f34cbd..1ff963ad7fb8 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -51,6 +51,7 @@
     "braintrust",
     "arize",
     "gcs_bucket",
+    "opik",
 ]
 _known_custom_logger_compatible_callbacks: List = list(
     get_args(_custom_logger_compatible_callbacks_literal)
diff --git a/litellm/integrations/opik/opik.py b/litellm/integrations/opik/opik.py
new file mode 100644
index 000000000000..ceacd27e334a
--- /dev/null
+++ b/litellm/integrations/opik/opik.py
@@ -0,0 +1,138 @@
+"""
+Opik Logger that logs LLM events to an Opik server
+"""
+
+from typing import Any, Dict, Callable
+import datetime
+
+import litellm
+from litellm.types.utils import ModelResponse
+from litellm.integrations.custom_logger import CustomLogger
+
+from .utils import (
+    pformat,
+    get_current_span_id,
+    get_current_trace_id,
+    model_response_to_dict,
+    redact_secrets,
+)
+
+class OpikLogger(CustomLogger):
+    """
+    Opik Logger for logging events to an Opik Server
+    """
+    def __init__(self):
+        try:
+            import opik
+            self.opik = opik
+        except ImportError:
+            self.opik = None
+
+    def log_event(
+        self,
+        kwargs: Dict[str, Any],
+        response_obj: ModelResponse,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+        print_verbose: Callable,
+    ) -> None:
+        """
+        Args:
+            kwargs: the request dictionary
+            response_obj: ModelResponse from the LLM
+            start_time: datetime
+            end_time: datetime
+            print_verbose: function used for printing
+        """
+        if self.opik is None:
+            print_verbose(pformat("opik is not installed", "error"))
+            print_verbose(pformat("pip install opik"))
+            return
+
+        if kwargs.get("stream", False):
+            print_verbose("opik stream logging")
+            if kwargs.get("complete_streaming_response"):
+                response_obj = kwargs["complete_streaming_response"]
+            elif kwargs.get("async_complete_streaming_response"):
+                response_obj = kwargs["async_complete_streaming_response"]
+            else:
+                print_verbose("opik skipping chunk; waiting for end...")
+                return
+        else:
+            print_verbose("opik non-stream logging")
+
+        # These can be set in the metadata, or in the environment:
+        workspace = None
+        project_name = None
+        host = None
+        # litellm metadata:
+        metadata = kwargs.get("litellm_params", {}).get("metadata", {})
+        # -----
+        litellm_opik_metadata = metadata.get("opik", {})
+        # Opik specific:
+        workspace = litellm_opik_metadata.get("workspace", None)
+        project_name = litellm_opik_metadata.get("project_name", None)
+        host = litellm_opik_metadata.get("host", None)
+        current_span_id = get_current_span_id(litellm_opik_metadata)
+        current_trace_id = get_current_trace_id(litellm_opik_metadata)
+        opik_metadata = litellm_opik_metadata.get("metadata", None)
+        opik_tags = litellm_opik_metadata.get("tags", [])
+
+        client = self.opik.Opik(
+            workspace=workspace,
+            project_name=project_name,
+            host=host,
+        )
+
+        span_name = "%s_%s_%s" % (
+            response_obj.get("model", "unknown-model"),
+            response_obj.get("object", "unknown-object"),
+            response_obj.get("created", 0),
+        )
+        trace_name = response_obj.get("object", "unknown type")
+
+        input_data = redact_secrets(kwargs)
+        output_data = model_response_to_dict(response_obj)
+        metadata = opik_metadata or {}
+        metadata["created_from"] = "litellm"
+        if kwargs.get("custom_llm_provider"):
+            opik_tags.append(kwargs["custom_llm_provider"])
+        if "object" in response_obj:
+            metadata["type"] = response_obj["object"]
+        if "model" in response_obj:
+            metadata["model"] = response_obj["model"]
+        if "response_cost" in kwargs:
+            metadata["cost"] = {
+                "total_tokens": kwargs["response_cost"],
+                "currency": "USD",
+            }
+
+        if current_trace_id is not None:
+            print_verbose(pformat("opik trace found!"))
+        else:
+            print_verbose(pformat("new opik trace created!"))
+            trace = client.trace(
+                name=trace_name,
+                input=input_data,
+                output=output_data,
+                metadata=metadata,
+                start_time=start_time,
+                end_time=end_time,
+                tags=opik_tags,
+            )
+            current_trace_id = trace.id
+
+        span = client.span(
+            trace_id=current_trace_id,
+            parent_span_id=current_span_id,
+            name=span_name,
+            type="llm",
+            input=input_data,
+            output=output_data,
+            metadata=metadata,
+            usage=output_data.get("usage"),
+            start_time=start_time,
+            end_time=end_time,
+            tags=opik_tags,
+        )
+        client.flush()
diff --git a/litellm/integrations/opik/utils.py b/litellm/integrations/opik/utils.py
new file mode 100644
index 000000000000..2f5e350ddec0
--- /dev/null
+++ b/litellm/integrations/opik/utils.py
@@ -0,0 +1,73 @@
+from typing import Any, Dict, Optional
+
+from litellm.types.utils import ModelResponse
+
+BLUE = "\033[94m"
+RED = "\033[91m"
+RESET = "\033[0m"
+
+def pformat(text: str, level: str = "info") -> str:
+    """
+    Format the text with colors.
+
+    Args:
+        text: the text to format
+        level: the mode ("info" or "error")
+
+    Returns a formatted string
+    """
+    return "%s%s%s" % (BLUE if level == "info" else RED, text, RESET)
+
+def get_current_span_id(metadata: Dict[str, Any]) -> Optional[str]:
+    from opik.opik_context import get_current_span_data
+
+    if metadata.get("current_span_data"):
+        current_span_data = metadata.get("current_span_data")
+    else:
+        current_span_data = get_current_span_data()
+
+    if current_span_data:
+        return current_span_data.id
+    else:
+        return None
+
+
+def get_current_trace_id(metadata: Dict[str, Any]) -> Optional[str]:
+    from opik.opik_context import get_current_trace_data
+
+    if metadata.get("current_trace_data"):
+        current_trace_data = metadata.get("current_trace_data")
+    else:
+        current_trace_data = get_current_trace_data()
+
+    if current_trace_data:
+        return current_trace_data.id
+    else:
+        return None
+
+
+def model_response_to_dict(response_obj: ModelResponse) -> Dict:
+    """
+    Convert the ModelResponse to a dictionary.
+
+    Args:
+        response_obj: the ModelResponse from the model vendor, standardized
+
+    Returns a dictionary
+    """
+    return response_obj.to_dict()
+
+def redact_secrets(item):
+    """
+    Recursively redact sensitive information
+    """
+    if isinstance(item, dict):
+        redacted_dict = {}
+        for key, value in item.items():
+            value = redact_secrets(value)
+            if key == "api_key":
+                value = "***REDACTED***"
+            redacted_dict[key] = value
+        return redacted_dict
+    else:
+        return item
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index f3af2dcbdf7e..5e18620b70b7 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -61,6 +61,8 @@
 from ..integrations.berrispend import BerriSpendLogger
 from ..integrations.braintrust_logging import BraintrustLogger
 from ..integrations.clickhouse import ClickhouseLogger
+from ..integrations.opik.opik import OpikLogger
+from ..integrations.custom_logger import CustomLogger
 from ..integrations.datadog.datadog import DataDogLogger
 from ..integrations.dynamodb import DyanmoDBLogger
 from ..integrations.galileo import GalileoObserve
@@ -122,6 +124,7 @@
 berrispendLogger = None
 supabaseClient = None
 liteDebuggerClient = None
+opikLogger = None
 callback_list: Optional[List[str]] = []
 user_logger_fn = None
 additional_details: Optional[Dict[str, str]] = {}
@@ -184,7 +187,7 @@ def set_cache(self, credentials: dict, service_name: str, logging_obj: Any) -> None:


 class Logging:
-    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
+    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app, opikLogger
     custom_pricing: bool = False
     stream_options = None

@@ -1325,6 +1328,15 @@ def success_handler(
                         print_verbose=print_verbose,
                         callback_func=callback,
                     )
+                if callback == "opik":
+                    print_verbose("reaches opik for logging!")
+                    opikLogger.log_event(
+                        kwargs=self.model_call_details,
+                        response_obj=result,
+                        start_time=start_time,
+                        end_time=end_time,
+                        print_verbose=print_verbose,
+                    )

             except Exception as e:
                 print_verbose(
@@ -2018,7 +2030,7 @@ def set_callbacks(callback_list, function_id=None):
     """
     Globally sets the callback client
     """
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger, opikLogger
     try:
         for callback in callback_list:

@@ -2118,6 +2130,8 @@ def set_callbacks(callback_list, function_id=None):
                     liteDebuggerClient = LiteDebugger(email=litellm.email)
                 else:
                     liteDebuggerClient = LiteDebugger(email=str(uuid.uuid4()))
+            elif callback == "opik":
+                opikLogger = OpikLogger()
             elif callable(callback):
                 customLogger = CustomLogger()
     except Exception as e:
diff --git a/litellm/utils.py b/litellm/utils.py
index 9d63e1151112..ca6add7eb17f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -179,6 +179,7 @@
 executor = ThreadPoolExecutor(max_workers=MAX_THREADS)
 sentry_sdk_instance = None
 capture_exception = None
+opikLogger = None
 add_breadcrumb = None
 posthog = None
 slack_app = None
@@ -191,6 +192,7 @@
 weightsBiasesLogger = None
 customLogger = None
 langFuseLogger = None
+opikLogger = None
 openMeterLogger = None
 lagoLogger = None
 dataDogLogger = None
diff --git a/tests/local_testing/test_opik.py b/tests/local_testing/test_opik.py
new file mode 100644
index 000000000000..1a52e344c549
--- /dev/null
+++ b/tests/local_testing/test_opik.py
@@ -0,0 +1,162 @@
+import asyncio
+import io
+import os
+import pytest
+import sys
+import litellm
+
+litellm.num_retries = 3
+
+OPIK_API_KEY = os.environ.get("OPIK_API_KEY")
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
+
+def get_test_name():
+    return os.environ.get('PYTEST_CURRENT_TEST', "pytest")
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(OPENAI_API_KEY is None, reason="OPENAI_API_KEY not found in env")
+@pytest.mark.skipif(OPIK_API_KEY is None, reason="OPIK_API_KEY not found in env")
+async def test_opik_with_router():
+    litellm.set_verbose = True
+    litellm.success_callback = ["opik"]
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo",
+                "api_key": OPENAI_API_KEY,
+            },
+        }
+    ]
+    router = litellm.Router(model_list=model_list)
+    response = await router.acompletion(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "user", "content": "Why is Opik logging and evaluation important?"}
+        ],
+        metadata={
+            "opik": {
+                "metadata": {
+                    "test_name": get_test_name(),
+                },
+                "tags": ["test"],
+            },
+        },
+    )
+    assert response.usage.prompt_tokens == 16
+
+@pytest.mark.skipif(ANTHROPIC_API_KEY is None, reason="ANTHROPIC_API_KEY not found in env")
+@pytest.mark.skipif(OPIK_API_KEY is None, reason="OPIK_API_KEY not found in env")
+def test_opik_completion_with_anthropic():
+    litellm.set_verbose = True
+    litellm.success_callback = ["opik"]
+    response = litellm.completion(
+        model="claude-instant-1.2",
+        messages=[{"role": "user", "content": "Why is Opik logging and evaluation important?"}],
+        max_tokens=10,
+        temperature=0.2,
+        metadata={
+            "opik": {
+                "metadata": {
+                    "test_name": get_test_name(),
+                },
+                "tags": ["test"],
+            },
+        },
+    )
+    assert response.usage.prompt_tokens == 18
+
+@pytest.mark.skipif(OPENAI_API_KEY is None, reason="OPENAI_API_KEY not found in env")
+@pytest.mark.skipif(OPIK_API_KEY is None, reason="OPIK_API_KEY not found in env")
+def test_opik_with_openai():
+    litellm.set_verbose = True
+    litellm.success_callback = ["opik"]
+    response = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "Why is Opik logging and evaluation important?"}],
+        max_tokens=10,
+        temperature=0.2,
+        metadata={
+            "opik": {
+                "metadata": {
+                    "test_name": get_test_name(),
+                },
+                "tags": ["test"],
+            },
+        },
+    )
+    assert response.usage.prompt_tokens == 16
+
+
+@pytest.mark.skipif(OPENAI_API_KEY is None, reason="OPENAI_API_KEY not found in env")
+@pytest.mark.skipif(OPIK_API_KEY is None, reason="OPIK_API_KEY not found in env")
+def test_opik_with_openai_and_track():
+    from opik import track, flush_tracker
+    from opik.opik_context import get_current_span_data, get_current_trace_data
+
+    litellm.set_verbose = True
+    litellm.success_callback = ["opik"]
+
+    @track()
+    def complete_function(input):
+        assert get_current_span_data() is not None
+        assert get_current_trace_data() is not None
+        response = litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[
+                {
+                    "content": input,
+                    "role": "user"
+                },
+            ],
+            metadata={
+                "opik": {
+                    "current_span_data": get_current_span_data(),
+                    "current_trace_data": get_current_trace_data(),
+                    "tags": ["test"],
+                    "metadata": {
+                        "test_name": get_test_name(),
+                    },
+                },
+            },
+        )
+        return response.to_dict()
+
+    response = complete_function("Why is Opik logging and evaluation important?")
+    flush_tracker()
+    assert response["usage"]["prompt_tokens"] == 16
+
+@pytest.mark.skipif(OPENAI_API_KEY is None, reason="OPENAI_API_KEY not found in env")
+@pytest.mark.skipif(OPIK_API_KEY is None, reason="OPIK_API_KEY not found in env")
+def test_opik_with_streaming_openai():
+    from opik import track, flush_tracker
+    from opik.opik_context import get_current_trace_data, get_current_span_data
+
+    litellm.set_verbose = True
+    litellm.success_callback = ["opik"]
+
+    @track()
+    def streaming_function(input):
+        messages = [{"role": "user", "content": input}]
+        response = litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            metadata={
+                "opik": {
+                    "current_span_data": get_current_span_data(),
+                    "current_trace_data": get_current_trace_data(),
+                    "metadata": {
+                        "test_name": get_test_name(),
+                    },
+                    "tags": ["test"],
+                },
+            },
+            stream=True,
+        )
+        return response
+
+    response = streaming_function("Why is Opik logging and evaluation important?")
+    chunks = list(response)
+    flush_tracker()
+    assert len(chunks) > 10