diff --git a/docs/source/guides/snippets/metrics/metric_defs.py b/docs/source/guides/snippets/metrics/metric_defs.py index ca48dce9ddb..e885cc09354 100644 --- a/docs/source/guides/snippets/metrics/metric_defs.py +++ b/docs/source/guides/snippets/metrics/metric_defs.py @@ -1,8 +1,8 @@ from __future__ import annotations -import bentoml +import prometheus_client -inference_duration = bentoml.metrics.Histogram( +inference_duration = prometheus_client.Histogram( name="inference_duration", documentation="Duration of inference", labelnames=["nltk_version", "sentiment_cls"], @@ -25,7 +25,7 @@ ), ) -polarity_counter = bentoml.metrics.Counter( +polarity_counter = prometheus_client.Counter( name="polarity_total", documentation="Count total number of analysis by polarity scores", labelnames=["polarity"], diff --git a/src/_bentoml_impl/server/app.py b/src/_bentoml_impl/server/app.py index 884e3382c92..5ef728ae39b 100644 --- a/src/_bentoml_impl/server/app.py +++ b/src/_bentoml_impl/server/app.py @@ -29,6 +29,7 @@ if t.TYPE_CHECKING: from opentelemetry.sdk.trace import Span + from prometheus_client import Histogram from starlette.applications import Starlette from starlette.requests import Request from starlette.routing import BaseRoute @@ -36,7 +37,6 @@ from bentoml._internal import external_typing as ext from bentoml._internal.context import ServiceContext from bentoml._internal.types import LifecycleHook - from bentoml.metrics import Histogram R = t.TypeVar("R") diff --git a/src/_bentoml_impl/worker/service.py b/src/_bentoml_impl/worker/service.py index 103563df5e0..b579e05e124 100644 --- a/src/_bentoml_impl/worker/service.py +++ b/src/_bentoml_impl/worker/service.py @@ -174,6 +174,9 @@ def main( if prometheus_dir is not None: BentoMLContainer.prometheus_multiproc_dir.set(prometheus_dir) + os.environ["PROMETHEUS_MULTIPROC_DIR"] = ( + BentoMLContainer.prometheus_multiproc_dir.get() + ) server_context.service_name = service.name if service.bento is None: server_context.bento_name = service.name diff --git a/src/bentoml/_internal/server/metrics/prometheus.py b/src/bentoml/_internal/server/metrics/prometheus.py index 0e58fbbf6cb..72081ad5f7b 100644 --- a/src/bentoml/_internal/server/metrics/prometheus.py +++ b/src/bentoml/_internal/server/metrics/prometheus.py @@ -2,12 +2,13 @@ import logging import os -import sys import typing as t from functools import partial from typing import TYPE_CHECKING if TYPE_CHECKING: + from prometheus_client import Metric + from ... import external_typing as ext logger = logging.getLogger(__name__) @@ -45,20 +46,6 @@ def __init__( @property def prometheus_client(self): - if self.multiproc and not self._imported: - # step 1: check environment - assert ( - "prometheus_client" not in sys.modules - ), "prometheus_client is already imported, multiprocessing will not work properly" - - assert ( - self.multiproc_dir - ), f"Invalid prometheus multiproc directory: {self.multiproc_dir}" - assert os.path.isdir(self.multiproc_dir) - - os.environ["PROMETHEUS_MULTIPROC_DIR"] = self.multiproc_dir - - # step 2: import prometheus_client import prometheus_client.exposition import prometheus_client.metrics @@ -173,6 +160,10 @@ def Metric(self): """ A Metric family and its samples. - This is a base class to be used by instrumentation client. Custom collectors should use ``bentoml.metrics.metrics_core.GaugeMetricFamily``, ``bentoml.metrics.metrics_core.CounterMetricFamily``, ``bentoml.metrics.metrics_core.SummaryMetricFamily`` instead. + This is a base class to be used by instrumentation client. + Custom collectors should use + ``prometheus_client.metrics_core.GaugeMetricFamily``, + ``prometheus_client.metrics_core.CounterMetricFamily``, + ``prometheus_client.metrics_core.SummaryMetricFamily`` instead. """ return partial(self.prometheus_client.Metric, registry=self.registry) diff --git a/src/bentoml/metrics.py b/src/bentoml/metrics.py index 1640c66ac1a..30580ca630f 100644 --- a/src/bentoml/metrics.py +++ b/src/bentoml/metrics.py @@ -1,434 +1,13 @@ -from __future__ import annotations +import sys +import warnings -import logging -import typing as t -from typing import TYPE_CHECKING +import prometheus_client -from simple_di import Provide -from simple_di import inject +warnings.warn( + "bentoml.metrics module is deprecated and will be removed in the future. " + "Please use prometheus_client directly for metrics reporting.", + DeprecationWarning, + stacklevel=1, +) -from ._internal.configuration.containers import BentoMLContainer - -if TYPE_CHECKING: - from ._internal.server.metrics.prometheus import PrometheusClient - -logger = logging.getLogger(__name__) - -# NOTE: We have to set our docstring here due to the fact that -# we are lazy loading the metrics. This means that the docstring -# won't be discovered until the metrics is initialized. -# this won't work with help() or doocstring on Sphinx. -# While this is less than optimal, we will do this since 'bentoml.metrics' -# is a public API. -_MAKE_WSGI_APP_DOCSTRING = """\ -Create a WSGI app which serves the metrics from a registry. - -Returns: - WSGIApp: A WSGI app which serves the metrics from a registry. -""" -_GENERATE_LATEST_DOCSTRING = """\ -Returns metrics from the registry in latest text format as a string. - -This function ensures that multiprocess is setup correctly. - -Returns: - str: Metrics in latest text format. Refer to `Exposition format `_ for details. -""" -_TEXT_STRING_TO_METRIC_DOCSTRING = """ -Parse Prometheus text format from a unicode string. - -Returns: - Metric: A generator that yields `Metric `_ objects. -""" -_HISTOGRAM_DOCSTRING = """\ -A Histogram tracks the size and number of events in a given bucket. - -Histograms are often used to aggregatable calculation of quantiles. -Some notable examples include measuring response latency, request size. - -A quick example of a Histogram: - -.. code-block:: python - - from bentoml.metrics import Histogram - - h = Histogram('request_size_bytes', 'Request size (bytes)') - - @svc.api(input=JSON(), output=JSON()) - def predict(input_data: dict[str, str]): - h.observe(512) # Observe 512 (bytes) - ... - -``observe()`` will observe for given amount of time. -Usually, this value are positive or zero. Negative values are accepted but will -prevent current versions of Prometheus from properly detecting counter resets in the `sum of observations `_. - -Histograms also provide ``time()``, which times a block of code or function, and observe for a given duration amount. -This function can also be used as a context manager. - -.. tab-set:: - - .. tab-item:: Example - - .. code-block:: python - - from bentoml.metrics import Histogram - - REQUEST_TIME = Histogram('response_latency_seconds', 'Response latency (seconds)') - - @REQUEST_TIME.time() - def create_response(request): - body = await request.json() - return Response(body) - - .. tab-item:: Context Manager - - .. code-block:: python - - from bentoml.metrics import Histogram - - REQUEST_TIME = Histogram('response_latency_seconds', 'Response latency (seconds)') - - def create_response(request): - body = await request.json() - with REQUEST_TIME.time(): - ... - -The default buckets are intended to cover a typical web/rpc request from milliseconds to seconds. -See :ref:`configuration guides ` to see how to customize the buckets. - -Args: - name (str): The name of the metric. - documentation (str): A documentation string. - labelnames (tuple[str]): A tuple of strings specifying the label names for the metric. Defaults to ``()``. - namespace (str): The namespace of the metric. Defaults to an empty string. - subsystem (str): The subsystem of the metric. Defaults to an empty string. - unit (str): The unit of the metric. Defaults to an empty string. - buckets (list[float]): A list of float representing a bucket. Defaults to ``(.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF)``. -""" -_COUNTER_DOCSTRING = """ -A Counter tracks counts of events or running totals. - -.. epigraph:: - - It is a cumulative metric that represents a single `monotonically increasing counter `_ whose value can only increase or be reset to zero on restart. - -Some notable examples include counting the number of requests served, tasks completed, or errors. - -If you need to go down, uses :func:`bentoml.metrics.Gauge` instead. - -A quick example of a Counter: - -.. code-block:: python - - from bentoml.metrics import Counter - - c = Counter('failures', 'Total number of failures requests') - - @svc.api(input=JSON(), output=JSON()) - def predict(input_data: dict[str, str]): - if input_data['fail']: - c.inc() # increment by 1 by default - -``inc()`` can optionally pass in a ``exemplar``, which is a dictionary of keys and values, defined :github:`here `. - -``inc()`` can also increment by any given amount: - -.. code-block:: python - - c.inc(2.1) - -``count_exceptions()`` can be used as both a decorator and context manager to count exceptions raised. - -.. tab-set:: - - .. tab-item:: Decorator - - .. code-block:: python - - from bentoml.metrics import Counter - - c = Counter('failures', 'Total number of failures requests') - - @c.count_exceptions() - @svc.api(input=JSON(), output=JSON()) - def predict(input_data: dict[str, str]): - if input_data['acc'] < 0.5: - raise ValueError("Given data is not accurate.") - - .. tab-item:: Context Manager - - .. code-block:: python - - from bentoml.metrics import Histogram - - c = Counter('failures', 'Total number of failures requests') - - @svc.api(input=JSON(), output=JSON()) - def predict(input_data: dict[str, str]): - with c.count_exceptions(): - if input_data['acc'] < 0.5: - raise ValueError("Given data is not accurate.") - with c.count_exceptions(RuntimeError): - if input_data['output'] is None: - raise RuntimeError("Given pre-processing logic is invalid") - -``count_exceptions()`` will optionally take in an exception to only track specific exceptions. - -.. code-block:: python - - ... - with c.count_exceptions(RuntimeError): - if input_data['output'] is None: - raise RuntimeError("Given pre-processing logic is invalid") - -Args: - name (str): The name of the metric. - documentation (str): A documentation string. - labelnames (tuple[str]): A tuple of strings specifying the label names for the metric. Defaults to ``()``. - namespace (str): The namespace of the metric. Defaults to an empty string. - subsystem (str): The subsystem of the metric. Defaults to an empty string. - unit (str): The unit of the metric. Defaults to an empty string. -""" -_SUMMARY_DOCSTRING = """ -A Summary tracks the size and `samples observations (usually things like request durations and response sizes).`. - -While it also provides a total count of observations and a sum of all observed values, -it calculates configurable quantiles over a sliding time window. - -Notable examples include request latency and response size. - -A quick example of a Summary: - -.. code-block:: python - - from bentoml.metrics import Summary - - s = Summary('request_size_bytes', 'Request size (bytes)') - - @svc.api(input=JSON(), output=JSON()) - def predict(input_data: dict[str, str]): - s.observe(512) # Observe 512 (bytes) - ... - -``observe()`` will observe for given amount of time. -Usually, this value are positive or zero. Negative values are accepted but will -prevent current versions of Prometheus from properly detecting counter resets in the `sum of observations `_. - -Similar to :meth:`bentoml.metrics.Histogram`, ``time()`` can also be used as a decorator or context manager. - -.. tab-set:: - - .. tab-item:: Example - - .. code-block:: python - - from bentoml.metrics import Histogram - - s = Summary('response_latency_seconds', 'Response latency (seconds)') - - @s.time() - def create_response(request): - body = await request.json() - return Response(body) - - .. tab-item:: Context Manager - - .. code-block:: python - - from bentoml.metrics import Histogram - - s = Summary('response_latency_seconds', 'Response latency (seconds)') - - def create_response(request): - body = await request.json() - with s.time(): - ... - -Args: - name (str): The name of the metric. - documentation (str): A documentation string. - labelnames (tuple[str]): A tuple of strings specifying the label names for the metric. Defaults to ``()``. - namespace (str): The namespace of the metric. Defaults to an empty string. - subsystem (str): The subsystem of the metric. Defaults to an empty string. - unit (str): The unit of the metric. Defaults to an empty string. -""" -_GAUGE_DOCSTRING = """ -A Gauge represents a single numerical value that can arbitrarily go up and down. - -Gauges are typically used to for report instantaneous values like temperatures or current memory usage. -One can think of Gauge as a :meth:`bentoml.metrics.Counter` that can go up and down. - -Notable examples include in-progress requests, number of item in a queue, and free memory. - -A quick example of a Gauge: - -.. code-block:: python - - from bentoml.metrics import Gauge - - g = Gauge('inprogress_request', 'Request inprogress') - - @svc.api(input=JSON(), output=JSON()) - def predict(input_data: dict[str, str]): - g.inc() # increment by 1 by default - g.dec(10) # decrement by any given value - g.set(0) # set to a given value - ... - -.. note:: - - By default, ``inc()`` and ``dec()`` will increment and decrement by 1 respectively. - -Gauge also provide ``track_inprogress()``, to track inprogress object. -This function can also be used as either a context manager or a decorator. - -.. tab-set:: - - .. tab-item:: Example - - .. code-block:: python - - from bentoml.metrics import Gauge - - g = Gauge('inprogress_request', 'Request inprogress') - - @svc.api(input=JSON(), output=JSON()) - @g.track_inprogress() - def predict(input_data: dict[str, str]): - ... - - .. tab-item:: Context Manager - - .. code-block:: python - - from bentoml.metrics import Gauge - - g = Gauge('inprogress_request', 'Request inprogress') - - @svc.api(input=JSON(), output=JSON()) - def predict(input_data: dict[str, str]): - with g.track_inprogress(): - ... - - The gauge will increment when the context is entered and decrement when the context is exited. - -Args: - name (str): The name of the metric. - documentation (str): A documentation string. - labelnames (tuple[str]): A tuple of strings specifying the label names for the metric. Defaults to ``()``. - namespace (str): The namespace of the metric. Defaults to an empty string. - subsystem (str): The subsystem of the metric. Defaults to an empty string. - unit (str): The unit of the metric. Defaults to an empty string. - multiprocess_mode (str): The multiprocess mode of the metric. Defaults to ``all``. Available options - are (``all``, ``min``, ``max``, ``livesum``, ``liveall``) -""" - -# This sets of functions are implemented in the PrometheusClient class -_INTERNAL_FN_IMPL = { - "make_wsgi_app": _MAKE_WSGI_APP_DOCSTRING, - "generate_latest": _GENERATE_LATEST_DOCSTRING, - "text_string_to_metric_families": _TEXT_STRING_TO_METRIC_DOCSTRING, -} -_NOT_IMPLEMENTED = [ - "delete_from_gateway", - "instance_ip_grouping_key", - "push_to_gateway", - "pushadd_to_gateway", -] -_NOT_SUPPORTED = [ - "GC_COLLECTOR", - "GCCollector", - "PLATFORM_COLLECTOR", - "PlatformCollector", - "PROCESS_COLLECTOR", - "ProcessCollector", - "REGISTRY", - "CONTENT_TYPE_LATEST", - "start_http_server", - "start_wsgi_server", - "make_asgi_app", - "write_to_textfile", -] + _NOT_IMPLEMENTED -_docstring = { - "Counter": _COUNTER_DOCSTRING, - "Histogram": _HISTOGRAM_DOCSTRING, - "Summary": _SUMMARY_DOCSTRING, - "Gauge": _GAUGE_DOCSTRING, -} -_docstring.update(_INTERNAL_FN_IMPL) - - -def __dir__() -> list[str]: - # This is for IPython and IDE autocompletion. - metrics_client = BentoMLContainer.metrics_client.get() - return list(set(dir(metrics_client.prometheus_client)) - set(_NOT_SUPPORTED)) - - -def __getattr__(item: t.Any): - if item in _NOT_SUPPORTED: - raise NotImplementedError( - f"{item} is not supported when using '{__name__}'. See https://docs.bentoml.com/en/latest/reference/metrics.html." - ) - # This is the entrypoint for all bentoml.metrics.* - return _LazyMetric(item, docstring=_docstring.get(item)) - - -class _LazyMetric: - __slots__ = ("_attr", "_proxy", "_initialized", "_args", "_kwargs", "__doc__") - - def __init__(self, attr: str, docstring: str | None = None): - self._attr = attr - self.__doc__ = docstring - self._proxy = None - self._initialized = False - self._args: tuple[t.Any, ...] = () - self._kwargs: dict[str, t.Any] = {} - - def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any: - """ - Lazily initialize the metrics object. - - Args: - *args: Arguments to pass to the metrics object. - **kwargs: Keyword arguments to pass to the metrics object. - """ - if "registry" in kwargs: - raise ValueError( - f"'registry' should not be passed when using '{__name__}.{self._attr}'. See https://docs.bentoml.com/en/latest/reference/metrics.html." - ) - self._args = args - self._kwargs = kwargs - if self._attr in _INTERNAL_FN_IMPL: - # first-class function implementation from BentoML Prometheus client. - # In this case, the function will be called directly. - return self._load_proxy() - return self - - def __getattr__(self, item: t.Any) -> t.Any: - if item in self.__slots__: - raise AttributeError(f"Attribute {item} is private to {self}.") - if self._proxy is None: - self._proxy = self._load_proxy() - assert self._initialized and self._proxy is not None - return getattr(self._proxy, item) - - def __dir__(self) -> list[str]: - if self._proxy is None: - self._proxy = self._load_proxy() - assert self._initialized and self._proxy is not None - return dir(self._proxy) - - @inject - def _load_proxy( - self, - metrics_client: PrometheusClient = Provide[BentoMLContainer.metrics_client], - ) -> None: - client_impl = ( - metrics_client - if self._attr in dir(metrics_client) - else metrics_client.prometheus_client - ) - proxy = getattr(client_impl, self._attr)(*self._args, **self._kwargs) - self._initialized = True - return proxy +sys.modules[__name__] = prometheus_client diff --git a/src/bentoml_cli/worker/grpc_api_server.py b/src/bentoml_cli/worker/grpc_api_server.py index 1610fc594f1..142c5491d3c 100644 --- a/src/bentoml_cli/worker/grpc_api_server.py +++ b/src/bentoml_cli/worker/grpc_api_server.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import os import typing as t import click @@ -125,6 +126,9 @@ def main( BentoMLContainer.development_mode.set(development_mode) if prometheus_dir is not None: BentoMLContainer.prometheus_multiproc_dir.set(prometheus_dir) + os.environ["PROMETHEUS_MULTIPROC_DIR"] = ( + BentoMLContainer.prometheus_multiproc_dir.get() + ) if runner_map is not None: BentoMLContainer.remote_runner_mapping.set(json.loads(runner_map)) diff --git a/src/bentoml_cli/worker/grpc_prometheus_server.py b/src/bentoml_cli/worker/grpc_prometheus_server.py index 9c1a358aec2..6737662782c 100644 --- a/src/bentoml_cli/worker/grpc_prometheus_server.py +++ b/src/bentoml_cli/worker/grpc_prometheus_server.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import typing as t from typing import TYPE_CHECKING @@ -68,7 +69,9 @@ def main(fd: int, backlog: int, prometheus_dir: str | None): metrics_client = BentoMLContainer.metrics_client.get() if prometheus_dir is not None: BentoMLContainer.prometheus_multiproc_dir.set(prometheus_dir) - + os.environ["PROMETHEUS_MULTIPROC_DIR"] = ( + BentoMLContainer.prometheus_multiproc_dir.get() + ) # create a ASGI app that wraps around the default HTTP prometheus server. prom_app = Starlette( debug=get_debug_mode(), middleware=[Middleware(GenerateLatestMiddleware)] diff --git a/src/bentoml_cli/worker/http_api_server.py b/src/bentoml_cli/worker/http_api_server.py index 31ddc54d578..a391d03c33e 100644 --- a/src/bentoml_cli/worker/http_api_server.py +++ b/src/bentoml_cli/worker/http_api_server.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import os import socket import typing as t @@ -152,7 +153,9 @@ def main( BentoMLContainer.development_mode.set(development_mode) if prometheus_dir is not None: BentoMLContainer.prometheus_multiproc_dir.set(prometheus_dir) - + os.environ["PROMETHEUS_MULTIPROC_DIR"] = ( + BentoMLContainer.prometheus_multiproc_dir.get() + ) if runner_map is not None: BentoMLContainer.remote_runner_mapping.set(json.loads(runner_map)) if timeout is not None: diff --git a/src/bentoml_cli/worker/runner.py b/src/bentoml_cli/worker/runner.py index 56147cfa156..cae3ab724fd 100644 --- a/src/bentoml_cli/worker/runner.py +++ b/src/bentoml_cli/worker/runner.py @@ -106,7 +106,9 @@ def main( if prometheus_dir is not None: BentoMLContainer.prometheus_multiproc_dir.set(prometheus_dir) - + os.environ["PROMETHEUS_MULTIPROC_DIR"] = ( + BentoMLContainer.prometheus_multiproc_dir.get() + ) if no_access_log: access_log_config = BentoMLContainer.runners_config.logging.access access_log_config.enabled.set(False) diff --git a/tests/e2e/bento_server_grpc/service.py b/tests/e2e/bento_server_grpc/service.py index 587f4a39810..5c265bd2e78 100644 --- a/tests/e2e/bento_server_grpc/service.py +++ b/tests/e2e/bento_server_grpc/service.py @@ -4,6 +4,7 @@ import typing as t from typing import TYPE_CHECKING +import prometheus_client from context_server_interceptor import AsyncContextInterceptor from pydantic import BaseModel @@ -171,7 +172,7 @@ async def echo_image(f: PIL.Image.Image) -> NDArray[t.Any]: return np.array(f) -histogram = bentoml.metrics.Histogram( +histogram = prometheus_client.Histogram( name="inference_latency", documentation="Inference latency in seconds", labelnames=["model_name", "model_version"], @@ -199,9 +200,12 @@ async def predict_multi_images(original: Image, compared: Image): @svc.api(input=bentoml.io.Text(), output=bentoml.io.Text()) def ensure_metrics_are_registered(_: str) -> None: + from prometheus_client import generate_latest + from prometheus_client.parser import text_string_to_metric_families + histograms = [ m.name - for m in bentoml.metrics.text_string_to_metric_families() + for m in text_string_to_metric_families(generate_latest().decode()) if m.type == "histogram" ] assert "inference_latency" in histograms diff --git a/tests/e2e/bento_server_http/service.py b/tests/e2e/bento_server_http/service.py index 534a1c4ed7e..c9a1e65bd46 100644 --- a/tests/e2e/bento_server_http/service.py +++ b/tests/e2e/bento_server_http/service.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +import prometheus_client import pydantic from fastapi import FastAPI from PIL.Image import Image as PILImage @@ -59,7 +60,7 @@ async def count_text_stream(self, input_text: str) -> t.AsyncGenerator[str, None TEST_DIR = os.getenv("BENTOML_TEST_DATA") -metric_test = bentoml.metrics.Counter( +metric_test = prometheus_client.Counter( name="test_metrics", documentation="Counter test metric" ) @@ -78,9 +79,12 @@ async def echo_delay(data: dict[str, t.Any]) -> JSONSerializable: @svc.api(input=bentoml.io.Text(), output=bentoml.io.Text()) def ensure_metrics_are_registered(data: str) -> str: # pylint: disable=unused-argument + from prometheus_client import generate_latest + from prometheus_client.parser import text_string_to_metric_families + counters = [ m.name - for m in bentoml.metrics.text_string_to_metric_families() + for m in text_string_to_metric_families(generate_latest().decode()) if m.type == "counter" ] assert "test_metrics" in counters diff --git a/tests/unit/test_metrics.py b/tests/unit/test_metrics.py deleted file mode 100644 index d243671e2bd..00000000000 --- a/tests/unit/test_metrics.py +++ /dev/null @@ -1,18 +0,0 @@ -from __future__ import annotations - -import bentoml - - -def test_metrics_initialization(): - o = bentoml.metrics.Gauge(name="test_metrics", documentation="test") - assert isinstance(o, bentoml.metrics._LazyMetric) - assert o._proxy is None - o = bentoml.metrics.Histogram(name="test_metrics", documentation="test") - assert isinstance(o, bentoml.metrics._LazyMetric) - assert o._proxy is None - o = bentoml.metrics.Counter(name="test_metrics", documentation="test") - assert isinstance(o, bentoml.metrics._LazyMetric) - assert o._proxy is None - o = bentoml.metrics.Summary(name="test_metrics", documentation="test") - assert isinstance(o, bentoml.metrics._LazyMetric) - assert o._proxy is None