From 54cd47ee17f42cee4297aa2c02d5be3285ffed13 Mon Sep 17 00:00:00 2001 From: RobertSamoilescu Date: Wed, 22 May 2024 10:55:58 +0100 Subject: [PATCH] Inference streaming support (#1750) * Included generate, generate_stream, and infer_stream endpoints. * Implemented REST infer_stream. * Included adaptive batching hooks for predict_stream. * Included gRPC stream proto. * Implemented ModelStreamInfer as a stream-stream method. * Included lazy fixtures as a dependency. * Included tests for the infer_stream endpoint and ModelStreamInfer. * Introduced the gzip_enabled flag. * Included gRPC stream error handling. --------- Co-authored-by: Adrian Gonzalez-Martin --- .../models/text-model/model-settings.json | 22 +++ .../models/text-model/settings.json | 6 + .../models/text-model/text_model.py | 45 +++++++ mlserver/batching/hooks.py | 64 ++++++--- mlserver/grpc/dataplane_pb2.py | 5 +- mlserver/grpc/dataplane_pb2_grpc.py | 45 +++++++ mlserver/grpc/servicers.py | 75 ++++++++--- mlserver/grpc/utils.py | 15 +++ mlserver/handlers/dataplane.py | 125 +++++++++++++----- mlserver/model.py | 21 ++- mlserver/rest/app.py | 40 +++++- mlserver/rest/endpoints.py | 45 ++++++- mlserver/rest/responses.py | 56 +++++--- mlserver/settings.py | 3 + poetry.lock | 47 +++++-- proto/dataplane.proto | 6 + pyproject.toml | 2 + runtimes/alibi-detect/poetry.lock | 88 +++++++++++- runtimes/alibi-explain/poetry.lock | 87 +++++++++++- runtimes/catboost/poetry.lock | 82 +++++++++++- runtimes/huggingface/poetry.lock | 102 ++++++++++++-- runtimes/lightgbm/poetry.lock | 76 ++++++++++- runtimes/mlflow/poetry.lock | 94 +++++++++++-- runtimes/mllib/poetry.lock | 76 ++++++++++- runtimes/sklearn/poetry.lock | 82 +++++++++++- runtimes/xgboost/poetry.lock | 76 ++++++++++- tests/batching/test_hooks.py | 20 +++ tests/conftest.py | 51 ++++++- tests/fixtures.py | 43 +++++- tests/grpc/conftest.py | 6 + tests/grpc/test_servicers.py | 60 ++++++++- tests/handlers/test_dataplane.py | 17 +++ tests/rest/test_endpoints.py | 116 ++++++++++++---- tests/rest/test_responses.py | 13 ++ tests/test_model.py | 40 ++++++ tests/testdata/generate-request.json | 18 +++ .../testdata/grpc/model-generate-request.json | 19 +++ tests/testdata/settings-stream.json | 10 ++ 38 files changed, 1624 insertions(+), 174 deletions(-) create mode 100644 benchmarking/testserver/models/text-model/model-settings.json create mode 100644 benchmarking/testserver/models/text-model/settings.json create mode 100644 benchmarking/testserver/models/text-model/text_model.py create mode 100644 tests/rest/test_responses.py create mode 100644 tests/testdata/generate-request.json create mode 100644 tests/testdata/grpc/model-generate-request.json create mode 100644 tests/testdata/settings-stream.json diff --git a/benchmarking/testserver/models/text-model/model-settings.json b/benchmarking/testserver/models/text-model/model-settings.json new file mode 100644 index 000000000..12eed8415 --- /dev/null +++ b/benchmarking/testserver/models/text-model/model-settings.json @@ -0,0 +1,22 @@ +{ + "name": "text-model", + + "implementation": "text_model.TextModel", + + "versions": ["text-model/v1.2.3"], + "platform": "mlserver", + "inputs": [ + { + "datatype": "BYTES", + "name": "prompt", + "shape": [1] + } + ], + "outputs": [ + { + "datatype": "BYTES", + "name": "output", + "shape": [1] + } + ] +} diff --git a/benchmarking/testserver/models/text-model/settings.json b/benchmarking/testserver/models/text-model/settings.json new file mode 100644 index 000000000..b5af111b4 --- /dev/null +++ 
b/benchmarking/testserver/models/text-model/settings.json @@ -0,0 +1,6 @@ +{ + "debug": false, + "parallel_workers": 0, + "gzip_enabled": false, + "metrics_endpoint": null +} diff --git a/benchmarking/testserver/models/text-model/text_model.py b/benchmarking/testserver/models/text-model/text_model.py new file mode 100644 index 000000000..4475b3c92 --- /dev/null +++ b/benchmarking/testserver/models/text-model/text_model.py @@ -0,0 +1,45 @@ +import asyncio +from typing import AsyncIterator +from mlserver import MLModel +from mlserver.types import InferenceRequest, InferenceResponse +from mlserver.codecs import StringCodec + + +class TextModel(MLModel): + + async def predict(self, payload: InferenceRequest) -> InferenceResponse: + text = StringCodec.decode_input(payload.inputs[0])[0] + return InferenceResponse( + model_name=self._settings.name, + outputs=[ + StringCodec.encode_output( + name="output", + payload=[text], + use_bytes=True, + ), + ], + ) + + async def predict_stream( + self, payloads: AsyncIterator[InferenceRequest] + ) -> AsyncIterator[InferenceResponse]: + payload = [_ async for _ in payloads][0] + text = StringCodec.decode_input(payload.inputs[0])[0] + words = text.split(" ") + + split_text = [] + for i, word in enumerate(words): + split_text.append(word if i == 0 else " " + word) + + for word in split_text: + await asyncio.sleep(0.5) + yield InferenceResponse( + model_name=self._settings.name, + outputs=[ + StringCodec.encode_output( + name="output", + payload=[word], + use_bytes=True, + ), + ], + ) diff --git a/mlserver/batching/hooks.py b/mlserver/batching/hooks.py index 28d3071b1..f98d3c4f6 100644 --- a/mlserver/batching/hooks.py +++ b/mlserver/batching/hooks.py @@ -1,5 +1,5 @@ from functools import wraps -from typing import Awaitable, Callable, Optional +from typing import Awaitable, Callable, Optional, AsyncIterator from ..errors import MLServerError from ..model import MLModel @@ -20,6 +20,26 @@ def __init__(self, method_name: str, reason: Optional[str] = None): super().__init__(msg) +def _get_batcher(f: Callable) -> AdaptiveBatcher: + wrapped_f = get_wrapped_method(f) + model = _get_model(f) + + if not hasattr(model, _AdaptiveBatchingAttr): + raise InvalidBatchingMethod( + wrapped_f.__name__, reason="adaptive batching has not been loaded" + ) + + return getattr(model, _AdaptiveBatchingAttr) + + +def _get_model(f: Callable) -> MLModel: + wrapped_f = get_wrapped_method(f) + if not hasattr(wrapped_f, "__self__"): + raise InvalidBatchingMethod(wrapped_f.__name__, reason="method is not bound") + + return getattr(wrapped_f, "__self__") + + def adaptive_batching(f: Callable[[InferenceRequest], Awaitable[InferenceResponse]]): """ Decorator for the `predict()` method which will ensure it uses the @@ -28,24 +48,36 @@ def adaptive_batching(f: Callable[[InferenceRequest], Awaitable[InferenceRespons @wraps(f) async def _inner(payload: InferenceRequest) -> InferenceResponse: - wrapped_f = get_wrapped_method(f) - if not hasattr(wrapped_f, "__self__"): - raise InvalidBatchingMethod( - wrapped_f.__name__, reason="method is not bound" - ) - - model = getattr(wrapped_f, "__self__") - if not hasattr(model, _AdaptiveBatchingAttr): - raise InvalidBatchingMethod( - wrapped_f.__name__, reason="adaptive batching has not been loaded" - ) - - batcher = getattr(model, _AdaptiveBatchingAttr) + batcher = _get_batcher(f) return await batcher.predict(payload) return _inner +def not_implemented_warning( + f: Callable[[AsyncIterator[InferenceRequest]], AsyncIterator[InferenceResponse]], +): + """ + 
Decorator to lets users know that adaptive batching is not required on + method `f`. + """ + model = _get_model(f) + logger.warning( + f"Adaptive Batching is enabled for model '{model.name}'" + " but not supported for inference streaming." + " Falling back to non-batched inference streaming." + ) + + @wraps(f) + async def _inner_stream( + payload: AsyncIterator[InferenceRequest], + ) -> AsyncIterator[InferenceResponse]: + async for response in f(payload): + yield response + + return _inner_stream + + async def load_batching(model: MLModel) -> MLModel: if model.settings.max_batch_size <= 1: return model @@ -64,7 +96,7 @@ async def load_batching(model: MLModel) -> MLModel: batcher = AdaptiveBatcher(model) setattr(model, _AdaptiveBatchingAttr, batcher) - # Decorate predict method + # Decorate predict methods setattr(model, "predict", adaptive_batching(model.predict)) - + setattr(model, "predict_stream", not_implemented_warning(model.predict_stream)) return model diff --git a/mlserver/grpc/dataplane_pb2.py b/mlserver/grpc/dataplane_pb2.py index 948c1aa9f..b33d31f63 100644 --- a/mlserver/grpc/dataplane_pb2.py +++ b/mlserver/grpc/dataplane_pb2.py @@ -14,7 +14,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x0f\x64\x61taplane.proto\x12\tinference"\x13\n\x11ServerLiveRequest""\n\x12ServerLiveResponse\x12\x0c\n\x04live\x18\x01 \x01(\x08"\x14\n\x12ServerReadyRequest"$\n\x13ServerReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08"2\n\x11ModelReadyRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t"#\n\x12ModelReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08"\x17\n\x15ServerMetadataRequest"K\n\x16ServerMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x12\n\nextensions\x18\x03 \x03(\t"5\n\x14ModelMetadataRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t"\xc5\x04\n\x15ModelMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08versions\x18\x02 \x03(\t\x12\x10\n\x08platform\x18\x03 \x01(\t\x12?\n\x06inputs\x18\x04 \x03(\x0b\x32/.inference.ModelMetadataResponse.TensorMetadata\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.inference.ModelMetadataResponse.TensorMetadata\x12\x44\n\nparameters\x18\x06 \x03(\x0b\x32\x30.inference.ModelMetadataResponse.ParametersEntry\x1a\xe2\x01\n\x0eTensorMetadata\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12S\n\nparameters\x18\x04 \x03(\x0b\x32?.inference.ModelMetadataResponse.TensorMetadata.ParametersEntry\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"\xee\x06\n\x11ModelInferRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\n\n\x02id\x18\x03 \x01(\t\x12@\n\nparameters\x18\x04 \x03(\x0b\x32,.inference.ModelInferRequest.ParametersEntry\x12=\n\x06inputs\x18\x05 \x03(\x0b\x32-.inference.ModelInferRequest.InferInputTensor\x12H\n\x07outputs\x18\x06 \x03(\x0b\x32\x37.inference.ModelInferRequest.InferRequestedOutputTensor\x12\x1a\n\x12raw_input_contents\x18\x07 \x03(\x0c\x1a\x94\x02\n\x10InferInputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12Q\n\nparameters\x18\x04 
\x03(\x0b\x32=.inference.ModelInferRequest.InferInputTensor.ParametersEntry\x12\x30\n\x08\x63ontents\x18\x05 \x01(\x0b\x32\x1e.inference.InferTensorContents\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1a\xd5\x01\n\x1aInferRequestedOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12[\n\nparameters\x18\x02 \x03(\x0b\x32G.inference.ModelInferRequest.InferRequestedOutputTensor.ParametersEntry\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"\xd5\x04\n\x12ModelInferResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\n\n\x02id\x18\x03 \x01(\t\x12\x41\n\nparameters\x18\x04 \x03(\x0b\x32-.inference.ModelInferResponse.ParametersEntry\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.inference.ModelInferResponse.InferOutputTensor\x12\x1b\n\x13raw_output_contents\x18\x06 \x03(\x0c\x1a\x97\x02\n\x11InferOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12S\n\nparameters\x18\x04 \x03(\x0b\x32?.inference.ModelInferResponse.InferOutputTensor.ParametersEntry\x12\x30\n\x08\x63ontents\x18\x05 \x01(\x0b\x32\x1e.inference.InferTensorContents\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"i\n\x0eInferParameter\x12\x14\n\nbool_param\x18\x01 \x01(\x08H\x00\x12\x15\n\x0bint64_param\x18\x02 \x01(\x03H\x00\x12\x16\n\x0cstring_param\x18\x03 \x01(\tH\x00\x42\x12\n\x10parameter_choice"\xd0\x01\n\x13InferTensorContents\x12\x15\n\rbool_contents\x18\x01 \x03(\x08\x12\x14\n\x0cint_contents\x18\x02 \x03(\x05\x12\x16\n\x0eint64_contents\x18\x03 \x03(\x03\x12\x15\n\ruint_contents\x18\x04 \x03(\r\x12\x17\n\x0fuint64_contents\x18\x05 \x03(\x04\x12\x15\n\rfp32_contents\x18\x06 \x03(\x02\x12\x15\n\rfp64_contents\x18\x07 \x03(\x01\x12\x16\n\x0e\x62ytes_contents\x18\x08 \x03(\x0c"\x8a\x01\n\x18ModelRepositoryParameter\x12\x14\n\nbool_param\x18\x01 \x01(\x08H\x00\x12\x15\n\x0bint64_param\x18\x02 \x01(\x03H\x00\x12\x16\n\x0cstring_param\x18\x03 \x01(\tH\x00\x12\x15\n\x0b\x62ytes_param\x18\x04 \x01(\x0cH\x00\x42\x12\n\x10parameter_choice"@\n\x16RepositoryIndexRequest\x12\x17\n\x0frepository_name\x18\x01 \x01(\t\x12\r\n\x05ready\x18\x02 \x01(\x08"\xa4\x01\n\x17RepositoryIndexResponse\x12=\n\x06models\x18\x01 \x03(\x0b\x32-.inference.RepositoryIndexResponse.ModelIndex\x1aJ\n\nModelIndex\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\r\n\x05state\x18\x03 \x01(\t\x12\x0e\n\x06reason\x18\x04 \x01(\t"\xec\x01\n\x1aRepositoryModelLoadRequest\x12\x17\n\x0frepository_name\x18\x01 \x01(\t\x12\x12\n\nmodel_name\x18\x02 \x01(\t\x12I\n\nparameters\x18\x03 \x03(\x0b\x32\x35.inference.RepositoryModelLoadRequest.ParametersEntry\x1aV\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.inference.ModelRepositoryParameter:\x02\x38\x01"\x1d\n\x1bRepositoryModelLoadResponse"\xf0\x01\n\x1cRepositoryModelUnloadRequest\x12\x17\n\x0frepository_name\x18\x01 \x01(\t\x12\x12\n\nmodel_name\x18\x02 \x01(\t\x12K\n\nparameters\x18\x03 
\x03(\x0b\x32\x37.inference.RepositoryModelUnloadRequest.ParametersEntry\x1aV\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.inference.ModelRepositoryParameter:\x02\x38\x01"\x1f\n\x1dRepositoryModelUnloadResponse2\xae\x06\n\x14GRPCInferenceService\x12K\n\nServerLive\x12\x1c.inference.ServerLiveRequest\x1a\x1d.inference.ServerLiveResponse"\x00\x12N\n\x0bServerReady\x12\x1d.inference.ServerReadyRequest\x1a\x1e.inference.ServerReadyResponse"\x00\x12K\n\nModelReady\x12\x1c.inference.ModelReadyRequest\x1a\x1d.inference.ModelReadyResponse"\x00\x12W\n\x0eServerMetadata\x12 .inference.ServerMetadataRequest\x1a!.inference.ServerMetadataResponse"\x00\x12T\n\rModelMetadata\x12\x1f.inference.ModelMetadataRequest\x1a .inference.ModelMetadataResponse"\x00\x12K\n\nModelInfer\x12\x1c.inference.ModelInferRequest\x1a\x1d.inference.ModelInferResponse"\x00\x12Z\n\x0fRepositoryIndex\x12!.inference.RepositoryIndexRequest\x1a".inference.RepositoryIndexResponse"\x00\x12\x66\n\x13RepositoryModelLoad\x12%.inference.RepositoryModelLoadRequest\x1a&.inference.RepositoryModelLoadResponse"\x00\x12l\n\x15RepositoryModelUnload\x12\'.inference.RepositoryModelUnloadRequest\x1a(.inference.RepositoryModelUnloadResponse"\x00\x62\x06proto3' + b'\n\x0f\x64\x61taplane.proto\x12\tinference"\x13\n\x11ServerLiveRequest""\n\x12ServerLiveResponse\x12\x0c\n\x04live\x18\x01 \x01(\x08"\x14\n\x12ServerReadyRequest"$\n\x13ServerReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08"2\n\x11ModelReadyRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t"#\n\x12ModelReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08"\x17\n\x15ServerMetadataRequest"K\n\x16ServerMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x12\n\nextensions\x18\x03 \x03(\t"5\n\x14ModelMetadataRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t"\xc5\x04\n\x15ModelMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08versions\x18\x02 \x03(\t\x12\x10\n\x08platform\x18\x03 \x01(\t\x12?\n\x06inputs\x18\x04 \x03(\x0b\x32/.inference.ModelMetadataResponse.TensorMetadata\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.inference.ModelMetadataResponse.TensorMetadata\x12\x44\n\nparameters\x18\x06 \x03(\x0b\x32\x30.inference.ModelMetadataResponse.ParametersEntry\x1a\xe2\x01\n\x0eTensorMetadata\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12S\n\nparameters\x18\x04 \x03(\x0b\x32?.inference.ModelMetadataResponse.TensorMetadata.ParametersEntry\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"\xee\x06\n\x11ModelInferRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\n\n\x02id\x18\x03 \x01(\t\x12@\n\nparameters\x18\x04 \x03(\x0b\x32,.inference.ModelInferRequest.ParametersEntry\x12=\n\x06inputs\x18\x05 \x03(\x0b\x32-.inference.ModelInferRequest.InferInputTensor\x12H\n\x07outputs\x18\x06 \x03(\x0b\x32\x37.inference.ModelInferRequest.InferRequestedOutputTensor\x12\x1a\n\x12raw_input_contents\x18\x07 \x03(\x0c\x1a\x94\x02\n\x10InferInputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12Q\n\nparameters\x18\x04 
\x03(\x0b\x32=.inference.ModelInferRequest.InferInputTensor.ParametersEntry\x12\x30\n\x08\x63ontents\x18\x05 \x01(\x0b\x32\x1e.inference.InferTensorContents\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1a\xd5\x01\n\x1aInferRequestedOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12[\n\nparameters\x18\x02 \x03(\x0b\x32G.inference.ModelInferRequest.InferRequestedOutputTensor.ParametersEntry\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"\xd5\x04\n\x12ModelInferResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\n\n\x02id\x18\x03 \x01(\t\x12\x41\n\nparameters\x18\x04 \x03(\x0b\x32-.inference.ModelInferResponse.ParametersEntry\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.inference.ModelInferResponse.InferOutputTensor\x12\x1b\n\x13raw_output_contents\x18\x06 \x03(\x0c\x1a\x97\x02\n\x11InferOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12S\n\nparameters\x18\x04 \x03(\x0b\x32?.inference.ModelInferResponse.InferOutputTensor.ParametersEntry\x12\x30\n\x08\x63ontents\x18\x05 \x01(\x0b\x32\x1e.inference.InferTensorContents\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"i\n\x0eInferParameter\x12\x14\n\nbool_param\x18\x01 \x01(\x08H\x00\x12\x15\n\x0bint64_param\x18\x02 \x01(\x03H\x00\x12\x16\n\x0cstring_param\x18\x03 \x01(\tH\x00\x42\x12\n\x10parameter_choice"\xd0\x01\n\x13InferTensorContents\x12\x15\n\rbool_contents\x18\x01 \x03(\x08\x12\x14\n\x0cint_contents\x18\x02 \x03(\x05\x12\x16\n\x0eint64_contents\x18\x03 \x03(\x03\x12\x15\n\ruint_contents\x18\x04 \x03(\r\x12\x17\n\x0fuint64_contents\x18\x05 \x03(\x04\x12\x15\n\rfp32_contents\x18\x06 \x03(\x02\x12\x15\n\rfp64_contents\x18\x07 \x03(\x01\x12\x16\n\x0e\x62ytes_contents\x18\x08 \x03(\x0c"\x8a\x01\n\x18ModelRepositoryParameter\x12\x14\n\nbool_param\x18\x01 \x01(\x08H\x00\x12\x15\n\x0bint64_param\x18\x02 \x01(\x03H\x00\x12\x16\n\x0cstring_param\x18\x03 \x01(\tH\x00\x12\x15\n\x0b\x62ytes_param\x18\x04 \x01(\x0cH\x00\x42\x12\n\x10parameter_choice"@\n\x16RepositoryIndexRequest\x12\x17\n\x0frepository_name\x18\x01 \x01(\t\x12\r\n\x05ready\x18\x02 \x01(\x08"\xa4\x01\n\x17RepositoryIndexResponse\x12=\n\x06models\x18\x01 \x03(\x0b\x32-.inference.RepositoryIndexResponse.ModelIndex\x1aJ\n\nModelIndex\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\r\n\x05state\x18\x03 \x01(\t\x12\x0e\n\x06reason\x18\x04 \x01(\t"\xec\x01\n\x1aRepositoryModelLoadRequest\x12\x17\n\x0frepository_name\x18\x01 \x01(\t\x12\x12\n\nmodel_name\x18\x02 \x01(\t\x12I\n\nparameters\x18\x03 \x03(\x0b\x32\x35.inference.RepositoryModelLoadRequest.ParametersEntry\x1aV\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.inference.ModelRepositoryParameter:\x02\x38\x01"\x1d\n\x1bRepositoryModelLoadResponse"\xf0\x01\n\x1cRepositoryModelUnloadRequest\x12\x17\n\x0frepository_name\x18\x01 \x01(\t\x12\x12\n\nmodel_name\x18\x02 \x01(\t\x12K\n\nparameters\x18\x03 
\x03(\x0b\x32\x37.inference.RepositoryModelUnloadRequest.ParametersEntry\x1aV\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x32\n\x05value\x18\x02 \x01(\x0b\x32#.inference.ModelRepositoryParameter:\x02\x38\x01"\x1f\n\x1dRepositoryModelUnloadResponse2\x85\x07\n\x14GRPCInferenceService\x12K\n\nServerLive\x12\x1c.inference.ServerLiveRequest\x1a\x1d.inference.ServerLiveResponse"\x00\x12N\n\x0bServerReady\x12\x1d.inference.ServerReadyRequest\x1a\x1e.inference.ServerReadyResponse"\x00\x12K\n\nModelReady\x12\x1c.inference.ModelReadyRequest\x1a\x1d.inference.ModelReadyResponse"\x00\x12W\n\x0eServerMetadata\x12 .inference.ServerMetadataRequest\x1a!.inference.ServerMetadataResponse"\x00\x12T\n\rModelMetadata\x12\x1f.inference.ModelMetadataRequest\x1a .inference.ModelMetadataResponse"\x00\x12K\n\nModelInfer\x12\x1c.inference.ModelInferRequest\x1a\x1d.inference.ModelInferResponse"\x00\x12U\n\x10ModelStreamInfer\x12\x1c.inference.ModelInferRequest\x1a\x1d.inference.ModelInferResponse"\x00(\x01\x30\x01\x12Z\n\x0fRepositoryIndex\x12!.inference.RepositoryIndexRequest\x1a".inference.RepositoryIndexResponse"\x00\x12\x66\n\x13RepositoryModelLoad\x12%.inference.RepositoryModelLoadRequest\x1a&.inference.RepositoryModelLoadResponse"\x00\x12l\n\x15RepositoryModelUnload\x12\'.inference.RepositoryModelUnloadRequest\x1a(.inference.RepositoryModelUnloadResponse"\x00\x42\x0f\n\x07\x65x.grpc\xa2\x02\x03HSWb\x06proto3' ) @@ -466,6 +466,7 @@ if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b"\n\007ex.grpc\242\002\003HSW" _MODELMETADATARESPONSE_TENSORMETADATA_PARAMETERSENTRY._options = None _MODELMETADATARESPONSE_TENSORMETADATA_PARAMETERSENTRY._serialized_options = b"8\001" _MODELMETADATARESPONSE_PARAMETERSENTRY._options = None @@ -559,5 +560,5 @@ _REPOSITORYMODELUNLOADRESPONSE._serialized_start = 3663 _REPOSITORYMODELUNLOADRESPONSE._serialized_end = 3694 _GRPCINFERENCESERVICE._serialized_start = 3697 - _GRPCINFERENCESERVICE._serialized_end = 4511 + _GRPCINFERENCESERVICE._serialized_end = 4598 # @@protoc_insertion_point(module_scope) diff --git a/mlserver/grpc/dataplane_pb2_grpc.py b/mlserver/grpc/dataplane_pb2_grpc.py index 9ad255a0a..266d34de7 100644 --- a/mlserver/grpc/dataplane_pb2_grpc.py +++ b/mlserver/grpc/dataplane_pb2_grpc.py @@ -47,6 +47,11 @@ def __init__(self, channel): request_serializer=dataplane__pb2.ModelInferRequest.SerializeToString, response_deserializer=dataplane__pb2.ModelInferResponse.FromString, ) + self.ModelStreamInfer = channel.stream_stream( + "/inference.GRPCInferenceService/ModelStreamInfer", + request_serializer=dataplane__pb2.ModelInferRequest.SerializeToString, + response_deserializer=dataplane__pb2.ModelInferResponse.FromString, + ) self.RepositoryIndex = channel.unary_unary( "/inference.GRPCInferenceService/RepositoryIndex", request_serializer=dataplane__pb2.RepositoryIndexRequest.SerializeToString, @@ -106,6 +111,12 @@ def ModelInfer(self, request, context): context.set_details("Method not implemented!") raise NotImplementedError("Method not implemented!") + def ModelStreamInfer(self, request_iterator, context): + """Perform stream inference using a specific model.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + def RepositoryIndex(self, request, context): """Get the index of model repository contents.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) @@ -157,6 +168,11 @@ def 
add_GRPCInferenceServiceServicer_to_server(servicer, server): request_deserializer=dataplane__pb2.ModelInferRequest.FromString, response_serializer=dataplane__pb2.ModelInferResponse.SerializeToString, ), + "ModelStreamInfer": grpc.stream_stream_rpc_method_handler( + servicer.ModelStreamInfer, + request_deserializer=dataplane__pb2.ModelInferRequest.FromString, + response_serializer=dataplane__pb2.ModelInferResponse.SerializeToString, + ), "RepositoryIndex": grpc.unary_unary_rpc_method_handler( servicer.RepositoryIndex, request_deserializer=dataplane__pb2.RepositoryIndexRequest.FromString, @@ -360,6 +376,35 @@ def ModelInfer( metadata, ) + @staticmethod + def ModelStreamInfer( + request_iterator, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.stream_stream( + request_iterator, + target, + "/inference.GRPCInferenceService/ModelStreamInfer", + dataplane__pb2.ModelInferRequest.SerializeToString, + dataplane__pb2.ModelInferResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + @staticmethod def RepositoryIndex( request, diff --git a/mlserver/grpc/servicers.py b/mlserver/grpc/servicers.py index 4e376d9e6..548fe5e70 100644 --- a/mlserver/grpc/servicers.py +++ b/mlserver/grpc/servicers.py @@ -10,10 +10,17 @@ RepositoryIndexRequestConverter, RepositoryIndexResponseConverter, ) -from .utils import to_headers, to_metadata, handle_mlserver_error +from .utils import ( + to_headers, + to_metadata, + handle_mlserver_error, + handle_mlserver_stream_error, +) from ..utils import insert_headers, extract_headers from ..handlers import DataPlane, ModelRepositoryHandlers +from ..types import InferenceResponse, InferenceRequest +from typing import AsyncIterator class InferenceServicer(GRPCInferenceServiceServicer): @@ -63,29 +70,67 @@ async def ModelMetadata( async def ModelInfer( self, request: pb.ModelInferRequest, context: grpc.ServicerContext ) -> pb.ModelInferResponse: - return_raw = False - if request.raw_input_contents: - # If the request contains raw input contents, then use the same for - # the output - return_raw = True - - payload = ModelInferRequestConverter.to_types(request) - - request_headers = to_headers(context) - insert_headers(payload, request_headers) - + use_raw = InferenceServicer._GetReturnRaw(request) + payload = self._InsertHeaders(request, context) result = await self._data_plane.infer( payload=payload, name=request.model_name, version=request.model_version ) + self._SetTrailingMetadata(result, context) + return ModelInferResponseConverter.from_types(result, use_raw=use_raw) + + @handle_mlserver_stream_error + async def ModelStreamInfer( + self, + requests_stream: AsyncIterator[pb.ModelInferRequest], + context: grpc.ServicerContext, + ) -> AsyncIterator[pb.ModelInferResponse]: + async for request in requests_stream: + break + + use_raw = InferenceServicer._GetReturnRaw(request) + payloads = self._PayloadsMetadataGenerator(request, requests_stream, context) + + async for result in self._data_plane.infer_stream( + payloads=payloads, name=request.model_name, version=request.model_version + ): + response = ModelInferResponseConverter.from_types(result, use_raw=use_raw) + yield response + + self._SetTrailingMetadata(result, context) + + async def _PayloadsMetadataGenerator( + self, + request: pb.ModelInferRequest, + requests_stream: 
AsyncIterator[pb.ModelInferRequest], + context: grpc.ServicerContext, + ) -> AsyncIterator[InferenceRequest]: + payload = self._InsertHeaders(request, context) + yield payload + + async for request in requests_stream: + payload = self._InsertHeaders(request, context) + yield payload + + @staticmethod + def _GetReturnRaw(request: pb.ModelInferRequest) -> bool: + return True if request.raw_input_contents else False + + def _InsertHeaders( + self, request: pb.ModelInferRequest, context: grpc.ServicerContext + ) -> InferenceRequest: + payload = ModelInferRequestConverter.to_types(request) + request_headers = to_headers(context) + insert_headers(payload, request_headers) + return payload + def _SetTrailingMetadata( + self, result: InferenceResponse, context: grpc.ServicerContext + ) -> None: response_headers = extract_headers(result) if response_headers: response_metadata = to_metadata(response_headers) context.set_trailing_metadata(response_metadata) - response = ModelInferResponseConverter.from_types(result, use_raw=return_raw) - return response - async def RepositoryIndex( self, request: pb.RepositoryIndexRequest, context ) -> pb.RepositoryIndexResponse: diff --git a/mlserver/grpc/utils.py b/mlserver/grpc/utils.py index 31f7c39b5..1f58ea98e 100644 --- a/mlserver/grpc/utils.py +++ b/mlserver/grpc/utils.py @@ -50,3 +50,18 @@ async def _inner(self, request, context): await context.abort(code=grpc.StatusCode.INTERNAL, details=str(err)) return _inner + + +def handle_mlserver_stream_error(f: Callable): + async def _inner(self, request_stream, context): + try: + async for response in f(self, request_stream, context): + yield response + except MLServerError as err: + logger.exception(err) + await context.abort(code=_grpc_status_code(err), details=str(err)) + except Exception as err: + logger.exception(err) + await context.abort(code=grpc.StatusCode.INTERNAL, details=str(err)) + + return _inner diff --git a/mlserver/handlers/dataplane.py b/mlserver/handlers/dataplane.py index 93ac4fe18..026920011 100644 --- a/mlserver/handlers/dataplane.py +++ b/mlserver/handlers/dataplane.py @@ -1,9 +1,11 @@ +from contextlib import asynccontextmanager from prometheus_client import ( Counter, Summary, ) -from typing import Optional +from typing import AsyncIterator, Optional +from ..model import MLModel from ..errors import ModelNotReady from ..context import model_context from ..settings import Settings @@ -85,6 +87,95 @@ async def infer( name: str, version: Optional[str] = None, ) -> InferenceResponse: + # need to cache the payload here since it + # will be modified in the context manager + if self._response_cache is not None: + cache_key = payload.model_dump_json() + + async with self._infer_contextmanager(name, version) as model: + payload = self._prepare_payload(payload, model) + + if ( + self._response_cache is not None + and model.settings.cache_enabled is not False + ): + cache_value = await self._response_cache.lookup(cache_key) + if cache_value != "": + prediction = InferenceResponse.model_validate_json(cache_value) + else: + prediction = await model.predict(payload) + # ignore cache insertion error if any + await self._response_cache.insert( + cache_key, prediction.model_dump_json() + ) + else: + prediction = await model.predict(payload) + + # Ensure ID matches + prediction.id = payload.id + self._inference_middleware.response_middleware(prediction, model.settings) + return prediction + + async def infer_stream( + self, + payloads: AsyncIterator[InferenceRequest], + name: str, + version: Optional[str] = 
None, + ) -> AsyncIterator[InferenceResponse]: + # TODO: Implement cache for stream + + async with self._infer_contextmanager(name, version) as model: + # we need to get the first payload to get the ID + async for payload in payloads: + break + + payloads_decorated = self._prepare_payloads_generator( + payload, payloads, model + ) + async for prediction in model.predict_stream(payloads_decorated): + prediction.id = payload.id # Ensure ID matches + self._inference_middleware.response_middleware( + prediction, model.settings + ) + yield prediction + + def _prepare_payload( + self, payload: InferenceRequest, model: MLModel + ) -> InferenceRequest: + if payload.id is None: + payload.id = generate_uuid() + + self._inference_middleware.request_middleware(payload, model.settings) + return payload + + async def _prepare_payloads_generator( + self, + first_payload: InferenceRequest, + subsequent_payloads: AsyncIterator[InferenceRequest], + model: MLModel, + ) -> AsyncIterator[InferenceRequest]: + # yield the first payload after preparing it + first_payload = self._prepare_payload(first_payload, model) + yield first_payload + + # Yield the rest of the payloads after preparing them + # and set the ID to match the first payload. Note that + # we don't make any assumptions about how many inputs and + # outputs there are. Thus, everything gets the same ID, cause + # otherwise we could have one to many, many to one, or many to + # many id mappings. + async for payload in subsequent_payloads: + payload.id = first_payload.id + payload = self._prepare_payload(payload, model) + yield payload + + @asynccontextmanager + async def _infer_contextmanager( + self, + name: str, + version: Optional[str] = None, + ) -> AsyncIterator[MLModel]: + infer_duration = self._ModelInferRequestDuration.labels( model=name, version=version ).time() @@ -92,46 +183,16 @@ async def infer( model=name, version=version ).count_exceptions() - if self._response_cache is not None: - cache_key = payload.model_dump_json() - with infer_duration, infer_errors: - if payload.id is None: - payload.id = generate_uuid() - model = await self._model_registry.get_model(name, version) if not model.ready: raise ModelNotReady(name, version) - self._inference_middleware.request_middleware(payload, model.settings) - - # TODO: Make await optional for sync methods with model_context(model.settings): - if ( - self._response_cache is not None - and model.settings.cache_enabled is not False - ): - cache_value = await self._response_cache.lookup(cache_key) - if cache_value != "": - prediction = InferenceResponse.parse_raw(cache_value) - else: - prediction = await model.predict(payload) - # ignore cache insertion error if any - await self._response_cache.insert( - cache_key, prediction.model_dump_json() - ) - else: - prediction = await model.predict(payload) - - # Ensure ID matches - prediction.id = payload.id - - self._inference_middleware.response_middleware(prediction, model.settings) + yield model self._ModelInferRequestSuccess.labels(model=name, version=version).inc() - return prediction - def _create_response_cache(self) -> ResponseCache: return LocalCache(size=self._settings.cache_size) diff --git a/mlserver/model.py b/mlserver/model.py index 77a2521a4..8296bc19e 100644 --- a/mlserver/model.py +++ b/mlserver/model.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, List +from typing import Any, Dict, Optional, List, AsyncIterator from .codecs import ( encode_response_output, @@ -64,7 +64,7 @@ async def load(self) -> bool: Its return value will 
represent the model's readiness status. A return value of ``True`` will mean the model is ready. - **This method should be overriden to implement your custom load + **This method can be overriden to implement your custom load logic.** """ return True @@ -74,11 +74,24 @@ async def predict(self, payload: InferenceRequest) -> InferenceResponse: Method responsible for running inference on the model. - **This method should be overriden to implement your custom inference + **This method can be overriden to implement your custom inference logic.** """ raise NotImplementedError("predict() method not implemented") + async def predict_stream( + self, payloads: AsyncIterator[InferenceRequest] + ) -> AsyncIterator[InferenceResponse]: + """ + Method responsible for running generation on the model, streaming a set + of responses back to the client. + + + **This method can be overriden to implement your custom inference + logic.** + """ + yield await self.predict((await payloads.__anext__())) + async def unload(self) -> bool: """ Method responsible for unloading the model, freeing any resources (e.g. @@ -88,7 +101,7 @@ async def unload(self) -> bool: enabled). A return value of ``True`` will mean the model is now unloaded. - **This method should be overriden to implement your custom unload + **This method can be overriden to implement your custom unload logic.** """ return True diff --git a/mlserver/rest/app.py b/mlserver/rest/app.py index e834d4780..35557f43c 100644 --- a/mlserver/rest/app.py +++ b/mlserver/rest/app.py @@ -78,6 +78,39 @@ def create_app( endpoints.infer, methods=["POST"], ), + # Model generate + APIRoute( + "/v2/models/{model_name}/generate", + endpoints.infer, + methods=["POST"], + ), + APIRoute( + "/v2/models/{model_name}/versions/{model_version}/generate", + endpoints.infer, + methods=["POST"], + ), + # Model infer_stream + APIRoute( + "/v2/models/{model_name}/infer_stream", + endpoints.infer_stream, + methods=["POST"], + ), + APIRoute( + "/v2/models/{model_name}/versions/{model_version}/infer_stream", + endpoints.infer_stream, + methods=["POST"], + ), + # Model generate_stream + APIRoute( + "/v2/models/{model_name}/generate_stream", + endpoints.infer_stream, + methods=["POST"], + ), + APIRoute( + "/v2/models/{model_name}/versions/{model_version}/generate_stream", + endpoints.infer_stream, + methods=["POST"], + ), # Model metadata APIRoute( "/v2/models/{model_name}", @@ -167,7 +200,12 @@ def create_app( ) app.router.route_class = APIRoute - app.add_middleware(GZipMiddleware) + + if settings.gzip_enabled: + # GZip middleware does not work with streaming + # see here: https://github.com/encode/starlette/issues/20#issuecomment-704106436 + app.add_middleware(GZipMiddleware) + if settings.cors_settings is not None: app.add_middleware( CORSMiddleware, diff --git a/mlserver/rest/endpoints.py b/mlserver/rest/endpoints.py index d6ed9bfcf..d0951c59d 100644 --- a/mlserver/rest/endpoints.py +++ b/mlserver/rest/endpoints.py @@ -1,8 +1,8 @@ from fastapi.requests import Request -from fastapi.responses import Response, HTMLResponse +from fastapi.responses import Response, HTMLResponse, StreamingResponse from fastapi.openapi.docs import get_swagger_ui_html -from typing import Optional +from typing import AsyncIterator, Optional from ..types import ( MetadataModelResponse, @@ -15,6 +15,7 @@ from ..handlers import DataPlane, ModelRepositoryHandlers from ..utils import insert_headers, extract_headers +from .responses import ServerSentEvent from .openapi import get_openapi_schema, get_model_schema_uri, 
get_model_schema from .utils import to_status_code @@ -93,19 +94,57 @@ async def infer( model_name: str, model_version: Optional[str] = None, ) -> InferenceResponse: + request_headers = dict(raw_request.headers) insert_headers(payload, request_headers) inference_response = await self._data_plane.infer( payload, model_name, model_version ) - response_headers = extract_headers(inference_response) + if response_headers: raw_response.headers.update(response_headers) return inference_response + async def infer_stream( + self, + raw_request: Request, + payload: InferenceRequest, + model_name: str, + model_version: Optional[str] = None, + ) -> StreamingResponse: + + request_headers = dict(raw_request.headers) + insert_headers(payload, request_headers) + + async def payloads_generator( + payload: InferenceRequest, + ) -> AsyncIterator[InferenceRequest]: + yield payload + + payloads = payloads_generator(payload) + infer_stream = self._data_plane.infer_stream( + payloads, model_name, model_version + ) + + sse_stream = _as_sse(infer_stream) + return StreamingResponse(content=sse_stream, media_type="text/event-stream") + + +async def _as_sse( + infer_stream: AsyncIterator[InferenceResponse], +) -> AsyncIterator[bytes]: + """ + Helper to convert all the responses coming out of a generator to a + Server-Sent Event object. + """ + async for inference_response in infer_stream: + # TODO: How should we send headers back? + # response_headers = extract_headers(inference_response) + yield ServerSentEvent(inference_response).encode() + class ModelRepositoryEndpoints: def __init__(self, handlers: ModelRepositoryHandlers): diff --git a/mlserver/rest/responses.py b/mlserver/rest/responses.py index c55835064..c070a492f 100644 --- a/mlserver/rest/responses.py +++ b/mlserver/rest/responses.py @@ -2,6 +2,7 @@ from typing import Any +from pydantic import BaseModel from starlette.responses import JSONResponse as _JSONResponse from ..codecs.string import decode_str @@ -31,25 +32,42 @@ class Response(_JSONResponse): media_type = "application/json" def render(self, content: Any) -> bytes: - if orjson is None: - # Original implementation of starlette's JSONResponse, using our - # custom encoder (capable of "encoding" bytes). - # Original implementation can be seen here: - # https://github.com/encode/starlette/blob/ - # f53faba229e3fa2844bc3753e233d9c1f54cca52/starlette/responses.py#L173-L180 - return json.dumps( - content, - ensure_ascii=False, - allow_nan=False, - indent=None, - separators=(",", ":"), - cls=BytesJSONEncoder, - ).encode("utf-8") - - # This is equivalent to the ORJSONResponse implementation in FastAPI: - # https://github.com/tiangolo/fastapi/blob/ - # 864643ef7608d28ac4ed321835a7fb4abe3dfc13/fastapi/responses.py#L32-L34 - return orjson.dumps(content, default=_encode_bytes) + return _render(content) + + +class ServerSentEvent: + def __init__(self, data: BaseModel, *args, **kwargs): + # NOTE: SSE should use `\n\n` as separator + # https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format + self._sep = b"\n\n" + self._pre = b"data: " + self.data = data + + def encode(self) -> bytes: + as_dict = self.data.model_dump() + return self._pre + _render(as_dict) + self._sep + + +def _render(content: Any) -> bytes: + if orjson is None: + # Original implementation of starlette's JSONResponse, using our + # custom encoder (capable of "encoding" bytes). 
+ # Original implementation can be seen here: + # https://github.com/encode/starlette/blob/ + # f53faba229e3fa2844bc3753e233d9c1f54cca52/starlette/responses.py#L173-L180 + return json.dumps( + content, + ensure_ascii=False, + allow_nan=False, + indent=None, + separators=(",", ":"), + cls=BytesJSONEncoder, + ).encode("utf-8") + + # This is equivalent to the ORJSONResponse implementation in FastAPI: + # https://github.com/tiangolo/fastapi/blob/ + # 864643ef7608d28ac4ed321835a7fb4abe3dfc13/fastapi/responses.py#L32-L34 + return orjson.dumps(content, default=_encode_bytes) def _encode_bytes(obj: Any) -> str: diff --git a/mlserver/settings.py b/mlserver/settings.py index c1b79d177..e3a79ef73 100644 --- a/mlserver/settings.py +++ b/mlserver/settings.py @@ -277,6 +277,9 @@ class Settings(BaseSettings): cache_size: int = 100 """Cache size to be used if caching is enabled.""" + gzip_enabled: bool = True + """Enable GZipMiddleware.""" + class ModelParameters(BaseSettings): """ diff --git a/poetry.lock b/poetry.lock index 49c0378b3..f16e742ce 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -2452,6 +2452,17 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "httpx-sse" +version = "0.3.1" +description = "Consume Server-Sent Event (SSE) messages with HTTPX." +optional = false +python-versions = ">=3.7" +files = [ + {file = "httpx-sse-0.3.1.tar.gz", hash = "sha256:3bb3289b2867f50cbdb2fee3eeeefecb1e86653122e164faac0023f1ffc88aea"}, + {file = "httpx_sse-0.3.1-py3-none-any.whl", hash = "sha256:7376dd88732892f9b6b549ac0ad05a8e2341172fe7dcf9f8f9c8050934297316"}, +] + [[package]] name = "huggingface-hub" version = "0.21.3" @@ -2643,13 +2654,13 @@ files = [ [[package]] name = "jinja2" -version = "3.1.4" +version = "3.1.3" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" files = [ - {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, - {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, ] [package.dependencies] @@ -5223,6 +5234,20 @@ decopatch = "*" makefun = ">=1.15.1" packaging = "*" +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "pytest-mock" version = "3.12.0" @@ -7373,13 +7398,13 @@ testing = ["build[virtualenv] (>=1.0.3)", "covdefaults (>=2.3)", "detect-test-po [[package]] name = "tqdm" -version = "4.66.3" +version = "4.66.2" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"}, - {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"}, + {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, + {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, ] [package.dependencies] @@ -7942,13 +7967,13 @@ files = [ [[package]] name = "werkzeug" -version = "3.0.3" +version = "3.0.1" description = "The comprehensive WSGI web application library." optional = false python-versions = ">=3.8" files = [ - {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, - {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, + {file = "werkzeug-3.0.1-py3-none-any.whl", hash = "sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10"}, + {file = "werkzeug-3.0.1.tar.gz", hash = "sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc"}, ] [package.dependencies] @@ -8391,4 +8416,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "5c14b46a83aef0fcb3a25e33f973d49e2feb77b3b0fc73a549ba131eabb1b2a1" +content-hash = "41fb1fe873201b8ea948f236102b7cf848f24c70f2000f0063cd7b169c97cfe6" diff --git a/proto/dataplane.proto b/proto/dataplane.proto index ba11224ee..ee86462c2 100644 --- a/proto/dataplane.proto +++ b/proto/dataplane.proto @@ -1,5 +1,8 @@ syntax = "proto3"; +option java_package = "ex.grpc"; +option objc_class_prefix = "HSW"; + package inference; // @@ -24,6 +27,9 @@ service GRPCInferenceService // Perform inference using a specific model. rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} + + // Perform stream inference using a specific model. 
+ rpc ModelStreamInfer(stream ModelInferRequest) returns (stream ModelInferResponse) {} // Get the index of model repository contents. rpc RepositoryIndex(RepositoryIndexRequest) diff --git a/pyproject.toml b/pyproject.toml index e36ac2348..b67d79dee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ opentelemetry-sdk = "^1.22.0" opentelemetry-instrumentation-fastapi = ">=0.43b0" opentelemetry-instrumentation-grpc = ">=0.43b0" opentelemetry-exporter-otlp-proto-grpc = "^1.22.0" +pytest-lazy-fixture = "^0.6.3" [tool.poetry.group.dev.dependencies] datamodel-code-generator = "0.25.5" @@ -104,6 +105,7 @@ black = "24.4.0" pip-licenses = "4.4.0" pytest-xdist = "3.5.0" filelock = "^3.13.1" +httpx-sse = "^0.3.1" [tool.poetry.group.docker.dependencies] protobuf = "3.20.3" diff --git a/runtimes/alibi-detect/poetry.lock b/runtimes/alibi-detect/poetry.lock index b7ec5a27c..82e615a5f 100644 --- a/runtimes/alibi-detect/poetry.lock +++ b/runtimes/alibi-detect/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -1601,6 +1601,17 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "joblib" version = "1.4.0" @@ -2004,6 +2015,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -2682,6 +2694,21 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa typing = ["typing-extensions"] xmp = ["defusedxml"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -2911,6 +2938,42 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] 
+colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -3903,15 +3966,26 @@ files = [ {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, ] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tqdm" -version = "4.66.3" +version = "4.66.2" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"}, - {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"}, + {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, + {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, ] [package.dependencies] @@ -4120,13 +4194,13 @@ test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)" [[package]] name = "werkzeug" -version = "3.0.3" +version = "3.0.2" description = "The comprehensive WSGI web application library." optional = false python-versions = ">=3.8" files = [ - {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, - {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, + {file = "werkzeug-3.0.2-py3-none-any.whl", hash = "sha256:3aac3f5da756f93030740bc235d3e09449efcf65f2f55e3602e1d851b8f48795"}, + {file = "werkzeug-3.0.2.tar.gz", hash = "sha256:e39b645a6ac92822588e7b39a692e7828724ceae0b0d702ef96701f90e70128d"}, ] [package.dependencies] diff --git a/runtimes/alibi-explain/poetry.lock b/runtimes/alibi-explain/poetry.lock index 4343c8ab0..b0df8991c 100644 --- a/runtimes/alibi-explain/poetry.lock +++ b/runtimes/alibi-explain/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "absl-py" @@ -1657,6 +1657,17 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "jinja2" version = "3.1.3" @@ -2115,6 +2126,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -2863,6 +2875,21 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa typing = ["typing-extensions"] xmp = ["defusedxml"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "preshed" version = "3.0.9" @@ -3138,6 +3165,42 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -3286,6 +3349,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = 
"PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -4440,6 +4504,17 @@ dev = ["tokenizers[testing]"] docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tqdm" version = "4.66.2" @@ -4794,6 +4869,16 @@ files = [ {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, + {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, + {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, + {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, + {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, {file = 
"wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, diff --git a/runtimes/catboost/poetry.lock b/runtimes/catboost/poetry.lock index 38e55b996..872fc7efb 100644 --- a/runtimes/catboost/poetry.lock +++ b/runtimes/catboost/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiofiles" @@ -1157,15 +1157,26 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "joblib" -version = "1.4.2" +version = "1.4.0" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" files = [ - {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, - {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, + {file = "joblib-1.4.0-py3-none-any.whl", hash = "sha256:42942470d4062537be4d54c83511186da1fc14ba354961a2114da91efa9a4ed7"}, + {file = "joblib-1.4.0.tar.gz", hash = "sha256:1eb0dc091919cd384490de890cb5dfd538410a6d4b3b54eef09fb8c50b409b1c"}, ] [[package]] @@ -1359,6 +1370,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -1943,6 +1955,21 @@ files = [ packaging = "*" tenacity = ">=6.2.0" +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -2147,6 +2174,42 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", 
hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.8.2" @@ -2397,6 +2460,17 @@ files = [ [package.extras] doc = ["reno", "sphinx", "tornado (>=4.5)"] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tritonclient" version = "2.42.0" diff --git a/runtimes/huggingface/poetry.lock b/runtimes/huggingface/poetry.lock index 5f4252bf0..745b9648f 100644 --- a/runtimes/huggingface/poetry.lock +++ b/runtimes/huggingface/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -1416,6 +1416,17 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "intel-openmp" version = "2021.4.0" @@ -1432,13 +1443,13 @@ files = [ [[package]] name = "jinja2" -version = "3.1.4" +version = "3.1.3" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" files = [ - {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, - {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, ] [package.dependencies] @@ -1692,6 +1703,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -2359,13 +2371,13 @@ tests = ["pytest", "pytest-cov", "pytest-pep8"] [[package]] name = "optimum" -version = "1.19.2" +version = "1.19.1" description = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality." optional = false python-versions = ">=3.7.0" files = [ - {file = "optimum-1.19.2-py3-none-any.whl", hash = "sha256:66f0fafda050ee6671bab6899852b9bf95afac766d99aa54a40699c7dee598bf"}, - {file = "optimum-1.19.2.tar.gz", hash = "sha256:fc22e07f084d867bd9bce32fd0d737f7c4863514ea5d90c7acccf5dcfe5f2296"}, + {file = "optimum-1.19.1-py3-none-any.whl", hash = "sha256:ca474589682fe10f7827c85260d116603a5823d5c251c453620584cbc06fa5f6"}, + {file = "optimum-1.19.1.tar.gz", hash = "sha256:fd723b723bb7fe57d98b4afbd5f9e1d923d31a9843eff32331725162994ec849"}, ] [package.dependencies] @@ -2396,7 +2408,7 @@ exporters-gpu = ["onnx", "onnxruntime-gpu", "timm"] exporters-tf = ["h5py", "numpy (<1.24.0)", "onnx", "onnxruntime", "tensorflow (>=2.4,<=2.12.1)", "tf2onnx", "timm", "transformers[sentencepiece] (>=4.26.0,<4.38.0)"] furiosa = ["optimum-furiosa"] graphcore = ["optimum-graphcore"] -habana = ["optimum-habana", "transformers (>=4.38.0,<4.39.0)"] +habana = ["optimum-habana", "transformers (>=4.37.0,<4.38.0)"] intel = ["optimum-intel (>=1.15.0)"] neural-compressor = ["optimum-intel[neural-compressor] (>=1.15.0)"] neuron = ["optimum-neuron[neuron] (>=0.0.20)", "transformers (==4.36.2)"] @@ -2636,6 +2648,21 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa typing = ["typing-extensions"] xmp = ["defusedxml"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -2900,6 +2927,42 @@ files = [ {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, ] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = 
"sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.8.2" @@ -3783,6 +3846,17 @@ dev = ["tokenizers[testing]"] docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "torch" version = "2.3.0" @@ -3839,13 +3913,13 @@ optree = ["optree (>=0.9.1)"] [[package]] name = "tqdm" -version = "4.66.3" +version = "4.66.2" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"}, - {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"}, + {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, + {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, ] [package.dependencies] @@ -4079,13 +4153,13 @@ test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)" [[package]] name = "werkzeug" -version = "3.0.3" +version = "3.0.1" description = "The comprehensive WSGI web application library." optional = false python-versions = ">=3.8" files = [ - {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, - {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, + {file = "werkzeug-3.0.1-py3-none-any.whl", hash = "sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10"}, + {file = "werkzeug-3.0.1.tar.gz", hash = "sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc"}, ] [package.dependencies] diff --git a/runtimes/lightgbm/poetry.lock b/runtimes/lightgbm/poetry.lock index e70f1bb24..ef314c10e 100644 --- a/runtimes/lightgbm/poetry.lock +++ b/runtimes/lightgbm/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiofiles" @@ -952,6 +952,17 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "lightgbm" version = "4.3.0" @@ -1005,6 +1016,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -1488,6 +1500,21 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -1678,6 +1705,42 @@ python-dotenv = ">=0.21.0" toml = ["tomli (>=2.0.1)"] yaml = ["pyyaml (>=6.0.1)"] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.8.2" @@ -1914,6 +1977,17 @@ files = [ prometheus-client = ">=0.12" starlette = ">=0.35" 
+[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tritonclient" version = "2.42.0" diff --git a/runtimes/mlflow/poetry.lock b/runtimes/mlflow/poetry.lock index 300d7069f..00333ed3b 100644 --- a/runtimes/mlflow/poetry.lock +++ b/runtimes/mlflow/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiofiles" @@ -1465,6 +1465,17 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "itsdangerous" version = "2.1.2" @@ -1478,13 +1489,13 @@ files = [ [[package]] name = "jinja2" -version = "3.1.4" +version = "3.1.3" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" files = [ - {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, - {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, ] [package.dependencies] @@ -1878,6 +1889,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -2624,6 +2636,21 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa typing = ["typing-extensions"] xmp = ["defusedxml"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -2876,6 +2903,42 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.8.2" @@ -3441,6 +3504,17 @@ files = [ {file = "threadpoolctl-3.3.0.tar.gz", hash = "sha256:5dac632b4fa2d43f42130267929af3ba01399ef4bd1882918e92dbc30365d30c"}, ] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + 
[[package]] name = "torch" version = "2.2.1" @@ -3572,13 +3646,13 @@ scipy = ["scipy"] [[package]] name = "tqdm" -version = "4.66.3" +version = "4.66.2" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"}, - {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"}, + {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, + {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, ] [package.dependencies] @@ -3757,13 +3831,13 @@ testing = ["coverage (>=5.0)", "pytest", "pytest-cover"] [[package]] name = "werkzeug" -version = "3.0.3" +version = "3.0.1" description = "The comprehensive WSGI web application library." optional = false python-versions = ">=3.8" files = [ - {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, - {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, + {file = "werkzeug-3.0.1-py3-none-any.whl", hash = "sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10"}, + {file = "werkzeug-3.0.1.tar.gz", hash = "sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc"}, ] [package.dependencies] diff --git a/runtimes/mllib/poetry.lock b/runtimes/mllib/poetry.lock index 1ef7c10e8..c64147ce3 100644 --- a/runtimes/mllib/poetry.lock +++ b/runtimes/mllib/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiofiles" @@ -952,6 +952,17 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "mlserver" version = "1.5.0.dev1" @@ -981,6 +992,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -1464,6 +1476,21 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -1685,6 +1712,42 @@ mllib = ["numpy (>=1.15)"] pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.8.2" @@ -1879,6 +1942,17 @@ files = [ prometheus-client = ">=0.12" starlette = ">=0.35" 
+[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tritonclient" version = "2.42.0" diff --git a/runtimes/sklearn/poetry.lock b/runtimes/sklearn/poetry.lock index 8ed7a4ead..154e39ca3 100644 --- a/runtimes/sklearn/poetry.lock +++ b/runtimes/sklearn/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiofiles" @@ -952,15 +952,26 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "joblib" -version = "1.4.2" +version = "1.4.0" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" files = [ - {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, - {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, + {file = "joblib-1.4.0-py3-none-any.whl", hash = "sha256:42942470d4062537be4d54c83511186da1fc14ba354961a2114da91efa9a4ed7"}, + {file = "joblib-1.4.0.tar.gz", hash = "sha256:1eb0dc091919cd384490de890cb5dfd538410a6d4b3b54eef09fb8c50b409b1c"}, ] [[package]] @@ -992,6 +1003,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -1475,6 +1487,21 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -1665,6 +1692,42 @@ python-dotenv = ">=0.21.0" toml = ["tomli (>=2.0.1)"] yaml = ["pyyaml (>=6.0.1)"] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" 
+files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.8.2" @@ -1954,6 +2017,17 @@ files = [ {file = "threadpoolctl-3.3.0.tar.gz", hash = "sha256:5dac632b4fa2d43f42130267929af3ba01399ef4bd1882918e92dbc30365d30c"}, ] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tritonclient" version = "2.42.0" diff --git a/runtimes/xgboost/poetry.lock b/runtimes/xgboost/poetry.lock index f9bcec395..521437325 100644 --- a/runtimes/xgboost/poetry.lock +++ b/runtimes/xgboost/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiofiles" @@ -952,6 +952,17 @@ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "joblib" version = "1.3.2" @@ -992,6 +1003,7 @@ protobuf = "*" py-grpc-prometheus = "*" pydantic = "2.7.1" pydantic-settings = "2.2.1" +pytest-lazy-fixture = "^0.6.3" python-dotenv = "*" python-multipart = "*" starlette-exporter = "*" @@ -1475,6 +1487,21 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -1665,6 +1692,42 @@ python-dotenv = ">=0.21.0" toml = ["tomli (>=2.0.1)"] yaml = ["pyyaml (>=6.0.1)"] +[[package]] +name = "pytest" +version = "8.2.0" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.0-py3-none-any.whl", hash = "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233"}, + {file = "pytest-8.2.0.tar.gz", hash = "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-lazy-fixture" +version = "0.6.3" +description = "It helps to use fixtures in pytest.mark.parametrize" +optional = false +python-versions = "*" +files = [ + {file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"}, + {file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"}, +] + +[package.dependencies] +pytest = ">=3.2.5" + [[package]] name = "python-dateutil" version = "2.8.2" @@ -1954,6 +2017,17 @@ files = [ {file = "threadpoolctl-3.3.0.tar.gz", hash = "sha256:5dac632b4fa2d43f42130267929af3ba01399ef4bd1882918e92dbc30365d30c"}, ] +[[package]] +name = 
"tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tritonclient" version = "2.42.0" diff --git a/tests/batching/test_hooks.py b/tests/batching/test_hooks.py index e573298c7..4e6a43203 100644 --- a/tests/batching/test_hooks.py +++ b/tests/batching/test_hooks.py @@ -16,6 +16,26 @@ async def test_batching_predict( assert len(response.outputs) == 1 +async def test_batching_predict_stream( + text_stream_model: MLModel, generate_request: InferenceRequest, caplog +): + # Force batching to be enabled + text_stream_model.settings.max_batch_size = 10 + text_stream_model.settings.max_batch_time = 0.4 + await load_batching(text_stream_model) + + async def get_stream_request(request): + yield request + + stream = text_stream_model.predict_stream(get_stream_request(generate_request)) + responses = [r async for r in stream] + + assert len(responses) > 0 + assert isinstance(responses[0], InferenceResponse) + assert len(responses[0].outputs) == 1 + assert "not supported for inference streaming" in caplog.records[0].message + + @pytest.mark.parametrize( "max_batch_size, max_batch_time", [ diff --git a/tests/conftest.py b/tests/conftest.py index 2bacfad5f..2a8c3a7e5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,7 +26,7 @@ from mlserver import types, Settings, ModelSettings, MLServer from .metrics.utils import unregister_metrics -from .fixtures import SumModel, ErrorModel, SimpleModel +from .fixtures import SumModel, TextModel, TextStreamModel, ErrorModel, SimpleModel from .utils import RESTClient, get_available_ports, _pack, _get_tarball_name MIN_PYTHON_VERSION = (3, 9) @@ -191,6 +191,43 @@ async def sum_model( return await model_registry.get_model(sum_model_settings.name) +@pytest.fixture +def text_model_settings() -> ModelSettings: + return ModelSettings( + name="text-model", + implementation=TextModel, + parallel_workers=0, + parameters={"version": "v1.2.3"}, + ) + + +@pytest.fixture +async def text_model( + model_registry: MultiModelRegistry, text_model_settings: ModelSettings +) -> TextModel: + await model_registry.load(text_model_settings) + return await model_registry.get_model(text_model_settings.name) + + +@pytest.fixture +def text_stream_model_settings() -> ModelSettings: + # TODO: Enable parallel_workers once stream is supported + return ModelSettings( + name="text-stream-model", + implementation=TextStreamModel, + parallel_workers=0, + parameters={"version": "v1.2.3"}, + ) + + +@pytest.fixture +async def text_stream_model( + model_registry: MultiModelRegistry, text_stream_model_settings: ModelSettings +) -> TextModel: + await model_registry.load(text_stream_model_settings) + return await model_registry.get_model(text_stream_model_settings.name) + + @pytest.fixture def metadata_server_response() -> types.MetadataServerResponse: payload_path = os.path.join(TESTDATA_PATH, "metadata-server-response.json") @@ -209,6 +246,12 @@ def inference_request() -> types.InferenceRequest: return types.InferenceRequest.parse_file(payload_path) +@pytest.fixture +def generate_request() -> types.InferenceRequest: + payload_path = os.path.join(TESTDATA_PATH, "generate-request.json") + return types.InferenceRequest.parse_file(payload_path) + + @pytest.fixture def 
inference_request_invalid_datatype() -> Dict[str, Any]: payload_path = os.path.join( @@ -238,6 +281,12 @@ def settings() -> Settings: return Settings.parse_file(settings_path) +@pytest.fixture +def settings_stream() -> Settings: + settings_path = os.path.join(TESTDATA_PATH, "settings-stream.json") + return Settings.parse_file(settings_path) + + @pytest.fixture def data_plane( settings: Settings, diff --git a/tests/fixtures.py b/tests/fixtures.py index b99e9287a..a5c19a7e1 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -14,7 +14,7 @@ except ImportError: sklearn = None -from typing import Dict, List +from typing import Dict, List, AsyncIterator from mlserver import MLModel from mlserver.types import ( @@ -64,6 +64,47 @@ async def predict(self, payload: InferenceRequest) -> InferenceResponse: return response +class TextModel(MLModel): + async def predict(self, payload: InferenceRequest) -> InferenceResponse: + text = StringCodec.decode_input(payload.inputs[0])[0] + return InferenceResponse( + model_name=self._settings.name, + outputs=[ + StringCodec.encode_output( + name="output", + payload=[text], + use_bytes=True, + ), + ], + ) + + +class TextStreamModel(MLModel): + async def predict_stream( + self, payloads: AsyncIterator[InferenceRequest] + ) -> AsyncIterator[InferenceResponse]: + payload = [_ async for _ in payloads][0] + text = StringCodec.decode_input(payload.inputs[0])[0] + words = text.split(" ") + + split_text = [] + for i, word in enumerate(words): + split_text.append(word if i == 0 else " " + word) + + for word in split_text: + await asyncio.sleep(0.5) + yield InferenceResponse( + model_name=self._settings.name, + outputs=[ + StringCodec.encode_output( + name="output", + payload=[word], + use_bytes=True, + ), + ], + ) + + class ErrorModel(MLModel): error_message = "something really bad happened" diff --git a/tests/grpc/conftest.py b/tests/grpc/conftest.py index 8d8140aa9..f5c6a950d 100644 --- a/tests/grpc/conftest.py +++ b/tests/grpc/conftest.py @@ -58,6 +58,12 @@ def model_infer_request() -> pb.ModelInferRequest: return _read_testdata_pb(payload_path, pb.ModelInferRequest) +@pytest.fixture +def model_generate_request() -> pb.ModelInferRequest: + payload_path = os.path.join(TESTDATA_GRPC_PATH, "model-generate-request.json") + return _read_testdata_pb(payload_path, pb.ModelInferRequest) + + @pytest.fixture def model_infer_request_invalid_datatype() -> pb.ModelInferRequest: payload_path = os.path.join( diff --git a/tests/grpc/test_servicers.py b/tests/grpc/test_servicers.py index 67eb382e6..618c386f3 100644 --- a/tests/grpc/test_servicers.py +++ b/tests/grpc/test_servicers.py @@ -1,5 +1,6 @@ -import pytest import grpc +import pytest +from pytest_lazyfixture import lazy_fixture from mlserver.cloudevents import ( @@ -74,13 +75,48 @@ async def test_model_infer( model_infer_request.ClearField("model_version") prediction = await inference_service_stub.ModelInfer(model_infer_request) - expected = pb.InferTensorContents(int64_contents=[6]) assert len(prediction.outputs) == 1 assert prediction.outputs[0].contents == expected +@pytest.mark.parametrize("settings", [lazy_fixture("settings_stream")]) +@pytest.mark.parametrize( + "sum_model_settings", [lazy_fixture("text_stream_model_settings")] +) +@pytest.mark.parametrize("sum_model", [lazy_fixture("text_stream_model")]) +@pytest.mark.parametrize( + "model_name,model_version", + [("text-stream-model", "v1.2.3"), ("text-stream-model", None)], +) +async def test_model_stream_infer( + inference_service_stub, + model_generate_request, + 
model_name, + model_version, +): + model_generate_request.model_name = model_name + if model_version is not None: + model_generate_request.model_version = model_version + else: + model_generate_request.ClearField("model_version") + + i = -1 + text = ["What", " is", " the", " capital", " of", " France?"] + + async def get_stream_request(request): + yield request + + async for prediction in inference_service_stub.ModelStreamInfer( + get_stream_request(model_generate_request) + ): + i += 1 + expected = pb.InferTensorContents(bytes_contents=[text[i].encode()]) + assert len(prediction.outputs) == 1 + assert prediction.outputs[0].contents == expected + + async def test_model_infer_raw_contents(inference_service_stub, model_infer_request): # Prepare request with raw contents for input_tensor in model_infer_request.inputs: @@ -142,6 +178,26 @@ async def test_model_infer_error(inference_service_stub, model_infer_request): assert err.value.details() == "Model my-model with version v1.2.3 not found" +@pytest.mark.parametrize("settings", [lazy_fixture("settings_stream")]) +@pytest.mark.parametrize( + "sum_model_settings", [lazy_fixture("text_stream_model_settings")] +) +@pytest.mark.parametrize("sum_model", [lazy_fixture("text_stream_model")]) +async def test_model_stream_infer_error(inference_service_stub, model_generate_request): + async def get_stream_request(request): + yield request + + with pytest.raises(grpc.RpcError) as err: + model_generate_request.model_name = "my-model" + async for _ in inference_service_stub.ModelStreamInfer( + get_stream_request(model_generate_request) + ): + pass + + assert err.value.code() == grpc.StatusCode.NOT_FOUND + assert err.value.details() == "Model my-model with version v1.2.3 not found" + + async def test_model_repository_index( inference_service_stub, grpc_repository_index_request, diff --git a/tests/handlers/test_dataplane.py b/tests/handlers/test_dataplane.py index d1c957fe9..2871f81b7 100644 --- a/tests/handlers/test_dataplane.py +++ b/tests/handlers/test_dataplane.py @@ -96,6 +96,23 @@ async def test_infer(data_plane, sum_model, inference_request): assert prediction.outputs[0].data == TensorData(root=[6]) +async def test_infer_stream(data_plane, text_stream_model, generate_request): + async def streamed_request(request): + yield request + + stream = data_plane.infer_stream( + payloads=streamed_request(generate_request), + name=text_stream_model.name, + version=text_stream_model.version, + ) + + completion = [tok async for tok in stream] + assert len(completion) == 6 + + concat_completion = b"".join([tok.outputs[0].data.root[0] for tok in completion]) + assert concat_completion == b"What is the capital of France?" 
+ + async def test_infer_error_not_ready(data_plane, sum_model, inference_request): sum_model.ready = False with pytest.raises(ModelNotReady): diff --git a/tests/rest/test_endpoints.py b/tests/rest/test_endpoints.py index bd3928512..f74e77cdc 100644 --- a/tests/rest/test_endpoints.py +++ b/tests/rest/test_endpoints.py @@ -1,13 +1,20 @@ import pytest +from pytest_lazyfixture import lazy_fixture from typing import Optional +from httpx import AsyncClient +from httpx_sse import aconnect_sse from mlserver import __version__ +from mlserver.settings import ModelSettings +from mlserver.model import MLModel from mlserver.types import ( + InferenceRequest, InferenceResponse, MetadataServerResponse, MetadataModelResponse, TensorData, + RepositoryIndexRequest, ) from mlserver.cloudevents import ( CLOUDEVENTS_HEADER_SPECVERSION_DEFAULT, @@ -15,28 +22,28 @@ ) -async def test_live(rest_client): +async def test_live(rest_client: AsyncClient): endpoint = "/v2/health/live" response = await rest_client.get(endpoint) assert response.status_code == 200 -async def test_ready(rest_client): +async def test_ready(rest_client: AsyncClient): endpoint = "/v2/health/ready" response = await rest_client.get(endpoint) assert response.status_code == 200 -async def test_model_ready(rest_client, sum_model): +async def test_model_ready(rest_client: AsyncClient, sum_model: MLModel): endpoint = f"/v2/models/{sum_model.name}/versions/{sum_model.version}/ready" response = await rest_client.get(endpoint) assert response.status_code == 200 -async def test_metadata(rest_client): +async def test_metadata(rest_client: AsyncClient): endpoint = "/v2" response = await rest_client.get(endpoint) @@ -47,7 +54,7 @@ async def test_metadata(rest_client): assert metadata.extensions == [] -async def test_openapi(rest_client): +async def test_openapi(rest_client: AsyncClient): endpoint = "/v2/docs" response = await rest_client.get(endpoint) @@ -55,7 +62,7 @@ async def test_openapi(rest_client): assert "html" in response.headers["content-type"] -async def test_docs(rest_client): +async def test_docs(rest_client: AsyncClient): endpoint = "/v2/docs/dataplane.json" response = await rest_client.get(endpoint) @@ -63,7 +70,9 @@ async def test_docs(rest_client): assert "openapi" in response.json() -async def test_model_metadata(rest_client, sum_model_settings): +async def test_model_metadata( + rest_client: AsyncClient, sum_model_settings: ModelSettings +): endpoint = f"v2/models/{sum_model_settings.name}" response = await rest_client.get(endpoint) @@ -79,7 +88,7 @@ async def test_model_metadata(rest_client, sum_model_settings): "model_name,model_version", [("sum-model", "v1.2.3"), ("sum-model", None)] ) async def test_model_openapi( - rest_client, model_name: str, model_version: Optional[str] + rest_client: AsyncClient, model_name: str, model_version: Optional[str] ): endpoint = f"/v2/models/{model_name}/docs/dataplane.json" if model_version is not None: @@ -95,7 +104,9 @@ async def test_model_openapi( @pytest.mark.parametrize( "model_name,model_version", [("sum-model", "v1.2.3"), ("sum-model", None)] ) -async def test_model_docs(rest_client, model_name: str, model_version: Optional[str]): +async def test_model_docs( + rest_client: AsyncClient, model_name: str, model_version: Optional[str] +): endpoint = f"/v2/models/{model_name}/docs" if model_version is not None: endpoint = f"/v2/models/{model_name}/versions/{model_version}/docs" @@ -109,16 +120,16 @@ async def test_model_docs(rest_client, model_name: str, model_version: Optional[ 
"model_name,model_version", [("sum-model", "v1.2.3"), ("sum-model", None)] ) async def test_infer( - rest_client, - inference_request, - model_name, - model_version, + rest_client: AsyncClient, + inference_request: InferenceRequest, + model_name: str, + model_version: Optional[str], ): endpoint = f"/v2/models/{model_name}/infer" if model_version is not None: endpoint = f"/v2/models/{model_name}/versions/{model_version}/infer" - response = await rest_client.post(endpoint, json=inference_request.model_dump()) + response = await rest_client.post(endpoint, json=inference_request.model_dump()) assert response.status_code == 200 prediction = InferenceResponse.model_validate(response.json()) @@ -126,10 +137,58 @@ async def test_infer( assert prediction.outputs[0].data == TensorData(root=[6]) +@pytest.mark.parametrize("sum_model", [lazy_fixture("text_model")]) +@pytest.mark.parametrize( + "model_name,model_version", [("text-model", "v1.2.3"), ("text-model", None)] +) +async def test_generate( + rest_client: AsyncClient, + generate_request: InferenceRequest, + model_name: str, + model_version: Optional[str], +): + endpoint = f"/v2/models/{model_name}/generate" + if model_version is not None: + endpoint = f"/v2/models/{model_name}/versions/{model_version}/generate" + + response = await rest_client.post(endpoint, json=generate_request.model_dump()) + assert response.status_code == 200 + + prediction = InferenceResponse.model_validate(response.json()) + assert len(prediction.outputs) == 1 + assert prediction.outputs[0].data == TensorData( + root=["What is the capital of France?"] + ) + + +@pytest.mark.parametrize("settings", [lazy_fixture("settings_stream")]) +@pytest.mark.parametrize("sum_model", [lazy_fixture("text_stream_model")]) +@pytest.mark.parametrize("endpoint", ["generate_stream", "infer_stream"]) +async def test_generate_stream( + rest_client: AsyncClient, + generate_request: InferenceRequest, + text_stream_model: MLModel, + endpoint: str, +): + endpoint = f"/v2/models/{text_stream_model.name}/{endpoint}" + conn = aconnect_sse( + rest_client, "POST", endpoint, json=generate_request.model_dump() + ) + ref_text = ["What", " is", " the", " capital", " of", " France?"] + + async with conn as stream: + i = 0 + async for response in stream.aiter_sse(): + prediction = InferenceResponse.model_validate(response.json()) + assert len(prediction.outputs) == 1 + assert prediction.outputs[0].data == TensorData(root=[ref_text[i]]) + i += 1 + + async def test_infer_headers( - rest_client, - inference_request, - sum_model_settings, + rest_client: AsyncClient, + inference_request: InferenceRequest, + sum_model_settings: ModelSettings, ): endpoint = f"/v2/models/{sum_model_settings.name}/infer" response = await rest_client.post( @@ -147,15 +206,20 @@ async def test_infer_headers( ) -async def test_infer_error(rest_client, inference_request): - endpoint = "/v2/models/my-model/versions/v0/infer" +@pytest.mark.parametrize("endpoint", ["infer", "generate"]) +async def test_infer_error( + rest_client: AsyncClient, inference_request: InferenceRequest, endpoint: str +): + endpoint = f"/v2/models/my-model/versions/v0/{endpoint}" response = await rest_client.post(endpoint, json=inference_request.model_dump()) assert response.status_code == 404 assert response.json()["error"] == "Model my-model with version v0 not found" -async def test_model_repository_index(rest_client, repository_index_request): +async def test_model_repository_index( + rest_client: AsyncClient, repository_index_request: RepositoryIndexRequest +): 
endpoint = "/v2/repository/index" response = await rest_client.post( endpoint, json=repository_index_request.model_dump() @@ -167,7 +231,9 @@ async def test_model_repository_index(rest_client, repository_index_request): assert len(models) == 1 -async def test_model_repository_unload(rest_client, sum_model_settings): +async def test_model_repository_unload( + rest_client: AsyncClient, sum_model_settings: ModelSettings +): endpoint = f"/v2/repository/models/{sum_model_settings.name}/unload" response = await rest_client.post(endpoint) @@ -178,8 +244,8 @@ async def test_model_repository_unload(rest_client, sum_model_settings): async def test_model_repository_load( - rest_client, - sum_model_settings, + rest_client: AsyncClient, + sum_model_settings: ModelSettings, ): await rest_client.post(f"/v2/repository/models/{sum_model_settings.name}/unload") @@ -192,7 +258,9 @@ async def test_model_repository_load( assert model_metadata.status_code == 200 -async def test_model_repository_load_error(rest_client, sum_model_settings): +async def test_model_repository_load_error( + rest_client: AsyncClient, sum_model_settings: ModelSettings +): endpoint = "/v2/repository/models/my-model/load" response = await rest_client.post(endpoint) diff --git a/tests/rest/test_responses.py b/tests/rest/test_responses.py new file mode 100644 index 000000000..81c155e7d --- /dev/null +++ b/tests/rest/test_responses.py @@ -0,0 +1,13 @@ +from mlserver.types import InferenceRequest + +from mlserver.rest.responses import ServerSentEvent + + +def test_sse_encode(inference_request: InferenceRequest): + sse = ServerSentEvent(inference_request) + encoded = sse.encode() + as_string = encoded.decode("utf-8") + + expected_json = inference_request.model_dump_json().replace(" ", "") + expected = f"data: {expected_json}\n\n" + assert as_string == expected diff --git a/tests/test_model.py b/tests/test_model.py index 85ccd4af6..8f9fd5665 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,4 +1,5 @@ import pytest +import inspect import numpy as np import pandas as pd @@ -10,6 +11,45 @@ from mlserver.codecs.pandas import PandasCodec from mlserver.model import MLModel +from .fixtures import TextModel, TextStreamModel + + +async def stream_generator(generate_request): + yield generate_request + + +async def test_predict_stream_fallback( + text_model: TextModel, + generate_request: InferenceRequest, +): + generator = text_model.predict_stream(stream_generator(generate_request)) + assert inspect.isasyncgen(generator) + + responses = [] + async for response in generator: + responses.append(response) + + assert len(responses) == 1 + assert len(responses[0].outputs) > 0 + + +async def test_predict_stream( + text_stream_model: TextStreamModel, + generate_request: InferenceRequest, +): + generator = text_stream_model.predict_stream(stream_generator(generate_request)) + assert inspect.isasyncgen(generator) + + responses = [] + async for response in generator: + responses.append(response) + + ref_text = ["What", " is", " the", " capital", " of", " France?"] + assert len(responses) == len(ref_text) + + for idx in range(len(ref_text)): + assert ref_text[idx] == StringCodec.decode_output(responses[idx].outputs[0])[0] + @pytest.mark.parametrize( "request_input,expected", diff --git a/tests/testdata/generate-request.json b/tests/testdata/generate-request.json new file mode 100644 index 000000000..bebe5ef77 --- /dev/null +++ b/tests/testdata/generate-request.json @@ -0,0 +1,18 @@ +{ + "inputs": [ + { + "name": "prompt", + "shape": [1], + 
"datatype": "BYTES", + "data": ["What is the capital of France?"], + "parameters": { + "content_type": "str" + } + } + ], + "outputs": [ + { + "name": "output" + } + ] +} diff --git a/tests/testdata/grpc/model-generate-request.json b/tests/testdata/grpc/model-generate-request.json new file mode 100644 index 000000000..6661fd26c --- /dev/null +++ b/tests/testdata/grpc/model-generate-request.json @@ -0,0 +1,19 @@ +{ + "model_name": "text-model", + "model_version": "v1.2.3", + "inputs": [ + { + "name": "prompt", + "datatype": "BYTES", + "shape": [1, 1], + "contents": { + "bytes_contents": ["V2hhdCBpcyB0aGUgY2FwaXRhbCBvZiBGcmFuY2U/"] + } + } + ], + "outputs":[ + { + "name": "output" + } + ] +} diff --git a/tests/testdata/settings-stream.json b/tests/testdata/settings-stream.json new file mode 100644 index 000000000..6727d5b59 --- /dev/null +++ b/tests/testdata/settings-stream.json @@ -0,0 +1,10 @@ +{ + "debug": true, + "host": "127.0.0.1", + "parallel_workers": 0, + "gzip_enabled": false, + "metrics_endpoint": null, + "cors_settings": { + "allow_origins": ["*"] + } +}