From 5713dabe3abf490127e695f9f0541e7f9474ae82 Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Tue, 30 Jan 2024 11:29:03 +0800 Subject: [PATCH] feat: allow disabling GPU allocation via env (#4453) Signed-off-by: Frost Ming --- src/_bentoml_impl/server/allocator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/_bentoml_impl/server/allocator.py b/src/_bentoml_impl/server/allocator.py index c822ba30b42..f49fc3e8edb 100644 --- a/src/_bentoml_impl/server/allocator.py +++ b/src/_bentoml_impl/server/allocator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import warnings from typing import Any @@ -12,6 +13,7 @@ from bentoml.exceptions import BentoMLConfigException NVIDIA_GPU = "nvidia.com/gpu" +DISABLE_GPU_ALLOCATION_ENV = "BENTOML_DISABLE_GPU_ALLOCATION" class ResourceAllocator: @@ -26,7 +28,9 @@ def __init__(self) -> None: def assign_gpus(self, count: float) -> list[int]: if count > self.remaining_gpus: warnings.warn( - f"Requested {count} GPUs, but only {self.remaining_gpus} are remaining.", + f"Requested {count} GPUs, but only {self.remaining_gpus} are remaining. " + f"Serving may fail due to inadequate GPUs. Set {DISABLE_GPU_ALLOCATION_ENV}=1 " + "to disable automatic allocation and allocate GPUs manually.", ResourceWarning, stacklevel=3, ) @@ -97,7 +101,7 @@ def get_worker_env( return num_workers, worker_env else: # workers is a number num_workers = workers - if num_gpus: + if num_gpus and DISABLE_GPU_ALLOCATION_ENV not in os.environ: assigned = self.assign_gpus(num_gpus) # assign gpus to all workers worker_env = [