From 780545b334e6e8121a86a77b66d9971de80ce6a3 Mon Sep 17 00:00:00 2001 From: Aaron Peddle Date: Mon, 28 Aug 2023 15:45:46 -0700 Subject: [PATCH 1/7] add idle time status --- potassium/potassium.py | 19 +++++++++++++++---- setup.py | 1 - tests/test_endpoints.py | 24 ++++++++++++------------ 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/potassium/potassium.py b/potassium/potassium.py index ea21417..43d4f97 100644 --- a/potassium/potassium.py +++ b/potassium/potassium.py @@ -1,3 +1,4 @@ +import time from flask import Flask, request, make_response, abort from werkzeug.serving import make_server from threading import Thread, Lock, Condition @@ -47,6 +48,7 @@ def __init__(self, name): self._gpu_lock = Lock() self._background_task_cv = Condition() self._sequence_number = 0 + self._idle_start_time = 0 self._flask_app = self._create_flask_app() # @@ -162,6 +164,7 @@ def _handle_generic(self, endpoint, flask_request): res = make_response(tb_str) res.status_code = 500 res.headers['X-Endpoint-Type'] = endpoint.type + self._idle_start_time = time.time() self._gpu_lock.release() elif endpoint.type == "background": req = Request( @@ -178,7 +181,8 @@ def task(endpoint, lock, req): finally: with self._background_task_cv: self._background_task_cv.notify_all() - # release lock + + self._idle_start_time = time.time() lock.release() thread = Thread(target=task, args=(endpoint, self._gpu_lock, req)) @@ -219,14 +223,20 @@ def handle(path): @flask_app.route('/__status__', methods=["GET"]) def status(): + idle_time = 0 + gpu_available = not self._gpu_lock.locked() + + if gpu_available: + idle_time = int((time.time() - self._idle_start_time)*1000) + res = make_response({ - "gpu_available": not self._gpu_lock.locked(), - "sequence_number": self._sequence_number + "gpu_available": gpu_available, + "sequence_number": self._sequence_number, + "idle_time": idle_time }) res.status_code = 200 res.headers['X-Endpoint-Type'] = "status" - res return res return flask_app @@ -237,4 +247,5 @@ def serve(self, host="0.0.0.0", port=8000): self._init_func() server = make_server(host, port, self._flask_app) print(colored(f"Serving at http://{host}:{port}\n------", 'green')) + self._idle_start_time = time.time() server.serve_forever() diff --git a/setup.py b/setup.py index 8a04444..f2480db 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ from distutils.core import setup -import setuptools from pathlib import Path this_directory = Path(__file__).parent diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 9bf4d43..f5c6eaf 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -95,10 +95,10 @@ def background(context: dict, request: potassium.Request): res = client.get("/__status__", json={}) assert res.status_code == 200 - assert res.json == { - "gpu_available": True, - "sequence_number": 0, - } + assert res.json is not None + assert res.json["gpu_available"] == True + assert res.json["sequence_number"] == 0 + assert res.json["idle_time"] > 0 # send background post in separate thread res = client.post("/background", json={}) @@ -108,10 +108,10 @@ def background(context: dict, request: potassium.Request): res = client.get("/__status__", json={}) assert res.status_code == 200 - assert res.json == { - "gpu_available": False, - "sequence_number": 1, - } + assert res.json is not None + assert res.json["gpu_available"] == False + assert res.json["sequence_number"] == 1 + assert res.json["idle_time"] == 0 # notify background thread to continue with resolve_background_condition: @@ -124,10 +124,10 @@ def background(context: dict, request: potassium.Request): res = client.get("/__status__", json={}) assert res.status_code == 200 - assert res.json == { - "gpu_available": True, - "sequence_number": 1, - } + assert res.json is not None + assert res.json["gpu_available"] == True + assert res.json["sequence_number"] == 1 + assert res.json["idle_time"] > 0 def test_wait_for_background_task(): app = potassium.Potassium("my_app") From ac77503c6ac6c9cd652e2e8332d776e557615ccb Mon Sep 17 00:00:00 2001 From: ErikKaumk Date: Mon, 11 Sep 2023 10:52:49 +0300 Subject: [PATCH 2/7] inference timeout --- potassium/potassium.py | 7 ++++++- tests/test_endpoints.py | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/potassium/potassium.py b/potassium/potassium.py index 43d4f97..c740157 100644 --- a/potassium/potassium.py +++ b/potassium/potassium.py @@ -49,6 +49,7 @@ def __init__(self, name): self._background_task_cv = Condition() self._sequence_number = 0 self._idle_start_time = 0 + self._inference_start_time = 0 self._flask_app = self._create_flask_app() # @@ -147,6 +148,7 @@ def _handle_generic(self, endpoint, flask_request): return res res = None + self._inference_start_time = time.time() if endpoint.type == "handler": req = Request( @@ -224,6 +226,7 @@ def handle(path): @flask_app.route('/__status__', methods=["GET"]) def status(): idle_time = 0 + inference_time = int((time.time() - self._inference_start_time)*1000) gpu_available = not self._gpu_lock.locked() if gpu_available: @@ -232,7 +235,8 @@ def status(): res = make_response({ "gpu_available": gpu_available, "sequence_number": self._sequence_number, - "idle_time": idle_time + "idle_time": idle_time, + "inference_time": inference_time, }) res.status_code = 200 @@ -248,4 +252,5 @@ def serve(self, host="0.0.0.0", port=8000): server = make_server(host, port, self._flask_app) print(colored(f"Serving at http://{host}:{port}\n------", 'green')) self._idle_start_time = time.time() + self._inference_start_time = time.time() server.serve_forever() diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index f5c6eaf..8187675 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -99,6 +99,7 @@ def background(context: dict, request: potassium.Request): assert res.json["gpu_available"] == True assert res.json["sequence_number"] == 0 assert res.json["idle_time"] > 0 + assert res.json["inference_time"] > 0 # send background post in separate thread res = client.post("/background", json={}) @@ -112,6 +113,7 @@ def background(context: dict, request: potassium.Request): assert res.json["gpu_available"] == False assert res.json["sequence_number"] == 1 assert res.json["idle_time"] == 0 + assert res.json["inference_time"] == 0 # notify background thread to continue with resolve_background_condition: @@ -128,6 +130,7 @@ def background(context: dict, request: potassium.Request): assert res.json["gpu_available"] == True assert res.json["sequence_number"] == 1 assert res.json["idle_time"] > 0 + assert res.json["inference_time"] > 0 def test_wait_for_background_task(): app = potassium.Potassium("my_app") From 3ab1ac78094d73d91a06627d119d3a127c0d4c2f Mon Sep 17 00:00:00 2001 From: ErikKaumk Date: Mon, 11 Sep 2023 15:49:43 +0300 Subject: [PATCH 3/7] need to thread --- potassium/potassium.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/potassium/potassium.py b/potassium/potassium.py index c740157..1e8cd5a 100644 --- a/potassium/potassium.py +++ b/potassium/potassium.py @@ -249,7 +249,7 @@ def status(): def serve(self, host="0.0.0.0", port=8000): print(colored("------\nStarting Potassium Server 🍌", 'yellow')) self._init_func() - server = make_server(host, port, self._flask_app) + server = make_server(host, port, self._flask_app, threaded=True) print(colored(f"Serving at http://{host}:{port}\n------", 'green')) self._idle_start_time = time.time() self._inference_start_time = time.time() From d34e839ee9ce513aa6f8afbf0b780c9206a778cf Mon Sep 17 00:00:00 2001 From: ErikKaumk Date: Mon, 11 Sep 2023 19:11:15 +0300 Subject: [PATCH 4/7] improve tests --- tests/test_endpoints.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 8187675..0a77a80 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -105,6 +105,9 @@ def background(context: dict, request: potassium.Request): res = client.post("/background", json={}) assert res.status_code == 200 + # add a small sleep for inference time to be above 0 + time.sleep(0.1) + # check status res = client.get("/__status__", json={}) @@ -113,7 +116,7 @@ def background(context: dict, request: potassium.Request): assert res.json["gpu_available"] == False assert res.json["sequence_number"] == 1 assert res.json["idle_time"] == 0 - assert res.json["inference_time"] == 0 + assert res.json["inference_time"] > 0 # notify background thread to continue with resolve_background_condition: From 71391f80ca08f354118de7a666671f0dd32626f3 Mon Sep 17 00:00:00 2001 From: ErikKaumk Date: Tue, 12 Sep 2023 12:01:59 +0300 Subject: [PATCH 5/7] inference time not including cold boot time --- potassium/potassium.py | 6 +++++- tests/test_endpoints.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/potassium/potassium.py b/potassium/potassium.py index 1e8cd5a..b1c961d 100644 --- a/potassium/potassium.py +++ b/potassium/potassium.py @@ -226,9 +226,13 @@ def handle(path): @flask_app.route('/__status__', methods=["GET"]) def status(): idle_time = 0 - inference_time = int((time.time() - self._inference_start_time)*1000) gpu_available = not self._gpu_lock.locked() + if self._inference_start_time != 0: + inference_time = int((time.time() - self._inference_start_time)*1000) + else: + inference_time = 0 + if gpu_available: idle_time = int((time.time() - self._idle_start_time)*1000) diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 0a77a80..6295201 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -99,7 +99,7 @@ def background(context: dict, request: potassium.Request): assert res.json["gpu_available"] == True assert res.json["sequence_number"] == 0 assert res.json["idle_time"] > 0 - assert res.json["inference_time"] > 0 + assert res.json["inference_time"] == 0 # send background post in separate thread res = client.post("/background", json={}) From 8a26304c53e02d2dff8c73c78b7d2382e017e040 Mon Sep 17 00:00:00 2001 From: ErikKaumk Date: Wed, 13 Sep 2023 20:21:09 +0300 Subject: [PATCH 6/7] small change --- potassium/potassium.py | 6 ++++-- tests/test_endpoints.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/potassium/potassium.py b/potassium/potassium.py index b1c961d..e16d92d 100644 --- a/potassium/potassium.py +++ b/potassium/potassium.py @@ -167,6 +167,7 @@ def _handle_generic(self, endpoint, flask_request): res.status_code = 500 res.headers['X-Endpoint-Type'] = endpoint.type self._idle_start_time = time.time() + self._inference_start_time = 0 self._gpu_lock.release() elif endpoint.type == "background": req = Request( @@ -185,6 +186,7 @@ def task(endpoint, lock, req): self._background_task_cv.notify_all() self._idle_start_time = time.time() + self._inference_start_time = 0 lock.release() thread = Thread(target=task, args=(endpoint, self._gpu_lock, req)) @@ -228,11 +230,11 @@ def status(): idle_time = 0 gpu_available = not self._gpu_lock.locked() - if self._inference_start_time != 0: + if self._inference_start_time != 0: inference_time = int((time.time() - self._inference_start_time)*1000) else: inference_time = 0 - + if gpu_available: idle_time = int((time.time() - self._idle_start_time)*1000) diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 6295201..6e621a6 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -133,7 +133,7 @@ def background(context: dict, request: potassium.Request): assert res.json["gpu_available"] == True assert res.json["sequence_number"] == 1 assert res.json["idle_time"] > 0 - assert res.json["inference_time"] > 0 + assert res.json["inference_time"] == 0 def test_wait_for_background_task(): app = potassium.Potassium("my_app") From f77c4f716cd85ecf891037d744e3bee7db84dd34 Mon Sep 17 00:00:00 2001 From: ErikKaumk Date: Thu, 14 Sep 2023 12:25:43 +0300 Subject: [PATCH 7/7] changed var name and set to None (instead of zero) if not set --- potassium/potassium.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/potassium/potassium.py b/potassium/potassium.py index e16d92d..3d13f15 100644 --- a/potassium/potassium.py +++ b/potassium/potassium.py @@ -49,7 +49,7 @@ def __init__(self, name): self._background_task_cv = Condition() self._sequence_number = 0 self._idle_start_time = 0 - self._inference_start_time = 0 + self._last_inference_start_time = None self._flask_app = self._create_flask_app() # @@ -148,7 +148,7 @@ def _handle_generic(self, endpoint, flask_request): return res res = None - self._inference_start_time = time.time() + self._last_inference_start_time = time.time() if endpoint.type == "handler": req = Request( @@ -167,7 +167,7 @@ def _handle_generic(self, endpoint, flask_request): res.status_code = 500 res.headers['X-Endpoint-Type'] = endpoint.type self._idle_start_time = time.time() - self._inference_start_time = 0 + self._last_inference_start_time = None self._gpu_lock.release() elif endpoint.type == "background": req = Request( @@ -186,7 +186,7 @@ def task(endpoint, lock, req): self._background_task_cv.notify_all() self._idle_start_time = time.time() - self._inference_start_time = 0 + self._last_inference_start_time = None lock.release() thread = Thread(target=task, args=(endpoint, self._gpu_lock, req)) @@ -228,13 +228,12 @@ def handle(path): @flask_app.route('/__status__', methods=["GET"]) def status(): idle_time = 0 + inference_time = 0 gpu_available = not self._gpu_lock.locked() - if self._inference_start_time != 0: - inference_time = int((time.time() - self._inference_start_time)*1000) - else: - inference_time = 0 - + if self._last_inference_start_time != None: + inference_time = int((time.time() - self._last_inference_start_time)*1000) + if gpu_available: idle_time = int((time.time() - self._idle_start_time)*1000) @@ -258,5 +257,4 @@ def serve(self, host="0.0.0.0", port=8000): server = make_server(host, port, self._flask_app, threaded=True) print(colored(f"Serving at http://{host}:{port}\n------", 'green')) self._idle_start_time = time.time() - self._inference_start_time = time.time() server.serve_forever()