Skip to content

Commit

Permalink
Erikk/ban 375 inference time to potassium (#33)
Browse files (browse the repository at this point in the history)
* add idle time status

* inference timeout

* need to thread

* improve tests

* inference time not including cold boot time

* small change

* changed var name and set to None (instead of zero) if not set

---------

Co-authored-by: Aaron Peddle <[email protected]>
  • Loading branch information
ErikKaum and Aaron Peddle authored Sep 16, 2023
1 parent d5db263 commit 9b43d64
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 18 deletions.
30 changes: 25 additions & 5 deletions potassium/potassium.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import time
from flask import Flask, request, make_response, abort
from werkzeug.serving import make_server
from threading import Thread, Lock, Condition
@@ -47,6 +48,8 @@ def __init__(self, name):
self._gpu_lock = Lock()
self._background_task_cv = Condition()
self._sequence_number = 0
self._idle_start_time = 0
self._last_inference_start_time = None
self._flask_app = self._create_flask_app()

#
@@ -145,6 +148,7 @@ def _handle_generic(self, endpoint, flask_request):
return res

res = None
self._last_inference_start_time = time.time()

if endpoint.type == "handler":
req = Request(
@@ -162,6 +166,8 @@ def _handle_generic(self, endpoint, flask_request):
res = make_response(tb_str)
res.status_code = 500
res.headers['X-Endpoint-Type'] = endpoint.type
self._idle_start_time = time.time()
self._last_inference_start_time = None
self._gpu_lock.release()
elif endpoint.type == "background":
req = Request(
@@ -178,7 +184,9 @@ def task(endpoint, lock, req):
finally:
with self._background_task_cv:
self._background_task_cv.notify_all()
# release lock

self._idle_start_time = time.time()
self._last_inference_start_time = None
lock.release()

thread = Thread(target=task, args=(endpoint, self._gpu_lock, req))
@@ -219,14 +227,25 @@ def handle(path):

@flask_app.route('/__status__', methods=["GET"])
def status():
idle_time = 0
inference_time = 0
gpu_available = not self._gpu_lock.locked()

if self._last_inference_start_time != None:
inference_time = int((time.time() - self._last_inference_start_time)*1000)

if gpu_available:
idle_time = int((time.time() - self._idle_start_time)*1000)

res = make_response({
"gpu_available": not self._gpu_lock.locked(),
"sequence_number": self._sequence_number
"gpu_available": gpu_available,
"sequence_number": self._sequence_number,
"idle_time": idle_time,
"inference_time": inference_time,
})

res.status_code = 200
res.headers['X-Endpoint-Type'] = "status"
res
return res

return flask_app
@@ -235,6 +254,7 @@ def status():
def serve(self, host="0.0.0.0", port=8000):
print(colored("------\nStarting Potassium Server 🍌", 'yellow'))
self._init_func()
server = make_server(host, port, self._flask_app)
server = make_server(host, port, self._flask_app, threaded=True)
print(colored(f"Serving at http://{host}:{port}\n------", 'green'))
self._idle_start_time = time.time()
server.serve_forever()
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from distutils.core import setup
import setuptools
from pathlib import Path

this_directory = Path(__file__).parent
Expand Down
30 changes: 18 additions & 12 deletions tests/test_endpoints.py
Original file line number Diff line number Diff line change
@@ -95,23 +95,28 @@ def background(context: dict, request: potassium.Request):
res = client.get("/__status__", json={})

assert res.status_code == 200
assert res.json == {
"gpu_available": True,
"sequence_number": 0,
}
assert res.json is not None
assert res.json["gpu_available"] == True
assert res.json["sequence_number"] == 0
assert res.json["idle_time"] > 0
assert res.json["inference_time"] == 0

# send background post in separate thread
res = client.post("/background", json={})
assert res.status_code == 200

# add a small sleep for inference time to be above 0
time.sleep(0.1)

# check status
res = client.get("/__status__", json={})

assert res.status_code == 200
assert res.json == {
"gpu_available": False,
"sequence_number": 1,
}
assert res.json is not None
assert res.json["gpu_available"] == False
assert res.json["sequence_number"] == 1
assert res.json["idle_time"] == 0
assert res.json["inference_time"] > 0

# notify background thread to continue
with resolve_background_condition:
@@ -124,10 +129,11 @@ def background(context: dict, request: potassium.Request):
res = client.get("/__status__", json={})

assert res.status_code == 200
assert res.json == {
"gpu_available": True,
"sequence_number": 1,
}
assert res.json is not None
assert res.json["gpu_available"] == True
assert res.json["sequence_number"] == 1
assert res.json["idle_time"] > 0
assert res.json["inference_time"] == 0

def test_wait_for_background_task():
app = potassium.Potassium("my_app")
Expand Down

0 comments on commit 9b43d64

Please sign in to comment.