Skip to content

Commit

Permalink
Start a serving sub-project. (#397)
Browse files Browse the repository at this point in the history
This is just a stub that gets the structure and a basic API server/test
(inspired by vllm). Unlike some of the other implementations, this is
going to be pretty thin as most of the work will be done in a companion
project focused on compilation.
  • Loading branch information
stellaraccident authored Feb 6, 2024
1 parent 9d929b0 commit e7f0f94
Show file tree
Hide file tree
Showing 19 changed files with 291 additions and 21 deletions.
7 changes: 2 additions & 5 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@ on:

jobs:
black:
strategy:
matrix:
version: [3.11]
os: [ubuntu-latest]
runs-on: ${{matrix.os}}
name: Python Formatting With Black
runs-on: ubuntu-latest
steps:
- name: Checking out repository
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
Expand Down
26 changes: 20 additions & 6 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Turbine Core Test
name: Turbine Unit Tests

on:
workflow_dispatch:
Expand All @@ -17,6 +17,7 @@ concurrency:

jobs:
test:
name: "Test"
strategy:
matrix:
version: [3.11]
Expand All @@ -40,10 +41,23 @@ jobs:
# wheels saves multiple minutes and a lot of bandwidth on runner setup.
pip install --index-url https://download.pytorch.org/whl/cpu \
-r core/pytorch-cpu-requirements.txt \
-r core/torchvision-requirements.txt
pip install --upgrade -r core/requirements.txt
pip install -e core[testing]
-r core/torchvision-requirements.txt
pip install --upgrade \
-r core/requirements.txt \
-r mypy-requirements.txt
pip install -e core[testing] -e serving[testing]
- name: Run tests
- name: Run core tests
if: ${{ !cancelled() }}
run: |
pytest -n 4 core/tests/
pytest -n 4 core/
- name: Run serving tests
if: ${{ !cancelled() }}
run: |
pytest -n 4 serving/
- name: MyPy Type Checking
if: ${{ !cancelled() }}
run: |
mypy serving/
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ can specify pytorch-cpu and install via:
```
pip install --index-url https://download.pytorch.org/whl/cpu \
-r core/pytorch-cpu-requirements.txt \
-r core torchvision-requirements.txt
-r core/torchvision-requirements.txt
pip install shark-turbine
```

Expand Down
2 changes: 2 additions & 0 deletions core/iree-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
iree-compiler==20240129.785
iree-runtime==20240129.785
4 changes: 4 additions & 0 deletions core/misc-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
numpy>=1.26.3
onnx>=1.15.0
pytest>=8.0.0
pytest-xdist>=3.5.0
4 changes: 1 addition & 3 deletions core/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,4 @@

-r pytorch-cpu-requirements.txt
-r torchvision-requirements.txt

iree-compiler==20240129.785
iree-runtime==20240129.785
-r iree-requirements.txt
13 changes: 7 additions & 6 deletions core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def load_requirement_pins(requirements_file: str):
requirement_pins.update(dict(pin_pairs))


load_requirement_pins("requirements.txt")
load_requirement_pins("iree-requirements.txt")
load_requirement_pins("misc-requirements.txt")
load_requirement_pins("pytorch-cpu-requirements.txt")


Expand Down Expand Up @@ -97,7 +98,7 @@ def initialize_options(self):
],
},
install_requires=[
"numpy",
f"numpy{get_version_spec('numpy')}",
f"iree-compiler{get_version_spec('iree-compiler')}",
f"iree-runtime{get_version_spec('iree-runtime')}",
# Use the [torch-cpu-nightly] spec to get a more recent/specific version.
Expand All @@ -106,12 +107,12 @@ def initialize_options(self):
extras_require={
"torch-cpu-nightly": [f"torch{get_version_spec('torch')}"],
"onnx": [
"onnx>=1.15.0",
f"onnx{get_version_spec('onnx')}",
],
"testing": [
"onnx==1.15.0",
"pytest",
"pytest-xdist",
f"onnx{get_version_spec('onnx')}",
f"pytest{get_version_spec('pytest')}",
f"pytest-xdist{get_version_spec('pytest-xdist')}",
],
},
cmdclass={"build": BuildCommand},
Expand Down
3 changes: 3 additions & 0 deletions mypy-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Typing packages needed for full mypy execution at the project level.
mypy==1.8.0
types-requests
12 changes: 12 additions & 0 deletions serving/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Turbine Serving Infrastructure

This sub-project contains components and infrastructure for serving various
forms of Turbine compiled models. Instead of coming with models, it defines
ABIs that compiled models should adhere to in order to be served. It then
allows them to be delivered as web endpoints via popular APIs.

As emulation can be the sincerest form of flattery, this project derives
substantial inspiration from vllm and the OpenAI APIs, emulating and
interoperating with them where possible. It is intended to be the
lightest-weight possible reference implementation for serving models with an
opinionated compiled form, built elsewhere in the project.
5 changes: 5 additions & 0 deletions serving/mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[mypy]

explicit_package_bases = True
mypy_path = $MYPY_CONFIG_FILE_DIR
packages = turbine_serving.llm
3 changes: 3 additions & 0 deletions serving/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
2 changes: 2 additions & 0 deletions serving/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
fastapi>=0.109.2
uvicorn>=0.27.0
6 changes: 6 additions & 0 deletions serving/setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[tool:pytest]
testpaths =
./tests
filterwarnings =
# TODO: Remove once flatbuffer 'imp' usage resolved.
ignore::DeprecationWarning
109 changes: 109 additions & 0 deletions serving/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import json
import os
import distutils.command.build
from pathlib import Path

from setuptools import find_namespace_packages, setup # type: ignore

# Directory containing this setup.py, and the repository root one level above it.
THIS_DIR = Path(__file__).resolve().parent
REPO_DIR = THIS_DIR.parent
VERSION_INFO_FILE = REPO_DIR / "version_info.json"


# The repository-level README.md is read in full; it is passed to setup() below
# as the long description.
with open(REPO_DIR / "README.md", "rt") as readme_file:
    README = readme_file.read()


def load_version_info():
    """Parse the JSON document at ``VERSION_INFO_FILE`` and return its contents."""
    with open(VERSION_INFO_FILE, "rt") as version_file:
        contents = json.load(version_file)
    return contents


# The distribution version is the "package-version" entry of the version info.
version_info = load_version_info()
PACKAGE_VERSION = version_info["package-version"]

# Collect the turbine_serving package together with all of its sub-packages.
packages = find_namespace_packages(include=["turbine_serving", "turbine_serving.*"])

print("Found packages:", packages)

# Version pins looked up from requirements files, keyed by distribution name.
requirement_pins = {}


def load_requirement_pins(requirements_file: Path):
    """Record the versions named in a requirements file into ``requirement_pins``.

    Both ``name==version`` and ``name>=version`` lines are recognised, so that
    requirements files expressing a minimum version contribute a pin as well
    (``get_version_spec`` renders every pin as a ``>=`` constraint).

    Blank lines, comment lines, pip options such as ``-r other.txt`` and
    requirements without one of those two operators are skipped. Trailing
    ``#`` comments and ``;`` environment markers are ignored.

    Args:
        requirements_file: Path of the requirements file to read.
    """
    with open(requirements_file, "rt") as f:
        for raw_line in f:
            # Strip trailing comments and environment markers before parsing;
            # a marker such as `; python_version == "3.11"` would otherwise be
            # mistaken for a version pin.
            line = raw_line.split("#", 1)[0].split(";", 1)[0].strip()
            if not line or line.startswith("-"):
                continue
            for operator in ("==", ">="):
                name, found, version = line.partition(operator)
                if found:
                    requirement_pins[name.strip()] = version.strip()
                    break


# Pins are gathered from this project's own requirements first, then from the
# IREE and miscellaneous requirements files kept under core/.
for pins_file in (
    THIS_DIR / "requirements.txt",
    REPO_DIR / "core" / "iree-requirements.txt",
    REPO_DIR / "core" / "misc-requirements.txt",
):
    load_requirement_pins(pins_file)


def get_version_spec(dep: str):
    """Return a ``>=<version>`` specifier for ``dep``, or ``""`` if it has no pin."""
    pinned = requirement_pins.get(dep)
    return "" if pinned is None else f">={pinned}"


# The stock "build" command writes into a directory named "build", which
# collides with typical CMake incantations and can produce all kinds of
# hilarity (like including the contents of the build/lib directory).
# This override redirects the output to "_python_build" instead.
class BuildCommand(distutils.command.build.build):
    """Build command whose base output directory is ``_python_build``."""

    def initialize_options(self):
        super().initialize_options()
        self.build_base = "_python_build"


# Runtime dependencies, each suffixed with the ">=" constraint derived from the
# requirements files when a pin was found for it.
INSTALL_REQUIRES = [
    f"{dep}{get_version_spec(dep)}"
    for dep in ("fastapi", "iree-compiler", "iree-runtime", "uvicorn")
]

# Extra dependencies installed by the "testing" extra.
TESTING_REQUIRES = [
    f"{dep}{get_version_spec(dep)}" for dep in ("pytest", "pytest-xdist")
]

setup(
    name="turbine-serving",
    version=str(PACKAGE_VERSION),
    author="SHARK Authors",
    # NOTE(review): this address looks like an obfuscation placeholder rather
    # than a real e-mail address -- confirm the intended value.
    author_email="[email protected]",
    description="SHARK Turbine Machine Learning Deployment Tools",
    long_description=README,
    long_description_content_type="text/markdown",
    url="https://github.com/nod-ai/SHARK-Turbine",
    license="Apache-2.0",
    classifiers=[
        "Development Status :: 3 - Alpha",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
    ],
    packages=packages,
    # Ship the PEP 561 marker file with the package.
    package_data={"turbine_serving": ["py.typed"]},
    install_requires=INSTALL_REQUIRES,
    extras_require={"testing": TESTING_REQUIRES},
    # Route build output through the BuildCommand override.
    cmdclass={"build": BuildCommand},
)
63 changes: 63 additions & 0 deletions serving/tests/api_server_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os
import pytest
import requests
import subprocess
import sys
import time


class ServerRunner:
    """Runs the API server in a child process and waits until it is healthy.

    The server is started as
    ``python -m turbine_serving.llm.entrypoints.api_server`` followed by the
    extra command line ``args``. Construction blocks until ``GET /health``
    answers with status 200. The child is terminated by :meth:`close`, which
    is also invoked when the runner is garbage collected.

    Raises:
        RuntimeError: If the server process exits, or does not become healthy
            within ``STARTUP_TIMEOUT_SECS``, while waiting for startup.
    """

    # Total time allowed for the server to start answering health checks.
    STARTUP_TIMEOUT_SECS = 30.0
    # Pause between two consecutive health-check attempts.
    POLL_INTERVAL_SECS = 0.25
    # Upper bound for a single health-check request so one hung connection
    # cannot block the readiness loop indefinitely.
    REQUEST_TIMEOUT_SECS = 5.0

    def __init__(self, args):
        self.url = "http://localhost:8000"
        env = os.environ.copy()
        # Disable output buffering in the child interpreter.
        env["PYTHONUNBUFFERED"] = "1"
        self.process = subprocess.Popen(
            [
                sys.executable,
                "-m",
                "turbine_serving.llm.entrypoints.api_server",
            ]
            + args,
            env=env,
            stdout=sys.stdout,
            stderr=sys.stderr,
        )
        try:
            self._wait_for_ready()
        except BaseException:
            # Do not leave an orphaned server behind when startup fails.
            self.close()
            raise

    def _wait_for_ready(self):
        """Poll the health endpoint until it returns 200, the server dies, or time runs out."""
        deadline = time.monotonic() + self.STARTUP_TIMEOUT_SECS
        # Remember the most recent connection failure so that it can be
        # chained to whichever error is eventually raised. The name bound by
        # `except ... as` is not available outside the handler.
        last_error = None
        while True:
            try:
                response = requests.get(
                    f"{self.url}/health", timeout=self.REQUEST_TIMEOUT_SECS
                )
                if response.status_code == 200:
                    return
            except requests.exceptions.RequestException as e:
                last_error = e
            if self.process.poll() is not None:
                raise RuntimeError("API server processs terminated") from last_error
            if time.monotonic() > deadline:
                raise RuntimeError("Timeout waiting for server start") from last_error
            time.sleep(self.POLL_INTERVAL_SECS)

    def close(self):
        """Terminate the server process and wait for it to exit.

        Safe to call repeatedly, and on an instance whose construction failed
        before the process was created.
        """
        process = getattr(self, "process", None)
        if process is None:
            return
        if process.poll() is None:
            process.terminate()
        process.wait()

    def __del__(self):
        self.close()


@pytest.fixture(scope="session")
def server():
    """Provide one running API server, shared by all tests in the session.

    Yields:
        The ``ServerRunner`` wrapping the started server process.
    """
    runner = ServerRunner([])
    try:
        yield runner
    finally:
        # Stop the server explicitly at teardown instead of relying on the
        # runner's finalizer, which is not guaranteed to run at interpreter
        # exit. Terminating an already exited process is harmless.
        runner.process.terminate()
        runner.process.wait()


def test_basic(server: ServerRunner):
    """Smoke test: passes once the ``server`` fixture has been set up."""
Empty file.
Empty file.
50 changes: 50 additions & 0 deletions serving/turbine_serving/llm/entrypoints/api_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from typing import Sequence

import argparse

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response
import sys
import uvicorn

# Application object on which the routes below are registered.
app = FastAPI()


@app.get("/health")
async def health() -> Response:
    """Health check endpoint: replies with an empty HTTP 200 response."""
    ok_response = Response(status_code=200)
    return ok_response


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command line parser for the API server entry point."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--host", type=str, default=None)
    arg_parser.add_argument("--port", type=int, default=8000)
    arg_parser.add_argument(
        "--root-path",
        type=str,
        default=None,
        help="Root path to use for installing behind path based proxy.",
    )
    arg_parser.add_argument(
        "--timeout-keep-alive", type=int, default=5, help="Keep alive timeout"
    )
    return arg_parser


def main(clargs: Sequence[str]):
    """Parse ``clargs`` and serve ``app`` with uvicorn.

    Args:
        clargs: Command line arguments, without the program name.
    """
    options = _build_arg_parser().parse_args(clargs)

    app.root_path = options.root_path
    server_settings = dict(
        host=options.host,
        port=options.port,
        log_level="debug",
        timeout_keep_alive=options.timeout_keep_alive,
    )
    uvicorn.run(app, **server_settings)


if __name__ == "__main__":
    main(sys.argv[1:])
1 change: 1 addition & 0 deletions serving/turbine_serving/py.typed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Marker file for PEP 561 inline type checking.

0 comments on commit e7f0f94

Please sign in to comment.