diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index bcefe1d2f..b718d0832 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -9,11 +9,8 @@ on: jobs: black: - strategy: - matrix: - version: [3.11] - os: [ubuntu-latest] - runs-on: ${{matrix.os}} + name: Python Formatting With Black + runs-on: ubuntu-latest steps: - name: Checking out repository uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 290fccb19..bbb6379ae 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,4 +1,4 @@ -name: Turbine Core Test +name: Turbine Unit Tests on: workflow_dispatch: @@ -17,6 +17,7 @@ concurrency: jobs: test: + name: "Test" strategy: matrix: version: [3.11] @@ -40,10 +41,23 @@ jobs: # wheels saves multiple minutes and a lot of bandwidth on runner setup. pip install --index-url https://download.pytorch.org/whl/cpu \ -r core/pytorch-cpu-requirements.txt \ - -r core/torchvision-requirements.txt - pip install --upgrade -r core/requirements.txt - pip install -e core[testing] + -r core/torchvision-requirements.txt + pip install --upgrade \ + -r core/requirements.txt \ + -r mypy-requirements.txt + pip install -e core[testing] -e serving[testing] - - name: Run tests + - name: Run core tests + if: ${{ !cancelled() }} run: | - pytest -n 4 core/tests/ + pytest -n 4 core/ + + - name: Run serving tests + if: ${{ !cancelled() }} + run: | + pytest -n 4 serving/ + + - name: MyPy Type Checking + if: ${{ !cancelled() }} + run: | + mypy serving/ diff --git a/README.md b/README.md index 0a88880a8..655450db3 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ can specify pytorch-cpu and install via: ``` pip install --index-url https://download.pytorch.org/whl/cpu \ -r core/pytorch-cpu-requirements.txt \ - -r core torchvision-requirements.txt + -r core/torchvision-requirements.txt pip install shark-turbine ``` diff --git 
a/core/iree-requirements.txt b/core/iree-requirements.txt new file mode 100644 index 000000000..8e02bb4a5 --- /dev/null +++ b/core/iree-requirements.txt @@ -0,0 +1,2 @@ +iree-compiler==20240129.785 +iree-runtime==20240129.785 diff --git a/core/misc-requirements.txt b/core/misc-requirements.txt new file mode 100644 index 000000000..becb775f9 --- /dev/null +++ b/core/misc-requirements.txt @@ -0,0 +1,4 @@ +numpy>=1.26.3 +onnx>=1.15.0 +pytest>=8.0.0 +pytest-xdist>=3.5.0 diff --git a/core/requirements.txt b/core/requirements.txt index 58053f50d..128012cb7 100644 --- a/core/requirements.txt +++ b/core/requirements.txt @@ -6,6 +6,4 @@ -r pytorch-cpu-requirements.txt -r torchvision-requirements.txt - -iree-compiler==20240129.785 -iree-runtime==20240129.785 +-r iree-requirements.txt diff --git a/core/setup.py b/core/setup.py index f1aeda8c4..555519462 100644 --- a/core/setup.py +++ b/core/setup.py @@ -54,7 +54,8 @@ def load_requirement_pins(requirements_file: str): requirement_pins.update(dict(pin_pairs)) -load_requirement_pins("requirements.txt") +load_requirement_pins("iree-requirements.txt") +load_requirement_pins("misc-requirements.txt") load_requirement_pins("pytorch-cpu-requirements.txt") @@ -97,7 +98,7 @@ def initialize_options(self): ], }, install_requires=[ - "numpy", + f"numpy{get_version_spec('numpy')}", f"iree-compiler{get_version_spec('iree-compiler')}", f"iree-runtime{get_version_spec('iree-runtime')}", # Use the [torch-cpu-nightly] spec to get a more recent/specific version. 
@@ -106,12 +107,12 @@ def initialize_options(self): extras_require={ "torch-cpu-nightly": [f"torch{get_version_spec('torch')}"], "onnx": [ - "onnx>=1.15.0", + f"onnx{get_version_spec('onnx')}", ], "testing": [ - "onnx==1.15.0", - "pytest", - "pytest-xdist", + f"onnx{get_version_spec('onnx')}", + f"pytest{get_version_spec('pytest')}", + f"pytest-xdist{get_version_spec('pytest-xdist')}", ], }, cmdclass={"build": BuildCommand}, diff --git a/mypy-requirements.txt b/mypy-requirements.txt new file mode 100644 index 000000000..f2484e486 --- /dev/null +++ b/mypy-requirements.txt @@ -0,0 +1,3 @@ +# Typing packages needed for full mypy execution at the project level. +mypy==1.8.0 +types-requests diff --git a/serving/README.md b/serving/README.md new file mode 100644 index 000000000..e1ed3f8a9 --- /dev/null +++ b/serving/README.md @@ -0,0 +1,12 @@ +# Turbine Serving Infrastructure + +This sub-project contains components and infrastructure for serving various +forms of Turbine compiled models. Instead of coming with models, it defines +ABIs that compiled models should adhere to in order to be served. It then +allows them to be delivered as web endpoints via popular APIs. + +As emulation can be the sincerest form of flattery, this project derives +substantial inspiration from vllm and the OpenAI APIs, emulating and +interoperating with them where possible. It is intended to be the lightest +weight possible reference implementation for serving models with an +opinionated compiled form, built elsewhere in the project.
diff --git a/serving/mypy.ini b/serving/mypy.ini new file mode 100644 index 000000000..fdba402eb --- /dev/null +++ b/serving/mypy.ini @@ -0,0 +1,5 @@ +[mypy] + +explicit_package_bases = True +mypy_path = $MYPY_CONFIG_FILE_DIR +packages = turbine_serving.llm diff --git a/serving/pyproject.toml b/serving/pyproject.toml new file mode 100644 index 000000000..9787c3bdf --- /dev/null +++ b/serving/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/serving/requirements.txt b/serving/requirements.txt new file mode 100644 index 000000000..3c9503df4 --- /dev/null +++ b/serving/requirements.txt @@ -0,0 +1,2 @@ +fastapi>=0.109.2 +uvicorn>=0.27.0 diff --git a/serving/setup.cfg b/serving/setup.cfg new file mode 100644 index 000000000..358360671 --- /dev/null +++ b/serving/setup.cfg @@ -0,0 +1,6 @@ +[tool:pytest] +testpaths = + ./tests +filterwarnings = + # TODO: Remove once flatbuffer 'imp' usage resolved. + ignore::DeprecationWarning diff --git a/serving/setup.py b/serving/setup.py new file mode 100644 index 000000000..37ad48703 --- /dev/null +++ b/serving/setup.py @@ -0,0 +1,109 @@ +# Copyright 2024 Advanced Micro Devices, Inc +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import json
+import os
+import distutils.command.build
+from pathlib import Path
+
+from setuptools import find_namespace_packages, setup  # type: ignore
+
+THIS_DIR = Path(__file__).resolve().parent
+REPO_DIR = THIS_DIR.parent
+VERSION_INFO_FILE = REPO_DIR / "version_info.json"
+
+
+# The repository-level README is used as the package long description.
+with open(
+    os.path.join(
+        REPO_DIR,
+        "README.md",
+    ),
+    "rt",
+) as f:
+    README = f.read()
+
+
+def load_version_info():
+    """Return the parsed JSON contents of VERSION_INFO_FILE."""
+    with open(VERSION_INFO_FILE, "rt") as f:
+        return json.load(f)
+
+
+version_info = load_version_info()
+PACKAGE_VERSION = version_info["package-version"]
+
+packages = find_namespace_packages(
+    include=[
+        "turbine_serving",
+        "turbine_serving.*",
+    ],
+)
+
+print("Found packages:", packages)
+
+# Lookup version pins from requirements files.
+requirement_pins = {}
+
+
+def load_requirement_pins(requirements_file: Path):
+    """Record the version named by each requirement in requirements_file.
+
+    Both exact pins ("pkg==1.2") and lower bounds ("pkg>=1.2") are
+    recognized, because the requirements files consumed below use both
+    forms. Comment lines, blank lines, pip options such as "-r other.txt",
+    and requirements without one of those operators are ignored.
+
+    Results are merged into the module-level requirement_pins mapping
+    (package name -> version string); later files override earlier ones.
+    """
+    with open(requirements_file, "rt") as f:
+        for raw_line in f:
+            # Drop trailing " # comment" text and surrounding whitespace.
+            line = raw_line.split(" #", 1)[0].strip()
+            if not line or line.startswith(("#", "-")):
+                continue
+            for operator in ("==", ">="):
+                name, found, version = line.partition(operator)
+                if found:
+                    requirement_pins[name.strip()] = version.strip()
+                    break
+
+
+load_requirement_pins(THIS_DIR / "requirements.txt")
+load_requirement_pins(REPO_DIR / "core" / "iree-requirements.txt")
+load_requirement_pins(REPO_DIR / "core" / "misc-requirements.txt")
+
+
+def get_version_spec(dep: str):
+    """Return a ">=<version>" specifier for dep, or "" if none was loaded.
+
+    The recorded version is always emitted as a lower bound, including
+    for requirements that were pinned with "==" in the requirements file.
+    """
+    if dep in requirement_pins:
+        return f">={requirement_pins[dep]}"
+    else:
+        return ""
+
+
+# Override build command so that we can build into _python_build
+# instead of the default "build". This avoids collisions with
+# typical CMake incantations, which can produce all kinds of
+# hilarity (like including the contents of the build/lib directory).
+class BuildCommand(distutils.command.build.build):
+    """Build command whose build_base is "_python_build"."""
+
+    def initialize_options(self):
+        super().initialize_options()
+        self.build_base = "_python_build"
+
+
+# Runtime dependencies; each gets whatever bound get_version_spec() yields.
+_INSTALL_REQUIRES = [
+    f"fastapi{get_version_spec('fastapi')}",
+    f"iree-compiler{get_version_spec('iree-compiler')}",
+    f"iree-runtime{get_version_spec('iree-runtime')}",
+    f"uvicorn{get_version_spec('uvicorn')}",
+]
+
+# Additional dependencies installed by the "testing" extra.
+_TESTING_REQUIRES = [
+    f"pytest{get_version_spec('pytest')}",
+    f"pytest-xdist{get_version_spec('pytest-xdist')}",
+]
+
+setup(
+    name="turbine-serving",
+    version=str(PACKAGE_VERSION),
+    author="SHARK Authors",
+    author_email="stella@nod.ai",
+    description="SHARK Turbine Machine Learning Deployment Tools",
+    long_description=README,
+    long_description_content_type="text/markdown",
+    url="https://github.com/nod-ai/SHARK-Turbine",
+    license="Apache-2.0",
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "License :: OSI Approved :: Apache Software License",
+        "Programming Language :: Python :: 3",
+    ],
+    packages=packages,
+    package_data={"turbine_serving": ["py.typed"]},
+    install_requires=_INSTALL_REQUIRES,
+    extras_require={"testing": _TESTING_REQUIRES},
+    cmdclass={"build": BuildCommand},
+)
diff --git a/serving/tests/api_server_test.py b/serving/tests/api_server_test.py
new file mode 100644
index 000000000..602dfb830
--- /dev/null
+++ b/serving/tests/api_server_test.py
@@ -0,0 +1,63 @@
+# Copyright 2024 Advanced Micro Devices, Inc
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import os
+import pytest
+import requests
+import subprocess
+import sys
+import time
+
+
+class ServerRunner:
+    """Runs the API server in a child process for the duration of a test run.
+
+    The server module is launched with the current interpreter and the
+    constructor blocks until GET {url}/health answers with HTTP 200.
+    Call close() (or let the `server` fixture do it) to stop the process.
+    """
+
+    def __init__(self, args):
+        """Start the server with extra command line `args` (a list of str).
+
+        Raises:
+            RuntimeError: if the server exits or does not become healthy
+                in time; the child process is stopped before re-raising.
+        """
+        self.url = "http://localhost:8000"
+        env = os.environ.copy()
+        # Unbuffered output so server logs interleave with test output.
+        env["PYTHONUNBUFFERED"] = "1"
+        self.process = subprocess.Popen(
+            [
+                sys.executable,
+                "-m",
+                "turbine_serving.llm.entrypoints.api_server",
+            ]
+            + args,
+            env=env,
+            stdout=sys.stdout,
+            stderr=sys.stderr,
+        )
+        try:
+            self._wait_for_ready()
+        except BaseException:
+            # Do not leave an orphaned server behind if startup failed.
+            self.close()
+            raise
+
+    def _wait_for_ready(self, timeout: float = 30.0, poll_interval: float = 0.25):
+        """Poll the health endpoint until it returns HTTP 200.
+
+        Args:
+            timeout: Seconds to keep polling before giving up.
+            poll_interval: Seconds to sleep between attempts.
+
+        Raises:
+            RuntimeError: if the server process exits or `timeout` elapses.
+        """
+        deadline = time.monotonic() + timeout
+        # Track the last failure explicitly: the name bound by
+        # `except ... as` is cleared as soon as the handler exits, so it
+        # cannot be used as the cause of an error raised afterwards.
+        last_error = None
+        while True:
+            try:
+                response = requests.get(f"{self.url}/health", timeout=5)
+                if response.status_code == 200:
+                    return
+            except requests.RequestException as e:
+                last_error = e
+            if self.process.poll() is not None:
+                raise RuntimeError("API server process terminated") from last_error
+            if time.monotonic() > deadline:
+                raise RuntimeError("Timeout waiting for server start") from last_error
+            time.sleep(poll_interval)
+
+    def close(self):
+        """Terminate the server process if it is still running (idempotent)."""
+        # `process` is missing if Popen itself failed in __init__.
+        process = getattr(self, "process", None)
+        if process is not None and process.poll() is None:
+            process.terminate()
+            process.wait()
+
+    def __del__(self):
+        # Fallback only; explicit close() is the supported way to shut down.
+        self.close()
+
+
+@pytest.fixture(scope="session")
+def server():
+    """Session-wide API server, stopped when the test session ends."""
+    runner = ServerRunner([])
+    try:
+        yield runner
+    finally:
+        runner.close()
+
+
+def test_basic(server: ServerRunner):
+    # Requesting the fixture is the test: it fails if startup fails.
+    ...
diff --git a/serving/turbine_serving/__init__.py b/serving/turbine_serving/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/serving/turbine_serving/llm/__init__.py b/serving/turbine_serving/llm/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/serving/turbine_serving/llm/entrypoints/api_server.py b/serving/turbine_serving/llm/entrypoints/api_server.py
new file mode 100644
index 000000000..f23b4abe9
--- /dev/null
+++ b/serving/turbine_serving/llm/entrypoints/api_server.py
@@ -0,0 +1,50 @@
+# Copyright 2024 Advanced Micro Devices, Inc
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from typing import Sequence
+
+import argparse
+
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse, Response
+import sys
+import uvicorn
+
+app = FastAPI()
+
+
+@app.get("/health")
+async def health() -> Response:
+    """Health probe: unconditionally answer with an empty HTTP 200."""
+    return Response(status_code=200)
+
+
+def _build_arg_parser() -> argparse.ArgumentParser:
+    """Create the command line parser for the server entry point."""
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument("--host", type=str, default=None)
+    arg_parser.add_argument("--port", type=int, default=8000)
+    arg_parser.add_argument(
+        "--root-path",
+        type=str,
+        default=None,
+        help="Root path to use for installing behind path based proxy.",
+    )
+    arg_parser.add_argument(
+        "--timeout-keep-alive", type=int, default=5, help="Keep alive timeout"
+    )
+    return arg_parser
+
+
+def main(clargs: Sequence[str]):
+    """Parse `clargs` and hand the module-level `app` to uvicorn.run.
+
+    Args:
+        clargs: Command line arguments, without the program name.
+    """
+    options = _build_arg_parser().parse_args(clargs)
+
+    app.root_path = options.root_path
+    uvicorn.run(
+        app,
+        host=options.host,
+        port=options.port,
+        log_level="debug",
+        timeout_keep_alive=options.timeout_keep_alive,
+    )
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/serving/turbine_serving/py.typed b/serving/turbine_serving/py.typed
new file mode 100644
index 000000000..5e43cc13b
--- /dev/null
+++ b/serving/turbine_serving/py.typed
@@ -0,0 +1 @@
+# Marker file for PEP 561 inline type checking.