feat: implement inference server by using vllm #624

Merged · 7 commits · Oct 24, 2024
Changes from 4 commits
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -48,7 +48,7 @@ jobs:
 
       - name: Run inference api unit tests
         run: |
-          make inference-api-e2e
+          DEVICE=cpu make inference-api-e2e
 
       - name: Upload Codecov report
         uses: codecov/codecov-action@e28ff129e5465c2c0dcc6f003fc735cb6ae0c673 # v4.5.0
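Setting DEVICE=cpu lets the GitHub-hosted runner, which has no GPU, exercise the suite. A minimal sketch of how a test fixture might honor such a variable (the function below is an illustrative assumption, not code from this repository):

    import os

    import torch

    def resolve_device() -> str:
        # Prefer an explicit DEVICE override (e.g. DEVICE=cpu in CI);
        # otherwise fall back to CUDA when a GPU is present.
        requested = os.environ.get("DEVICE", "").strip().lower()
        if requested:
            return requested
        return "cuda" if torch.cuda.is_available() else "cpu"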
1 change: 1 addition & 0 deletions .gitignore
@@ -7,6 +7,7 @@
 *.dylib
 bin/*
 Dockerfile.cross
+__pycache__/
 
 # Test binary, build with `go test -c`
 *.test
7 changes: 4 additions & 3 deletions Makefile
@@ -101,9 +101,10 @@ unit-test: ## Run unit tests.
 ## E2E tests
 ## --------------------------------------
 
-inference-api-e2e:
-	pip install -r presets/inference/text-generation/requirements.txt
-	pytest -o log_cli=true -o log_cli_level=INFO presets/inference/text-generation/tests
+inference-api-e2e:
+	pip install virtualenv
+	./hack/run-pytest-in-venv.sh presets/inference/vllm presets/inference/vllm/requirements.txt
+	./hack/run-pytest-in-venv.sh presets/inference/text-generation presets/inference/text-generation/requirements.txt
 
 # Ginkgo configurations
 GINKGO_FOCUS ?=
36 changes: 36 additions & 0 deletions hack/run-pytest-in-venv.sh
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Run a pytest suite inside a throwaway virtualenv.
# Usage: run-pytest-in-venv.sh <test_dir> <requirements.txt> (paths relative to the repo root)

if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <test_dir> <requirements.txt>"
    exit 1
fi

PROJECT_DIR=$(dirname "$(dirname "$(realpath "$0")")")

TEST_DIR="$PROJECT_DIR/$1"
REQUIREMENTS="$PROJECT_DIR/$2"
VENV_DIR=$(mktemp -d)

cleanup() {
    rm -rf "$VENV_DIR"
}
trap cleanup EXIT

cd "$VENV_DIR"
printf "Creating virtual environment in %s\n" "$VENV_DIR"
python3 -m virtualenv venv
source "$VENV_DIR/venv/bin/activate"
if [ "$?" -ne 0 ]; then
    printf "Failed to activate virtual environment\n"
    exit 1
fi

printf "Installing requirements from %s\n" "$REQUIREMENTS"
pip install -r "$REQUIREMENTS" > "$VENV_DIR/pip.log"
if [ "$?" -ne 0 ]; then
    cat "$VENV_DIR/pip.log"
    exit 1
fi

printf "Running tests in %s\n" "$TEST_DIR"
pytest -o log_cli=true -o log_cli_level=INFO "$TEST_DIR"
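The Makefile invokes this helper once per suite, e.g. ./hack/run-pytest-in-venv.sh presets/inference/vllm presets/inference/vllm/requirements.txt. Each run installs its requirements into a temporary virtualenv that is deleted on exit, so the vllm and text-generation dependency pins never touch the caller's environment or each other.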
2 changes: 1 addition & 1 deletion presets/inference/llama2-chat/inference_api.py
@@ -192,7 +192,7 @@ def get_metrics():
         return {"error": str(e)}
 
 def setup_worker_routes():
-    @app_worker.get("/healthz")
+    @app_worker.get("/health")
     def health_check():
         if not torch.cuda.is_available():
             raise HTTPException(status_code=500, detail="No GPU available")
2 changes: 1 addition & 1 deletion presets/inference/text-generation/api_spec.json
@@ -24,7 +24,7 @@
         }
       }
     },
-    "/healthz": {
+    "/health": {
       "get": {
         "summary": "Health Check Endpoint",
         "operationId": "health_check_healthz_get",
4 changes: 2 additions & 2 deletions presets/inference/text-generation/inference_api.py
@@ -181,7 +181,7 @@ def home():
 class HealthStatus(BaseModel):
     status: str = Field(..., example="Healthy")
 @app.get(
-    "/healthz",
+    "/health",
     response_model=HealthStatus,
     summary="Health Check Endpoint",
     responses={
@@ -461,7 +461,7 @@ def get_metrics():
     if torch.cuda.is_available():
         gpus = GPUtil.getGPUs()
         gpu_info = [GPUInfo(
-            id=gpu.id,
+            id=str(gpu.id),
             name=gpu.name,
             load=f"{gpu.load * 100:.2f}%",
             temperature=f"{gpu.temperature} C",
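The id=str(gpu.id) cast matters because GPUtil reports GPU ids as integers. A minimal sketch of the failure it avoids, assuming the GPUInfo model declares id as a string field (pydantic v2 will not silently coerce int to str):

    from pydantic import BaseModel

    class GPUInfo(BaseModel):
        id: str    # GPUtil's gpu.id is an int; pydantic v2 rejects it here without a cast
        name: str

    # GPUInfo(id=0, name="Tesla T4")            # ValidationError under pydantic v2
    print(GPUInfo(id=str(0), name="Tesla T4"))  # id="0" passes validation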
@@ -108,7 +108,7 @@ def test_read_main(configured_app):
 
 def test_health_check(configured_app):
     client = TestClient(configured_app)
-    response = client.get("/healthz")
+    response = client.get("/health")
     assert response.status_code == 200
     assert response.json() == {"status": "Healthy"}
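The worker route renamed in the llama2-chat diff above can be smoke-tested the same way; a sketch under the assumption that app_worker is importable from that module and that setup_worker_routes() has already been called:

    from fastapi.testclient import TestClient

    from inference_api import app_worker  # assumed import path

    def test_worker_health():
        client = TestClient(app_worker)
        response = client.get("/health")
        # The handler raises HTTPException(500) when no GPU is visible,
        # so accept either outcome on shared CI hardware.
        assert response.status_code in (200, 500)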