From bc7ec75dc5d03b90669219e9de3354dd8e6cc86e Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Mon, 16 Dec 2024 17:22:38 -0800
Subject: [PATCH] fix gh200 tests on main (#11246)

Signed-off-by: youkaichao
---
 .buildkite/run-gh200-test.sh                  | 4 ++--
 docs/source/serving/deploying_with_docker.rst | 5 +----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh
index d25510c47fe6b..d06604f96f2b8 100644
--- a/.buildkite/run-gh200-test.sh
+++ b/.buildkite/run-gh200-test.sh
@@ -6,8 +6,8 @@ set -ex
 
 # Try building the docker image
 DOCKER_BUILDKIT=1 docker build . \
-  --target test \
-  -platform "linux/arm64" \
+  --target vllm-openai \
+  --platform "linux/arm64" \
   -t gh200-test \
   --build-arg max_jobs=66 \
   --build-arg nvcc_threads=2 \
diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst
index 11a9f12fd17cd..56f0020a1011a 100644
--- a/docs/source/serving/deploying_with_docker.rst
+++ b/docs/source/serving/deploying_with_docker.rst
@@ -54,16 +54,13 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `--
 
     # Example of building on Nvidia GH200 server. (Memory usage: ~12GB, Build time: ~1475s / ~25 min, Image size: 7.26GB)
     $ DOCKER_BUILDKIT=1 sudo docker build . \
       --target vllm-openai \
-      -platform "linux/arm64" \
+      --platform "linux/arm64" \
       -t vllm/vllm-gh200-openai:latest \
       --build-arg max_jobs=66 \
       --build-arg nvcc_threads=2 \
       --build-arg torch_cuda_arch_list="9.0+PTX" \
       --build-arg vllm_fa_cmake_gpu_arches="90-real"
-
-
-
 To run vLLM:
 
 .. code-block:: console
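
For reference (not part of the patch itself): a minimal sketch of serving a model with the arm64 image that the corrected docs command above builds. The cache mount, published port, and model name are illustrative assumptions, not taken from this PR.

    # Run the GH200 image built with the fixed --platform flag (illustrative)
    $ docker run --runtime nvidia --gpus all \
        -v ~/.cache/huggingface:/root/.cache/huggingface \
        -p 8000:8000 \
        --ipc=host \
        vllm/vllm-gh200-openai:latest \
        --model meta-llama/Llama-3.1-8B-Instruct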