diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst
index 3118e19daf118..11a9f12fd17cd 100644
--- a/docs/source/serving/deploying_with_docker.rst
+++ b/docs/source/serving/deploying_with_docker.rst
@@ -51,11 +51,11 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `--
 
 .. code-block:: console
 
-    # Example of building on Nvidia GH200 server. (Memory usage: ~180GB, Build time: ~2387s / ~40 min)
+    # Example of building on Nvidia GH200 server. (Memory usage: ~12GB, Build time: ~1475s / ~25 min, Image size: 7.26GB)
     $ DOCKER_BUILDKIT=1 sudo docker build . \
     --target vllm-openai \
     --platform "linux/arm64" \
-    -t drikster80/vllm-gh200-openai:v0.6.4.post1 \
+    -t vllm/vllm-gh200-openai:latest \
     --build-arg max_jobs=66 \
     --build-arg nvcc_threads=2 \
     --build-arg torch_cuda_arch_list="9.0+PTX" \
diff --git a/requirements-cuda-arm64.txt b/requirements-cuda-arm64.txt
index a8baf1dedb5a8..bbcb5cb7012ce 100644
--- a/requirements-cuda-arm64.txt
+++ b/requirements-cuda-arm64.txt
@@ -1,3 +1,3 @@
 --index-url https://download.pytorch.org/whl/nightly/cu124
-torchvision; platform_machine == 'aarch64'
-torch; platform_machine == 'aarch64'
+torchvision==0.22.0.dev20241215; platform_machine == 'aarch64'
+torch==2.6.0.dev20241210+cu124; platform_machine == 'aarch64'