diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst
index 3118e19daf118..11a9f12fd17cd 100644
--- a/docs/source/serving/deploying_with_docker.rst
+++ b/docs/source/serving/deploying_with_docker.rst
@@ -51,11 +51,11 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `--
 
 .. code-block:: console
 
-    # Example of building on Nvidia GH200 server. (Memory usage: ~180GB, Build time: ~2387s / ~40 min)
+    # Example of building on Nvidia GH200 server. (Memory usage: ~12GB, Build time: ~1475s / ~25 min, Image size: 7.26GB)
     $ DOCKER_BUILDKIT=1 sudo docker build . \
     --target vllm-openai \
     --platform "linux/arm64" \
-    -t drikster80/vllm-gh200-openai:v0.6.4.post1 \
+    -t vllm/vllm-gh200-openai:latest \
     --build-arg max_jobs=66 \
     --build-arg nvcc_threads=2 \
     --build-arg torch_cuda_arch_list="9.0+PTX" \
diff --git a/requirements-cuda-arm64.txt b/requirements-cuda-arm64.txt
index a8baf1dedb5a8..bbcb5cb7012ce 100644
--- a/requirements-cuda-arm64.txt
+++ b/requirements-cuda-arm64.txt
@@ -1,3 +1,3 @@
 --index-url https://download.pytorch.org/whl/nightly/cu124
-torchvision; platform_machine == 'aarch64'
-torch; platform_machine == 'aarch64'
+torchvision==0.22.0.dev20241215; platform_machine == 'aarch64'
+torch==2.6.0.dev20241210+cu124; platform_machine == 'aarch64'