From 1053702eec90bb2e5c449dc8393a9536486604b0 Mon Sep 17 00:00:00 2001 From: Mihir Patel Date: Tue, 5 Nov 2024 12:14:12 -0500 Subject: [PATCH] Torch bump to 2.5.1 (#3701) --- composer/trainer/_patch_pytorch.py | 2 +- docker/README.md | 6 ++--- docker/build_matrix.yaml | 38 +++++++++++++++--------------- docker/generate_build_matrix.py | 10 ++++---- setup.py | 4 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/composer/trainer/_patch_pytorch.py b/composer/trainer/_patch_pytorch.py index fd7a6c9df8..f8350ef021 100644 --- a/composer/trainer/_patch_pytorch.py +++ b/composer/trainer/_patch_pytorch.py @@ -1055,7 +1055,7 @@ def unshard_with_sync(self): if version.parse(torch.__version__) >= version.parse('2.5.0') and version.parse( torch.__version__, -) < version.parse('2.5.1'): +) < version.parse('2.5.2'): # Save original FlatParamHandle.unshard to revert back to when dropping automicrobatching hooks from torch.distributed.fsdp._flat_param import FlatParamHandle diff --git a/docker/README.md b/docker/README.md index c74a451e87..914e975dcc 100644 --- a/docker/README.md +++ b/docker/README.md @@ -30,9 +30,9 @@ To install composer, once inside the image, run `pip install mosaicml`. | Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags | |----------------|----------|-------------------|---------------------|------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Ubuntu 20.04 | Base | 2.5.0 | 12.4.1 (Infiniband) | 3.11 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04` | -| Ubuntu 20.04 | Base | 2.5.0 | 12.4.1 (EFA) | 3.11 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04-aws` | -| Ubuntu 20.04 | Base | 2.5.0 | cpu | 3.11 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.5.1 | 12.4.1 (Infiniband) | 3.11 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu20.04` | +| Ubuntu 20.04 | Base | 2.5.1 | 12.4.1 (EFA) | 3.11 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu20.04-aws` | +| Ubuntu 20.04 | Base | 2.5.1 | cpu | 3.11 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.5.1_cpu-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.4.1 | 12.4.1 (Infiniband) | 3.11 | `mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.4.1 | 12.4.1 (EFA) | 3.11 | `mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04-aws` | | Ubuntu 20.04 | Base | 2.4.1 | cpu | 3.11 | `mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04` | diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml index ca4d62fc02..c43d01030a 100644 --- a/docker/build_matrix.yaml +++ b/docker/build_matrix.yaml @@ -2,54 +2,54 @@ - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04 CUDA_VERSION: 12.4.1 - IMAGE_NAME: torch-2-5-0-cu124 + IMAGE_NAME: torch-2-5-1-cu124 MOFED_VERSION: latest-23.10 NVIDIA_REQUIRE_CUDA_OVERRIDE: '' PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 2.5.0 + PYTORCH_VERSION: 2.5.1 TAGS: - - mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04 - - ghcr.io/databricks-mosaic/pytorch:2.5.0_cu124-python3.11-ubuntu20.04 + - mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu20.04 + - ghcr.io/databricks-mosaic/pytorch:2.5.1_cu124-python3.11-ubuntu20.04 - mosaicml/pytorch:latest - ghcr.io/databricks-mosaic/pytorch:latest TARGET: pytorch_stage - TORCHVISION_VERSION: 0.20.0 + TORCHVISION_VERSION: 0.20.1 - AWS_OFI_NCCL_VERSION: v1.11.0-aws BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04 CUDA_VERSION: 12.4.1 - IMAGE_NAME: torch-2-5-0-cu124-aws + IMAGE_NAME: torch-2-5-1-cu124-aws MOFED_VERSION: '' NVIDIA_REQUIRE_CUDA_OVERRIDE: '' PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 2.5.0 + PYTORCH_VERSION: 2.5.1 TAGS: - - mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04-aws - - ghcr.io/databricks-mosaic/pytorch:2.5.0_cu124-python3.11-ubuntu20.04-aws + - mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu20.04-aws + - ghcr.io/databricks-mosaic/pytorch:2.5.1_cu124-python3.11-ubuntu20.04-aws - mosaicml/pytorch:latest-aws - ghcr.io/databricks-mosaic/pytorch:latest-aws TARGET: pytorch_stage - TORCHVISION_VERSION: 0.20.0 + TORCHVISION_VERSION: 0.20.1 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: ubuntu:20.04 CUDA_VERSION: '' - IMAGE_NAME: torch-2-5-0-cpu + IMAGE_NAME: torch-2-5-1-cpu MOFED_VERSION: '' NVIDIA_REQUIRE_CUDA_OVERRIDE: '' PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 2.5.0 + PYTORCH_VERSION: 2.5.1 TAGS: - - mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04 - - ghcr.io/databricks-mosaic/pytorch:2.5.0_cpu-python3.11-ubuntu20.04 + - mosaicml/pytorch:2.5.1_cpu-python3.11-ubuntu20.04 + - ghcr.io/databricks-mosaic/pytorch:2.5.1_cpu-python3.11-ubuntu20.04 - mosaicml/pytorch:latest_cpu - ghcr.io/databricks-mosaic/pytorch:latest_cpu TARGET: pytorch_stage - TORCHVISION_VERSION: 0.20.0 + TORCHVISION_VERSION: 0.20.1 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04 CUDA_VERSION: 12.4.1 @@ -176,14 +176,14 @@ PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 2.5.0 + PYTORCH_VERSION: 2.5.1 TAGS: - mosaicml/composer:0.26.0 - ghcr.io/databricks-mosaic/composer:0.26.0 - mosaicml/composer:latest - ghcr.io/databricks-mosaic/composer:latest TARGET: composer_stage - TORCHVISION_VERSION: 0.20.0 + TORCHVISION_VERSION: 0.20.1 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: ubuntu:20.04 COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.26.0 @@ -194,11 +194,11 @@ PYTHON_VERSION: '3.11' PYTORCH_NIGHTLY_URL: '' PYTORCH_NIGHTLY_VERSION: '' - PYTORCH_VERSION: 2.5.0 + PYTORCH_VERSION: 2.5.1 TAGS: - mosaicml/composer:0.26.0_cpu - ghcr.io/databricks-mosaic/composer:0.26.0_cpu - mosaicml/composer:latest_cpu - ghcr.io/databricks-mosaic/composer:latest_cpu TARGET: composer_stage - TORCHVISION_VERSION: 0.20.0 + TORCHVISION_VERSION: 0.20.1 diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py index e68727ef20..36c6556f2a 100644 --- a/docker/generate_build_matrix.py +++ b/docker/generate_build_matrix.py @@ -20,12 +20,12 @@ import yaml PRODUCTION_PYTHON_VERSION = '3.11' -PRODUCTION_PYTORCH_VERSION = '2.5.0' +PRODUCTION_PYTORCH_VERSION = '2.5.1' def _get_torchvision_version(pytorch_version: str): - if pytorch_version == '2.5.0': - return '0.20.0' + if pytorch_version == '2.5.1': + return '0.20.1' if pytorch_version == '2.4.1': return '0.19.1' if pytorch_version == '2.3.1': @@ -45,7 +45,7 @@ def _get_cuda_version(pytorch_version: str, use_cuda: bool): # From https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/ if not use_cuda: return '' - if pytorch_version == '2.5.0': + if pytorch_version == '2.5.1': return '12.4.1' if pytorch_version == '2.4.1': return '12.4.1' @@ -180,7 +180,7 @@ def _write_table(table_tag: str, table_contents: str): def _main(): - python_pytorch_versions = [('3.11', '2.5.0'), ('3.11', '2.4.1'), ('3.11', '2.3.1')] + python_pytorch_versions = [('3.11', '2.5.1'), ('3.11', '2.4.1'), ('3.11', '2.3.1')] cuda_options = [True, False] stages = ['pytorch_stage'] interconnects = ['mellanox', 'EFA'] # mellanox is default, EFA needed for AWS diff --git a/setup.py b/setup.py index c80bc92273..243853959b 100644 --- a/setup.py +++ b/setup.py @@ -80,8 +80,8 @@ def package_files(prefix: str, directory: str, extension: str): 'tqdm>=4.62.3,<5', 'torchmetrics>=1.0,<1.4.1', 'torch_optimizer>=0.3.0,<0.4', - 'torchvision>=0.18.0,<0.20.1', - 'torch>=2.3.0,<2.5.1', + 'torchvision>=0.18.0,<0.20.2', + 'torch>=2.3.0,<2.5.2', 'requests>=2.26.0,<3', 'numpy>=1.21.5,<2.2.0', 'psutil>=5.8.0,<7',