Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

version bump to 0.24.0 #3527

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion composer/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

"""The Composer Version."""

__version__ = '0.24.0.dev0'
__version__ = '0.24.0'
2 changes: 0 additions & 2 deletions composer/callbacks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from composer.callbacks.activation_monitor import ActivationMonitor
from composer.callbacks.checkpoint_saver import CheckpointSaver
from composer.callbacks.early_stopper import EarlyStopper
from composer.callbacks.eval_output_logging_callback import EvalOutputLogging
from composer.callbacks.export_for_inference import ExportForInferenceCallback
from composer.callbacks.free_outputs import FreeOutputs
from composer.callbacks.generate import Generate
Expand All @@ -36,7 +35,6 @@
'CheckpointSaver',
'MLPerfCallback',
'EarlyStopper',
'EvalOutputLogging',
'ExportForInferenceCallback',
'ThresholdStopper',
'ImageVisualizer',
Expand Down
129 changes: 0 additions & 129 deletions composer/callbacks/eval_output_logging_callback.py

This file was deleted.

10 changes: 6 additions & 4 deletions composer/checkpoint/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,10 +539,12 @@ def torch_set_model_state_dict(
textwrap.dedent(
'PyTorch DTensor broke backwards compatibility in older checkpoints '
'with ShardedTensor, which is now deprecated. To load old checkpoints, '
'either downgrade to PyTorch <2.3.0 or explicitly pass process groups '
'in the Trainer constructor via '
"`parallelism_config = {'fsdp': {'process_group': 'mod1'}}`. We can "
'provide assistance at https://github.com/mosaicml/composer/issues.',
'either downgrade to PyTorch <2.3.0 or define process groups in the '
'Trainer constructor via the `data_parallel_shard_degree` or '
'`data_parallel_replicate_degree` arguments to `parallelism_config` via '
'the "fsdp" key, as '
'`parallelism_config = {"fsdp": {"data_parallel_shard_degree": 8}}`, '
'for example. We can provide assistance at https://github.com/mosaicml/composer/issues.',
),
) from e
else:
Expand Down
28 changes: 7 additions & 21 deletions composer/core/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
ParallelismConfig,
ParallelismType,
TPConfig,
VersionedDeprecationWarning,
batch_get,
batch_set,
dist,
Expand Down Expand Up @@ -617,7 +616,7 @@ def _validate_parallelism_configs(self):
# Load monolith rank0 only
if self.load_monolith_rank0_only:
if self.tp_config is not None:
raise ValueError('load_fsdp_monolith_rank0_only is not compatible with tensor parallelism (TP).')
raise ValueError('load_monolith_rank0_only is not compatible with tensor parallelism (TP).')
assert self.fsdp_config is not None
error_message = ''
if self.fsdp_config.sync_module_states == False:
Expand Down Expand Up @@ -900,21 +899,6 @@ def fsdp_state_dict_type(self):
def fsdp_sharded_state_dict_enabled(self):
    """Report whether FSDP is active and configured for a ``'sharded'`` state dict.

    Evaluates to a falsy value when there is no FSDP config or FSDP is not
    enabled; otherwise it is the result of comparing ``fsdp_state_dict_type``
    against ``'sharded'``.
    """
    # Without an FSDP config there is nothing to shard.
    if self.fsdp_config is None:
        return False
    return self.fsdp_enabled and self.fsdp_state_dict_type == 'sharded'

@property
def fsdp_device_mesh(self):
    """Deprecated alias that forwards to ``device_mesh``.

    Emits a ``VersionedDeprecationWarning`` (tagged with version ``'0.24'``)
    on every access, then returns ``self.device_mesh`` unchanged.
    """
    deprecation = VersionedDeprecationWarning(
        'fsdp_device_mesh is deprecated. Use device_mesh instead.',
        '0.24',
    )
    warnings.warn(deprecation)
    return self.device_mesh

@property
def load_fsdp_monolith_rank0_only(self):
    """Deprecated alias that forwards to ``load_monolith_rank0_only``.

    Emits a ``VersionedDeprecationWarning`` (tagged with version ``'0.24'``)
    on every access, then returns ``self.load_monolith_rank0_only`` unchanged.
    """
    deprecation = VersionedDeprecationWarning(
        'load_fsdp_monolith_rank0_only is deprecated. Use load_monolith_rank0_only instead.',
        '0.24',
    )
    warnings.warn(deprecation)
    return self.load_monolith_rank0_only

@property
def load_monolith_rank0_only(self):
return (
Expand Down Expand Up @@ -1352,10 +1336,12 @@ def load_model_state(
textwrap.dedent(
'PyTorch DTensor broke backwards compatibility in older checkpoints '
'with ShardedTensor, which is now deprecated. To load old checkpoints, '
'either downgrade to PyTorch <2.3.0 or explicitly pass process groups '
'in the Trainer constructor via '
"`parallelism_config = {'fsdp': {'process_group': 'mod1'}}`. We can "
'provide assistance at https://github.com/mosaicml/composer/issues.',
'either downgrade to PyTorch <2.3.0 or define process groups in the '
'Trainer constructor via the `data_parallel_shard_degree` or '
'`data_parallel_replicate_degree` arguments to `parallelism_config` via '
'the "fsdp" key, as '
'`parallelism_config = {"fsdp": {"data_parallel_shard_degree": 8}}`, '
'for example. We can provide assistance at https://github.com/mosaicml/composer/issues.',
),
) from e
else:
Expand Down
3 changes: 1 addition & 2 deletions composer/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@
Transform,
VersionedDeprecationWarning,
checkpoint,
create_fsdp_config,
dist,
ensure_tuple,
export_with_logger,
Expand Down Expand Up @@ -1323,7 +1322,7 @@ def __init__(
if isinstance(parallelism_config['fsdp'], FSDPConfig):
parallelism_config_args['fsdp'] = parallelism_config['fsdp']
else:
parallelism_config_args['fsdp'] = create_fsdp_config(parallelism_config['fsdp'])
parallelism_config_args['fsdp'] = FSDPConfig(**parallelism_config['fsdp'])
if 'tp' in parallelism_config and parallelism_config['tp'] is not None:
if isinstance(parallelism_config['tp'], TPConfig):
parallelism_config_args['tp'] = parallelism_config['tp']
Expand Down
3 changes: 1 addition & 2 deletions composer/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
UCObjectStore,
build_remote_backend,
)
from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig, create_fsdp_config
from composer.utils.parallelism import FSDPConfig, ParallelismConfig, TPConfig
from composer.utils.remote_uploader import RemoteFilesExistingCheckStatus, RemoteUploader
from composer.utils.retrying import retry
from composer.utils.string_enum import StringEnum
Expand Down Expand Up @@ -153,7 +153,6 @@
'KNOWN_COMPRESSORS',
'STR_TO_DTYPE',
'ParallelismType',
'create_fsdp_config',
'FSDPConfig',
'TPConfig',
'ParallelismConfig',
Expand Down
39 changes: 0 additions & 39 deletions composer/utils/parallelism.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@

"""Parallelism configs."""

import warnings
from dataclasses import dataclass
from typing import Any, Optional

from torch.distributed._tensor.device_mesh import DeviceMesh

from composer.utils.warnings import VersionedDeprecationWarning


@dataclass
class FSDPConfig:
Expand Down Expand Up @@ -45,42 +42,6 @@ class FSDPConfig:
verbose: bool = False


def create_fsdp_config(fsdp_config: dict[str, Any]):
    """Build an ``FSDPConfig`` from a dict, translating the deprecated ``device_mesh`` key.

    The input dict is shallow-copied and never mutated.

    Args:
        fsdp_config: Keyword arguments for ``FSDPConfig``. May contain the
            deprecated ``process_group`` and/or ``device_mesh`` keys; each one
            triggers a ``VersionedDeprecationWarning``.

    Returns:
        ``FSDPConfig(**kwargs)`` where a ``device_mesh`` entry, if present, has
        been replaced by ``data_parallel_shard_degree`` (length-1 mesh) or by
        ``data_parallel_replicate_degree`` and ``data_parallel_shard_degree``
        (length-2 mesh, in that order).

    Raises:
        ValueError: If ``device_mesh`` is given together with either degree
            key, or if ``device_mesh`` does not have length 1 or 2.
    """
    # Shallow copy so the caller's dict is left untouched.
    config_kwargs = dict(fsdp_config)

    if 'process_group' in config_kwargs:
        # Only warned about here; the key is still forwarded to FSDPConfig.
        warnings.warn(
            VersionedDeprecationWarning(
                'process_group is deprecated. Please specify `data_parallel_shard_degree` and `data_parallel_replicate_degree` instead.',
                remove_version='0.24',
            ),
        )

    # Nothing to translate when the legacy mesh key is absent.
    if 'device_mesh' not in config_kwargs:
        return FSDPConfig(**config_kwargs)

    warnings.warn(
        VersionedDeprecationWarning(
            'device_mesh is deprecated. Please specify `data_parallel_shard_degree` and `data_parallel_replicate_degree` instead.',
            remove_version='0.24',
        ),
    )

    degree_keys = ('data_parallel_shard_degree', 'data_parallel_replicate_degree')
    if any(key in config_kwargs for key in degree_keys):
        raise ValueError(
            'Cannot specify both `device_mesh` and `data_parallel_shard_degree` or `data_parallel_replicate_degree`. Please remove `device_mesh`.',
        )

    mesh = config_kwargs.pop('device_mesh')
    mesh_length = len(mesh)
    if mesh_length == 1:
        config_kwargs['data_parallel_shard_degree'] = mesh[0]
    elif mesh_length == 2:
        config_kwargs['data_parallel_replicate_degree'] = mesh[0]
        config_kwargs['data_parallel_shard_degree'] = mesh[1]
    else:
        raise ValueError(
            f'device_mesh must be of length 1 or 2 but received length {mesh_length} with device mesh {mesh}.',
        )

    return FSDPConfig(**config_kwargs)


@dataclass
class TPConfig:
"""Configuration for tensor parallelism (TP)."""
Expand Down
4 changes: 2 additions & 2 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ all dependencies for both NLP and Vision models. They are built on top of the
<!-- BEGIN_COMPOSER_BUILD_MATRIX -->
| Composer Version | CUDA Support | Docker Tag |
|--------------------|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0.23.5 | Yes | `mosaicml/composer:latest`, `mosaicml/composer:0.23.5` |
| 0.23.5 | No | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.23.5_cpu` |
| 0.24.0 | Yes | `mosaicml/composer:latest`, `mosaicml/composer:0.24.0` |
| 0.24.0 | No | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.24.0_cpu` |
<!-- END_COMPOSER_BUILD_MATRIX -->

**Note**: For a lightweight installation, we recommend using a [MosaicML PyTorch Image](#pytorch-images) and manually
Expand Down
16 changes: 8 additions & 8 deletions docker/build_matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -194,36 +194,36 @@
TORCHVISION_VERSION: 0.17.2
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: nvidia/cuda:12.4.1-cudnn-devel-ubuntu20.04
COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.23.5
COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.24.0
CUDA_VERSION: 12.4.1
IMAGE_NAME: composer-0-23-5
IMAGE_NAME: composer-0-24-0
MOFED_VERSION: latest-23.10
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.11'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.4.0
TAGS:
- mosaicml/composer:0.23.5
- ghcr.io/databricks-mosaic/composer:0.23.5
- mosaicml/composer:0.24.0
- ghcr.io/databricks-mosaic/composer:0.24.0
- mosaicml/composer:latest
- ghcr.io/databricks-mosaic/composer:latest
TARGET: composer_stage
TORCHVISION_VERSION: 0.19.0
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: ubuntu:20.04
COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.23.5
COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.24.0
CUDA_VERSION: ''
IMAGE_NAME: composer-0-23-5-cpu
IMAGE_NAME: composer-0-24-0-cpu
MOFED_VERSION: latest-23.10
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.11'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.4.0
TAGS:
- mosaicml/composer:0.23.5_cpu
- ghcr.io/databricks-mosaic/composer:0.23.5_cpu
- mosaicml/composer:0.24.0_cpu
- ghcr.io/databricks-mosaic/composer:0.24.0_cpu
- mosaicml/composer:latest_cpu
- ghcr.io/databricks-mosaic/composer:latest_cpu
TARGET: composer_stage
Expand Down
2 changes: 1 addition & 1 deletion docker/generate_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def _main():
composer_entries = []

# The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images
composer_versions = ['0.23.5'] # Only build images for the latest composer version
composer_versions = ['0.24.0'] # Only build images for the latest composer version
composer_python_versions = [PRODUCTION_PYTHON_VERSION] # just build composer against the latest

for product in itertools.product(composer_python_versions, composer_versions, cuda_options):
Expand Down
Loading
Loading