diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index c3fc9168ee..b548f34234 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -23,6 +23,12 @@ jobs: - name: "2.4.0_cu124_aws" base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws dep_groups: "[all]" + - name: "2.5.1_cu124" + base_image: mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04 + dep_groups: "[all]" + - name: "2.5.1_cu124_aws" + base_image: mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04-aws + dep_groups: "[all]" steps: - name: Checkout diff --git a/scripts/inference/convert_composer_mpt_to_ft.py b/scripts/inference/convert_composer_mpt_to_ft.py index 16cfabf125..cd1815e20f 100644 --- a/scripts/inference/convert_composer_mpt_to_ft.py +++ b/scripts/inference/convert_composer_mpt_to_ft.py @@ -132,7 +132,7 @@ def write_ft_checkpoint_from_composer_checkpoint( # Extract the HF tokenizer print('#' * 30) print('Extracting HF Tokenizer...') - hf_tokenizer = get_hf_tokenizer_from_composer_state_dict( + hf_tokenizer = get_hf_tokenizer_from_composer_state_dict( # pyright: ignore composer_state_dict, trust_remote_code, ) @@ -141,7 +141,7 @@ def write_ft_checkpoint_from_composer_checkpoint( # Extract the model weights weights_state_dict = composer_state_dict['state']['model'] - torch.nn.modules.utils.consume_prefix_in_state_dict_if_present( + torch.nn.modules.utils.consume_prefix_in_state_dict_if_present( # pyright: ignore weights_state_dict, prefix='model.', ) diff --git a/scripts/inference/convert_composer_to_hf.py b/scripts/inference/convert_composer_to_hf.py index dc7314f3e9..3afd09cb46 100644 --- a/scripts/inference/convert_composer_to_hf.py +++ b/scripts/inference/convert_composer_to_hf.py @@ -133,7 +133,7 @@ def write_huggingface_pretrained_from_composer_checkpoint( weights_state_dict = composer_state_dict if 'state' in weights_state_dict: weights_state_dict = weights_state_dict['state']['model'] - torch.nn.modules.utils.consume_prefix_in_state_dict_if_present( + torch.nn.modules.utils.consume_prefix_in_state_dict_if_present( # pyright: ignore weights_state_dict, prefix='model.', ) diff --git a/setup.py b/setup.py index 566e6aae9c..9c54fdc232 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ 'accelerate>=0.25,<1.2', # for HF inference `device_map` 'transformers>=4.43.2,<4.47', 'mosaicml-streaming>=0.9.0,<0.10', - 'torch>=2.4.0,<2.4.1', + 'torch>=2.4.0,<2.5.2', 'datasets>=2.20.0,<2.21', 'fsspec==2023.6.0', # newer version results in a bug in datasets that duplicates data 'sentencepiece==0.2.0',