diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index c3fc9168ee..b548f34234 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -23,6 +23,12 @@ jobs:
         - name: "2.4.0_cu124_aws"
           base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
           dep_groups: "[all]"
+        - name: "2.5.1_cu124"
+          base_image: mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04
+          dep_groups: "[all]"
+        - name: "2.5.1_cu124_aws"
+          base_image: mosaicml/pytorch:2.5.1_cu124-python3.11-ubuntu22.04-aws
+          dep_groups: "[all]"
     steps:
 
     - name: Checkout
diff --git a/scripts/inference/convert_composer_mpt_to_ft.py b/scripts/inference/convert_composer_mpt_to_ft.py
index 16cfabf125..cd1815e20f 100644
--- a/scripts/inference/convert_composer_mpt_to_ft.py
+++ b/scripts/inference/convert_composer_mpt_to_ft.py
@@ -132,7 +132,7 @@ def write_ft_checkpoint_from_composer_checkpoint(
     # Extract the HF tokenizer
     print('#' * 30)
     print('Extracting HF Tokenizer...')
-    hf_tokenizer = get_hf_tokenizer_from_composer_state_dict(
+    hf_tokenizer = get_hf_tokenizer_from_composer_state_dict(  # pyright: ignore
         composer_state_dict,
         trust_remote_code,
     )
@@ -141,7 +141,7 @@ def write_ft_checkpoint_from_composer_checkpoint(
 
     # Extract the model weights
     weights_state_dict = composer_state_dict['state']['model']
-    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(
+    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(  # pyright: ignore
         weights_state_dict,
         prefix='model.',
     )
diff --git a/scripts/inference/convert_composer_to_hf.py b/scripts/inference/convert_composer_to_hf.py
index dc7314f3e9..3afd09cb46 100644
--- a/scripts/inference/convert_composer_to_hf.py
+++ b/scripts/inference/convert_composer_to_hf.py
@@ -133,7 +133,7 @@ def write_huggingface_pretrained_from_composer_checkpoint(
     weights_state_dict = composer_state_dict
     if 'state' in weights_state_dict:
         weights_state_dict = weights_state_dict['state']['model']
-    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(
+    torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(  # pyright: ignore
         weights_state_dict,
         prefix='model.',
     )
diff --git a/setup.py b/setup.py
index 566e6aae9c..9c54fdc232 100644
--- a/setup.py
+++ b/setup.py
@@ -57,7 +57,7 @@
     'accelerate>=0.25,<1.2',  # for HF inference `device_map`
     'transformers>=4.43.2,<4.47',
     'mosaicml-streaming>=0.9.0,<0.10',
-    'torch>=2.4.0,<2.4.1',
+    'torch>=2.4.0,<2.5.2',
     'datasets>=2.20.0,<2.21',
     'fsspec==2023.6.0',  # newer version results in a bug in datasets that duplicates data
     'sentencepiece==0.2.0',