
Commit

fix names
krammnic committed Jan 11, 2025
1 parent 3630908 commit 18f8bc5
Showing 9 changed files with 107 additions and 70 deletions.
4 changes: 2 additions & 2 deletions recipes/configs/phi3/evaluation.yaml
@@ -12,7 +12,7 @@ model:
 # Checkpointer
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
+  checkpoint_dir: /tmp/phi-4
   checkpoint_files: [
     model-00001-of-00002.safetensors,
     model-00002-of-00002.safetensors
@@ -25,7 +25,7 @@ resume_from_checkpoint: False
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.phi4.phi4_mini_tokenizer
-  path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
+  path: /tmp/phi-4/tokenizer.model
   max_seq_len: null
 
 # Environment
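
The renamed paths only take effect if every config agrees with the directory passed to tune download. As a hedged sketch of how a _component_ block like the tokenizer above is resolved (OmegaConf plus torchtune's config.instantiate is the usual pattern; treat the exact API surface as an assumption for your installed version):

# Hedged sketch, not part of the commit: resolve a _component_ config block.
# Assumes torchtune is installed and /tmp/phi-4/tokenizer.model exists.
from omegaconf import OmegaConf
from torchtune import config

cfg = OmegaConf.create(
    """
    tokenizer:
      _component_: torchtune.models.phi4.phi4_mini_tokenizer
      path: /tmp/phi-4/tokenizer.model
      max_seq_len: null
    """
)
tokenizer = config.instantiate(cfg.tokenizer)  # builds the phi-4 tokenizer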
18 changes: 11 additions & 7 deletions recipes/configs/phi4/mini_full.yaml
@@ -1,9 +1,9 @@
 # Config for multi-device full finetuning in full_finetune_distributed.py
-# using a Phi3 Mini 4K Instruct
+# using a Phi4 16K Instruct
 #
 # This config assumes that you've run the following command before launching
 # this run:
-# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
+# tune download microsoft/phi-4 --output-dir /tmp/phi-4 --hf-token <HF_TOKEN>
 #
 # Run this config on 4 GPUs using the following:
 # tune run --nproc_per_node 4 full_finetune_distributed --config phi4/mini_full
@@ -26,20 +26,24 @@ model:
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.phi4.phi4_mini_tokenizer
-  path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
+  path: /tmp/phi-4/tokenizer.model
   max_seq_len: null
 
 # Checkpointer
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
+  checkpoint_dir: /tmp/phi-4
   checkpoint_files: [
-    model-00001-of-00002.safetensors,
-    model-00002-of-00002.safetensors
+    model-00001-of-00006.safetensors,
+    model-00002-of-00006.safetensors,
+    model-00003-of-00006.safetensors,
+    model-00004-of-00006.safetensors,
+    model-00005-of-00006.safetensors,
+    model-00006-of-00006.safetensors,
   ]
   recipe_checkpoint: null
   output_dir: ${output_dir}
-  model_type: PHI4_MINI
+  model_type: PHI3_MINI
   resume_from_checkpoint: False
 
 # Dataset
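
The new checkpoint_files list follows the standard Hugging Face sharded-safetensors naming for microsoft/phi-4 (six shards instead of the two the old name implied). A quick hedged check that a download produced the files these configs expect (the /tmp/phi-4 path is an assumption tied to the command above):

# Hedged sketch, not part of the commit: verify the files the config lists.
from pathlib import Path

ckpt_dir = Path("/tmp/phi-4")
expected = ["tokenizer.model"] + [
    f"model-{i:05d}-of-00006.safetensors" for i in range(1, 7)
]
missing = [name for name in expected if not (ckpt_dir / name).exists()]
print("all files present" if not missing else f"missing: {missing}")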
18 changes: 11 additions & 7 deletions recipes/configs/phi4/mini_full_low_memory.yaml
@@ -1,9 +1,9 @@
 # Config for single device full finetuning in full_finetune_single_device.py
-# using a Phi3 Mini 4K Instruct
+# using a Phi4 16K Instruct
 #
 # This config assumes that you've run the following command before launching
 # this run:
-# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
+# tune download microsoft/phi-4 --output-dir /tmp/phi-4 --hf-token <HF_TOKEN>
 #
 # The default config uses an optimizer from bitsandbytes. If you do not have it installed,
 # you can install it with
@@ -28,20 +28,24 @@ model:
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.phi4.phi4_mini_tokenizer
-  path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
+  path: /tmp/phi-4/tokenizer.model
   max_seq_len: null
 
 # Checkpointer
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
+  checkpoint_dir: /tmp/phi-4
   checkpoint_files: [
-    model-00001-of-00002.safetensors,
-    model-00002-of-00002.safetensors
+    model-00001-of-00006.safetensors,
+    model-00002-of-00006.safetensors,
+    model-00003-of-00006.safetensors,
+    model-00004-of-00006.safetensors,
+    model-00005-of-00006.safetensors,
+    model-00006-of-00006.safetensors,
   ]
   recipe_checkpoint: null
   output_dir: ${output_dir}
-  model_type: PHI4_MINI
+  model_type: PHI3_MINI
   resume_from_checkpoint: False
 
 # Dataset
18 changes: 11 additions & 7 deletions recipes/configs/phi4/mini_lora.yaml
@@ -1,9 +1,9 @@
 # Config for multi-device LoRA finetuning in lora_finetune_distributed.py
-# using a Phi3 mini (3.8B) model
+# using a Phi4 (14B) model
 #
 # This config assumes that you've run the following command before launching
 # this run:
-# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
+# tune download microsoft/phi-4 --output-dir /tmp/phi-4 --hf-token <HF_TOKEN>
 #
 # To launch on 2 devices, run the following command from root:
 # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi4/mini_lora
@@ -32,20 +32,24 @@ model:
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.phi4.phi4_mini_tokenizer
-  path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
+  path: /tmp/phi-4/tokenizer.model
   max_seq_len: null
 
 # Checkpointer
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
+  checkpoint_dir: /tmp/phi-4
   checkpoint_files: [
-    model-00001-of-00002.safetensors,
-    model-00002-of-00002.safetensors
+    model-00001-of-00006.safetensors,
+    model-00002-of-00006.safetensors,
+    model-00003-of-00006.safetensors,
+    model-00004-of-00006.safetensors,
+    model-00005-of-00006.safetensors,
+    model-00006-of-00006.safetensors,
  ]
   recipe_checkpoint: null
   output_dir: ${output_dir}
-  model_type: PHI4_MINI
+  model_type: PHI3_MINI
   resume_from_checkpoint: False
   save_adapter_weights_only: False
 
20 changes: 12 additions & 8 deletions recipes/configs/phi4/mini_lora_single_device.yaml
@@ -1,9 +1,9 @@
 # Config for single device LoRA finetuning in lora_finetune_single_device.py
-# using a Phi3 mini (3.8B) model
+# using a Phi4 (14B) model
 #
 # This config assumes that you've run the following command before launching
 # this run:
-# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
+# tune download microsoft/phi-4 --output-dir /tmp/phi-4 --hf-token <HF_TOKEN>
 #
 # To launch on a single device, run the following command from root:
 # tune run lora_finetune_single_device --config phi4/mini_lora_single_device
@@ -30,20 +30,24 @@ model:
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.phi4.phi4_mini_tokenizer
-  path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
+  path: /tmp/phi-4/tokenizer.model
   max_seq_len: null
 
 # Checkpointer
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
+  checkpoint_dir: /tmp/phi-4
   checkpoint_files: [
-    model-00001-of-00002.safetensors,
-    model-00002-of-00002.safetensors
+    model-00001-of-00006.safetensors,
+    model-00002-of-00006.safetensors,
+    model-00003-of-00006.safetensors,
+    model-00004-of-00006.safetensors,
+    model-00005-of-00006.safetensors,
+    model-00006-of-00006.safetensors,
   ]
   recipe_checkpoint: null
   output_dir: ${output_dir}
-  model_type: PHI4_MINI
+  model_type: PHI3_MINI
   resume_from_checkpoint: False
   save_adapter_weights_only: False
 
@@ -95,7 +99,7 @@ profiler:
   enabled: False
 
 #Output directory of trace artifacts
-  output_dir: /tmp/Phi-4-mini-16k-instruct/profiling_outputs
+  output_dir: /tmp/phi-4/profiling_outputs
 
 #`torch.profiler.ProfilerActivity` types to trace
   cpu: True
20 changes: 12 additions & 8 deletions recipes/configs/phi4/mini_qlora_single_device.yaml
@@ -1,9 +1,9 @@
 # Config for single device QLoRA with lora_finetune_single_device.py
-# using a Phi3 mini (3.8B) model
+# using a Phi4 (14B) model
 #
 # This config assumes that you've run the following command before launching
 # this run:
-# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
+# tune download microsoft/phi-4 --output-dir /tmp/phi-4 --hf-token <HF_TOKEN>
 #
 # To launch on a single device, run the following command from root:
 # tune run lora_finetune_single_device --config phi4/mini_qlora_single_device
@@ -30,20 +30,24 @@ model:
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.phi4.phi4_mini_tokenizer
-  path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
+  path: /tmp/phi-4/tokenizer.model
   max_seq_len: null
 
 # Checkpointer
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
+  checkpoint_dir: /tmp/phi-4
   checkpoint_files: [
-    model-00001-of-00002.safetensors,
-    model-00002-of-00002.safetensors
+    model-00001-of-00006.safetensors,
+    model-00002-of-00006.safetensors,
+    model-00003-of-00006.safetensors,
+    model-00004-of-00006.safetensors,
+    model-00005-of-00006.safetensors,
+    model-00006-of-00006.safetensors,
   ]
   recipe_checkpoint: null
   output_dir: ${output_dir}
-  model_type: PHI4_MINI
+  model_type: PHI3_MINI
   resume_from_checkpoint: False
   save_adapter_weights_only: False
 
@@ -95,7 +99,7 @@ profiler:
   enabled: False
 
 # Output directory of trace artifacts
-  output_dir: /tmp/Phi-4-mini-16k-instruct/profiling_outputs
+  output_dir: /tmp/phi-4/profiling_outputs
 
 #`torch.profiler.ProfilerActivity` types to trace
   cpu: True
62 changes: 39 additions & 23 deletions tests/torchtune/models/phi4/test_phi4_tokenizer.py
@@ -23,15 +23,22 @@ def tokenizer(self):
     @pytest.fixture
     def expected_tokens(self):
         # fmt: off
-        tokens = [100257, 100264, 115, 121, 322, 398, 100265, 10, 1539, 470, 258, 1444, 933, 1940, 511, 446, 100266, 10,
-        100264, 115, 121, 322, 398, 100265, 10, 66, 478, 299, 351, 362, 292, 1160, 117, 807, 334, 958, 99, 445, 98, 300, 258, 256, 281,
-        107, 46, 411, 114, 561, 258, 1156, 279, 316, 334, 604, 337, 112, 445, 1827, 512, 1080, 116, 300, 262, 1249, 524, 340, 10, 35, 35, 35, 828, 1160, 117, 807, 1037,
-        71, 1414, 534, 258, 1759, 511, 355, 285, 875, 550, 102, 1546, 265, 105, 111, 340, 10, 35, 35, 35, 408, 300, 112, 279, 316, 1037, 100266, 10,
-        100264, 115, 121, 322, 398, 100265, 10, 73, 776, 362, 425, 1978, 274, 284, 1528, 319, 995, 505, 944, 874, 903, 1585, 616, 345, 1528, 115, 284, 1749, 803, 46, 270,
-        776, 1341, 258, 1279, 641, 563, 275, 469, 573, 284, 944, 320, 526, 962, 425, 913, 1402, 97, 356, 446, 115, 284, 1229,
-        1581, 282, 117, 276, 259, 300, 46, 270, 776, 258, 1279, 275, 288, 283, 262, 739, 1886, 284, 783, 1803, 636, 277,
-        268, 117, 316, 485, 115, 284, 302, 416, 273, 900, 46, 270, 776, 591, 630, 346, 531, 476, 505, 768, 1233, 342, 1923, 292, 522, 662, 280, 274, 913, 601, 359, 300, 44, 335, 834, 335,
-        531, 476, 505, 604, 264, 509, 1456, 258, 771, 543, 1719, 405, 710, 665, 668, 1280, 46, 100266, 10, 100257]  # noqa
+        tokens = [
+            100257, 100264, 115, 121, 322, 398, 100265, 10, 1539, 470, 258, 1444, 933, 1940, 511, 446, 100266, 10,
+            100264, 115, 121, 322, 398, 100265, 10, 66, 478, 299, 351, 362, 292, 1160, 117, 807, 334,
+            958, 99, 445, 98, 300, 258, 256, 281,
+            107, 46, 411, 114, 561, 258, 1156, 279, 316, 334, 604, 337, 112, 445, 1827, 512, 1080, 116, 300, 262, 1249,
+            524, 340, 10, 35, 35, 35, 828, 1160, 117, 807, 1037,
+            71, 1414, 534, 258, 1759, 511, 355, 285, 875, 550, 102, 1546, 265, 105, 111, 340, 10, 35,
+            35, 35, 408, 300, 112, 279, 316, 1037, 100266, 10,
+            100264, 115, 121, 322, 398, 100265, 10, 73, 776, 362, 425, 1978, 274, 284, 1528, 319, 995, 505,
+            944, 874, 903, 1585, 616, 345, 1528, 115, 284, 1749, 803, 46, 270,
+            776, 1341, 258, 1279, 641, 563, 275, 469, 573, 284, 944, 320, 526, 962, 425, 913, 1402, 97, 356, 446, 115, 284, 1229,
+            1581, 282, 117, 276, 259, 300, 46, 270, 776, 258, 1279, 275, 288, 283, 262, 739, 1886, 284, 783, 1803, 636, 277,
+            268, 117, 316, 485, 115, 284, 302, 416, 273, 900, 46, 270, 776, 591, 630, 346, 531,
+            476, 505, 768, 1233, 342, 1923, 292, 522, 662, 280, 274, 913, 601, 359, 300, 44, 335, 834, 335,
+            531, 476, 505, 604, 264, 509, 1456, 258, 771, 543, 1719, 405, 710, 665, 668, 1280, 46, 100266, 10, 100257
+        ]  # noqa
         return tokens
         # fmt: on
 
@@ -81,26 +88,35 @@ def test_tokenize_messages_no_system_prompt(self, tokenizer):
                 "good conversation over coffee.",
             ),
         ]
-        tokens, mask = tokenizer.tokenize_messages(messages, ignore_system_prompt=True, add_eos=True)
+        tokens, mask = tokenizer.tokenize_messages(
+            messages, ignore_system_prompt=True, add_eos=True
+        )
 
         # fmt: off
-        expected_tokens = [100257, 100264, 115, 121, 322, 398, 100265, 10, 66, 478, 299, 351, 362, 292, 1160, 117, 807, 334, 958,
-        99, 445, 98, 300, 258, 256, 281, 107, 46, 411, 114, 561, 258, 1156, 279, 316, 334, 604, 337, 112, 445, 1827, 512, 1080, 116, 300, 262, 1249, 524, 340,
-        10, 35, 35, 35, 828, 1160, 117, 807, 1037, 71, 1414, 534, 258, 1759, 511, 355, 285, 875, 550, 102, 1546, 265, 105, 111, 340, 10, 35,
-        35, 35, 408, 300, 112, 279, 316, 1037, 100266, 10, 100264, 115, 121, 322, 398, 100265, 10, 73, 776, 362, 425, 1978, 274, 284, 1528, 319, 995,
-        505, 944, 874, 903, 1585, 616, 345, 1528, 115, 284, 1749, 803, 46, 270, 776, 1341, 258, 1279, 641, 563, 275, 469, 573, 284, 944, 320, 526, 962, 425,
-        913, 1402, 97, 356, 446, 115, 284, 1229, 1581, 282, 117, 276, 259, 300, 46, 270, 776, 258, 1279, 275, 288, 283, 262, 739, 1886, 284, 783, 1803, 636, 277, 268, 117, 316,
-        485, 115, 284, 302, 416, 273, 900, 46, 270, 776, 591, 630, 346, 531, 476, 505, 768, 1233, 342, 1923, 292, 522, 662, 280, 274, 913, 601, 359, 300, 44, 335, 834, 335, 531,
-        476, 505, 604, 264, 509, 1456, 258, 771, 543, 1719, 405, 710, 665, 668, 1280, 46, 100266, 10, 100257]  # noqa
+        expected_tokens = [
+            100257, 100264, 115, 121, 322, 398, 100265, 10, 66, 478, 299, 351, 362, 292, 1160, 117, 807, 334, 958,
+            99, 445, 98, 300, 258, 256, 281, 107, 46, 411, 114, 561, 258, 1156, 279, 316, 334, 604, 337, 112, 445, 1827,
+            512, 1080, 116, 300, 262, 1249, 524, 340,
+            10, 35, 35, 35, 828, 1160, 117, 807, 1037, 71, 1414, 534, 258, 1759,
+            511, 355, 285, 875, 550, 102, 1546, 265, 105, 111, 340, 10, 35,
+            35, 35, 408, 300, 112, 279, 316, 1037, 100266, 10, 100264, 115, 121, 322, 398,
+            100265, 10, 73, 776, 362, 425, 1978, 274, 284, 1528, 319, 995,
+            505, 944, 874, 903, 1585, 616, 345, 1528, 115, 284, 1749, 803, 46,
+            270, 776, 1341, 258, 1279, 641, 563, 275, 469, 573,
+            284, 944, 320, 526, 962, 425, 913, 1402, 97, 356, 446, 115, 284, 1229, 1581, 282,
+            117, 276, 259, 300, 46, 270, 776, 258, 1279, 275, 288, 283, 262,
+            739, 1886, 284, 783, 1803, 636, 277, 268, 117, 316,
+            485, 115, 284, 302, 416, 273, 900, 46, 270, 776, 591, 630, 346, 531, 476, 505, 768, 1233, 342, 1923, 292, 522, 662, 280,
+            274, 913, 601, 359, 300, 44, 335, 834, 335, 531,
+            476, 505, 604, 264, 509, 1456, 258, 771, 543, 1719, 405, 710, 665, 668, 1280, 46, 100266, 10, 100257
+        ]  # noqa
         # fmt: on
 
         expected_mask = [True] * 81 + [False] * 127
         assert expected_tokens == tokens
         assert expected_mask == mask
 
-    def test_tokenize_message_drop_eos(
-        self, tokenizer, expected_tokens
-    ):
+    def test_tokenize_message_drop_eos(self, tokenizer, expected_tokens):
         """
         Test that the tokenizer will not add an EOS token or EOT token if user requests it.
         This is the most common case for inference.
@@ -126,8 +142,8 @@ def test_tokenize_message_drop_eos(
         ]
 
         tokens, mask = tokenizer.tokenize_messages(messages, add_eos=False)
 
         expected_mask = [True] * 93 + [False] * 126
-        # Drop eos token
+        # Drop eos token.
         assert expected_tokens[:-1] == tokens
         assert expected_mask == mask
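
For reference, the call pattern these tests exercise: tokenize_messages returns parallel token and mask lists, with the mask True over the prompt span and False over the assistant response (so a training loss can ignore the prompt), which is what the [True] * n + [False] * m assertions above encode. A hedged usage sketch, assuming a local /tmp/phi-4/tokenizer.model and that Message accepts plain-string content:

# Hedged sketch mirroring the tests above; not part of the commit.
from torchtune.data import Message
from torchtune.models.phi4 import phi4_mini_tokenizer

tokenizer = phi4_mini_tokenizer(path="/tmp/phi-4/tokenizer.model")
messages = [
    Message(role="user", content="What is the capital of France?"),
    Message(role="assistant", content="The capital of France is Paris."),
]
tokens, mask = tokenizer.tokenize_messages(messages, add_eos=False)
assert len(tokens) == len(mask)  # mask: True = prompt, False = response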
12 changes: 7 additions & 5 deletions torchtune/models/phi4/__init__.py
@@ -4,8 +4,10 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from ._model_builders import (
-    phi4_mini,
-    lora_phi4_mini,
-    phi4_mini_tokenizer,
-)
+from ._model_builders import lora_phi4_mini, phi4_mini, phi4_mini_tokenizer  # noqa
+
+__all__ = [
+    "phi4_mini",
+    "phi4_mini_tokenizer",
+    "lora_phi4_mini",
+]
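
With the flattened import and the explicit __all__, the subpackage's public surface is pinned down, and the builders import directly from it:

# After this change, the three public builders import flat from the subpackage:
from torchtune.models.phi4 import lora_phi4_mini, phi4_mini, phi4_mini_tokenizer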
5 changes: 2 additions & 3 deletions torchtune/models/phi4/_tokenizer.py
@@ -31,7 +31,7 @@
 
 for token_id in range(100266, 100351):
     if token_id == 100276:
-            continue
+        continue
     PHI4_SPECIAL_TOKENS[f"<|dummy_{87 - (100350 - token_id)}|>"] = token_id + 1
 
 CL100K_PATTERN = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""  # noqa
@@ -100,7 +100,7 @@ def __init__(
     @property
     def vocab_size(self):
         return self.tt_model.vocab_size
-
+
     @property
     def base_vocab_size(self) -> int:
         return self.tt_model.base_vocab_size
@@ -139,7 +139,6 @@ def decode(self, ids: List[int], skip_special_tokens: bool = True) -> str:
             ids_for_decode.append(token_id)
         return self.tt_model.decode(ids_for_decode)
 
-
     def tokenize_messages(
         self,
         messages: List[Message],
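
The three hunks above are whitespace cleanups: the first fixes the over-indented continue under the if, the second strips trailing whitespace from a blank line, and the third drops a duplicate blank line between methods. The dummy-token loop itself is self-contained arithmetic; a standalone sketch of what it builds (same logic as above, no torchtune imports needed):

# Standalone sketch of the loop above: ids 100266-100350, skipping 100276,
# map names <|dummy_3|> ... <|dummy_87|> to token_id + 1.
PHI4_SPECIAL_TOKENS = {}
for token_id in range(100266, 100351):
    if token_id == 100276:
        continue
    PHI4_SPECIAL_TOKENS[f"<|dummy_{87 - (100350 - token_id)}|>"] = token_id + 1

print(len(PHI4_SPECIAL_TOKENS))           # 84 entries (100276 is skipped)
print(min(PHI4_SPECIAL_TOKENS.values()))  # 100267
print(max(PHI4_SPECIAL_TOKENS.values()))  # 100351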
