Skip to content

Commit

Permalink
Add Phi4
Browse files Browse the repository at this point in the history
  • Loading branch information
krammnic committed Dec 21, 2024
1 parent 1a43259 commit 3630908
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 52 deletions.
10 changes: 5 additions & 5 deletions recipes/configs/phi3/evaluation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@ output_dir: ./ # Not needed

# Model Arguments
model:
_component_: torchtune.models.phi3.phi3_mini
_component_: torchtune.models.phi4.phi4_mini

# Checkpointer
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors
]
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: PHI3_MINI
model_type: PHI4_MINI
resume_from_checkpoint: False

# Tokenizer
tokenizer:
_component_: torchtune.models.phi3.phi3_mini_tokenizer
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
_component_: torchtune.models.phi4.phi4_mini_tokenizer
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
max_seq_len: null

# Environment
Expand Down
18 changes: 9 additions & 9 deletions recipes/configs/phi4/mini_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,43 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
#
# Run this config on 4 GPUs using the following:
# tune run --nproc_per_node 4 full_finetune_distributed --config phi3/mini_full
# tune run --nproc_per_node 4 full_finetune_distributed --config phi4/mini_full
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run --nproc_per_node 4 full_finetune_distributed --config phi3/mini_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
# tune run --nproc_per_node 4 full_finetune_distributed --config phi4/mini_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works best when the model is being fine-tuned on 2+ GPUs.
# Single device full finetuning requires more memory optimizations. It's
# best to use mini_low_memory.yaml for those cases

output_dir: /tmp/torchtune/phi3_mini/full # /tmp may be deleted by your system. Change it to your preference.
output_dir: /tmp/torchtune/phi4_mini/full # /tmp may be deleted by your system. Change it to your preference.

# Model arguments
model:
_component_: torchtune.models.phi3.phi3_mini
_component_: torchtune.models.phi4.phi4_mini

# Tokenizer
tokenizer:
_component_: torchtune.models.phi3.phi3_mini_tokenizer
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
_component_: torchtune.models.phi4.phi4_mini_tokenizer
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
max_seq_len: null

# Checkpointer
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors
]
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: PHI3_MINI
model_type: PHI4_MINI
resume_from_checkpoint: False

# Dataset
Expand Down
18 changes: 9 additions & 9 deletions recipes/configs/phi4/mini_full_low_memory.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,45 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
#
# The default config uses an optimizer from bitsandbytes. If you do not have it installed,
# you can install it with
# pip install bitsandbytes
#
# To launch on a single device, run the following command from root:
# tune run full_finetune_single_device --config phi3/mini_full_low_memory
# tune run full_finetune_single_device --config phi4/mini_full_low_memory
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run full_finetune_single_device --config phi3/mini_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
# tune run full_finetune_single_device --config phi4/mini_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

output_dir: /tmp/torchtune/phi3_mini/full_low_memory # /tmp may be deleted by your system. Change it to your preference.
output_dir: /tmp/torchtune/phi4_mini/full_low_memory # /tmp may be deleted by your system. Change it to your preference.

# Model arguments
model:
_component_: torchtune.models.phi3.phi3_mini
_component_: torchtune.models.phi4.phi4_mini

# Tokenizer
tokenizer:
_component_: torchtune.models.phi3.phi3_mini_tokenizer
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
_component_: torchtune.models.phi4.phi4_mini_tokenizer
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
max_seq_len: null

# Checkpointer
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors
]
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: PHI3_MINI
model_type: PHI4_MINI
resume_from_checkpoint: False

# Dataset
Expand Down
18 changes: 9 additions & 9 deletions recipes/configs/phi4/mini_lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,25 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
#
# To launch on 2 devices, run the following command from root:
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi3/mini_lora
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi4/mini_lora
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi3/mini_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
# tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config phi4/mini_lora checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works best when the model is being fine-tuned on 2+ GPUs.
# For single device LoRA finetuning please use mini_lora_single_device.yaml
# or mini_qlora_single_device.yaml

output_dir: /tmp/torchtune/phi3_mini/lora # /tmp may be deleted by your system. Change it to your preference.
output_dir: /tmp/torchtune/phi4_mini/lora # /tmp may be deleted by your system. Change it to your preference.

# Model arguments
model:
_component_: torchtune.models.phi3.lora_phi3_mini
_component_: torchtune.models.phi4.lora_phi4_mini
lora_attn_modules: ['q_proj', 'v_proj', 'output_proj']
apply_lora_to_mlp: True
apply_lora_to_output: False
Expand All @@ -31,21 +31,21 @@ model:

# Tokenizer
tokenizer:
_component_: torchtune.models.phi3.phi3_mini_tokenizer
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
_component_: torchtune.models.phi4.phi4_mini_tokenizer
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
max_seq_len: null

# Checkpointer
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors
]
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: PHI3_MINI
model_type: PHI4_MINI
resume_from_checkpoint: False
save_adapter_weights_only: False

Expand Down
20 changes: 10 additions & 10 deletions recipes/configs/phi4/mini_lora_single_device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,23 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
#
# To launch on a single device, run the following command from root:
# tune run lora_finetune_single_device --config phi3/mini_lora_single_device
# tune run lora_finetune_single_device --config phi4/mini_lora_single_device
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run lora_finetune_single_device --config phi3/mini_lora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
# tune run lora_finetune_single_device --config phi4/mini_lora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

output_dir: /tmp/torchtune/phi3_mini/lora_single_device # /tmp may be deleted by your system. Change it to your preference.
output_dir: /tmp/torchtune/phi4_mini/lora_single_device # /tmp may be deleted by your system. Change it to your preference.

# Model arguments
model:
_component_: torchtune.models.phi3.lora_phi3_mini
_component_: torchtune.models.phi4.lora_phi4_mini
lora_attn_modules: ['q_proj', 'v_proj', 'output_proj']
apply_lora_to_mlp: True
apply_lora_to_output: False
Expand All @@ -29,21 +29,21 @@ model:

# Tokenizer
tokenizer:
_component_: torchtune.models.phi3.phi3_mini_tokenizer
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
_component_: torchtune.models.phi4.phi4_mini_tokenizer
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
max_seq_len: null

# Checkpointer
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors
]
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: PHI3_MINI
model_type: PHI4_MINI
resume_from_checkpoint: False
save_adapter_weights_only: False

Expand Down Expand Up @@ -95,7 +95,7 @@ profiler:
enabled: False

#Output directory of trace artifacts
output_dir: /tmp/Phi-3-mini-4k-instruct/profiling_outputs
output_dir: /tmp/Phi-4-mini-16k-instruct/profiling_outputs

#`torch.profiler.ProfilerActivity` types to trace
cpu: True
Expand Down
20 changes: 10 additions & 10 deletions recipes/configs/phi4/mini_qlora_single_device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,23 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download microsoft/Phi-3-mini-4k-instruct --output-dir /tmp/Phi-3-mini-4k-instruct --hf-token <HF_TOKEN>
# tune download microsoft/Phi-4-mini-16k-instruct --output-dir /tmp/Phi-4-mini-16k-instruct --hf-token <HF_TOKEN>
#
# To launch on a single device, run the following command from root:
# tune run lora_finetune_single_device --config phi3/mini_qlora_single_device
# tune run lora_finetune_single_device --config phi4/mini_qlora_single_device
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run lora_finetune_single_device --config phi3/mini_qlora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
# tune run lora_finetune_single_device --config phi4/mini_qlora_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

output_dir: /tmp/torchtune/phi3_mini/qlora_single_device # /tmp may be deleted by your system. Change it to your preference.
output_dir: /tmp/torchtune/phi4_mini/qlora_single_device # /tmp may be deleted by your system. Change it to your preference.

# Model arguments
model:
_component_: torchtune.models.phi3.qlora_phi3_mini
_component_: torchtune.models.phi4.qlora_phi4_mini
lora_attn_modules: ['q_proj', 'v_proj', 'output_proj']
apply_lora_to_mlp: True
apply_lora_to_output: False
Expand All @@ -29,21 +29,21 @@ model:

# Tokenizer
tokenizer:
_component_: torchtune.models.phi3.phi3_mini_tokenizer
path: /tmp/Phi-3-mini-4k-instruct/tokenizer.model
_component_: torchtune.models.phi4.phi4_mini_tokenizer
path: /tmp/Phi-4-mini-16k-instruct/tokenizer.model
max_seq_len: null

# Checkpointer
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Phi-3-mini-4k-instruct
checkpoint_dir: /tmp/Phi-4-mini-16k-instruct
checkpoint_files: [
model-00001-of-00002.safetensors,
model-00002-of-00002.safetensors
]
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: PHI3_MINI
model_type: PHI4_MINI
resume_from_checkpoint: False
save_adapter_weights_only: False

Expand Down Expand Up @@ -95,7 +95,7 @@ profiler:
enabled: False

# Output directory of trace artifacts
output_dir: /tmp/Phi-3-mini-4k-instruct/profiling_outputs
output_dir: /tmp/Phi-4-mini-16k-instruct/profiling_outputs

#`torch.profiler.ProfilerActivity` types to trace
cpu: True
Expand Down
14 changes: 14 additions & 0 deletions torchtune/_recipe_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ class Recipe:
name="phi3/mini_full_low_memory",
file_path="phi3/mini_full_low_memory.yaml",
),
Config(
name="phi4/mini_full_low_memory",
file_path="phi4/mini_full_low_memory.yaml",
),
Config(
name="qwen2/7B_full_single_device",
file_path="qwen2/7B_full_single_device.yaml",
Expand Down Expand Up @@ -114,6 +118,7 @@ class Recipe:
Config(name="gemma2/9B_full", file_path="gemma2/9B_full.yaml"),
Config(name="gemma2/27B_full", file_path="gemma2/27B_full.yaml"),
Config(name="phi3/mini_full", file_path="phi3/mini_full.yaml"),
Config(name="phi4/mini_full", file_path="phi4/mini_full.yaml"),
Config(name="qwen2/7B_full", file_path="qwen2/7B_full.yaml"),
Config(name="qwen2/0.5B_full", file_path="qwen2/0.5B_full.yaml"),
Config(name="qwen2/1.5B_full", file_path="qwen2/1.5B_full.yaml"),
Expand Down Expand Up @@ -252,6 +257,14 @@ class Recipe:
name="phi3/mini_qlora_single_device",
file_path="phi3/mini_qlora_single_device.yaml",
),
Config(
name="phi4/mini_lora_single_device",
file_path="phi4/mini_lora_single_device.yaml",
),
Config(
name="phi4/mini_qlora_single_device",
file_path="phi4/mini_qlora_single_device.yaml",
),
Config(
name="qwen2/7B_lora_single_device",
file_path="qwen2/7B_lora_single_device.yaml",
Expand Down Expand Up @@ -371,6 +384,7 @@ class Recipe:
Config(name="gemma2/9B_lora", file_path="gemma2/9B_lora.yaml"),
Config(name="gemma2/27B_lora", file_path="gemma2/27B_lora.yaml"),
Config(name="phi3/mini_lora", file_path="phi3/mini_lora.yaml"),
Config(name="phi4/mini_lora", file_path="phi4/mini_lora.yaml"),
Config(name="qwen2/7B_lora", file_path="qwen2/7B_lora.yaml"),
Config(name="qwen2/0.5B_lora", file_path="qwen2/0.5B_lora.yaml"),
Config(name="qwen2/1.5B_lora", file_path="qwen2/1.5B_lora.yaml"),
Expand Down

0 comments on commit 3630908

Please sign in to comment.