
Commit

More fixes. Able to do a forward pass.
krammnic committed Jan 11, 2025
1 parent 18f8bc5 commit e69a77c
Showing 1 changed file with 12 additions and 12 deletions.
24 changes: 12 additions & 12 deletions torchtune/models/phi4/_model_builders.py
@@ -26,15 +26,15 @@ def phi4_mini() -> TransformerDecoder:
     such as sliding window attention.
 
     Returns:
-        TransformerDecoder: Instantiation of Phi4 Mini 4K Instruct Model
+        TransformerDecoder: Instantiation of Phi4 Mini 16K Instruct Model
     """
     return phi3(
         vocab_size=100_352,
-        num_layers=32,
-        num_heads=32,
-        num_kv_heads=32,
-        embed_dim=3072,
-        intermediate_dim=8192,
+        num_layers=40,
+        num_heads=20,
+        num_kv_heads=10,
+        embed_dim=5120,
+        intermediate_dim=17920,
         max_seq_len=16384,
         attn_dropout=0.0,
         norm_eps=1e-5,
@@ -73,7 +73,7 @@ def lora_phi4_mini(
     quantize_base: bool = False,
 ) -> TransformerDecoder:
     """
-    Builder for creating a Phi4 Mini (3.8b) model with LoRA enabled.
+    Builder for creating a Phi4 (14b) model with LoRA enabled.
 
     The Phi4 defaults are the same as in :func:`~torchtune.models.phi4.phi4_mini`,
     while LoRA default params are based on
@@ -102,11 +102,11 @@ def lora_phi4_mini(
         apply_lora_to_mlp=apply_lora_to_mlp,
         apply_lora_to_output=apply_lora_to_output,
         vocab_size=100_352,
-        num_layers=32,
-        num_heads=32,
-        num_kv_heads=32,
-        embed_dim=3072,
-        intermediate_dim=8192,
+        num_layers=40,
+        num_heads=20,
+        num_kv_heads=10,
+        embed_dim=5120,
+        intermediate_dim=17920,
         max_seq_len=16384,
         attn_dropout=0.0,
         norm_eps=1e-5,
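
For context, the sketch below exercises both updated builders end to end, in the spirit of the commit message ("able to do a forward pass"). It is an illustration, not part of the commit: it assumes phi4_mini and lora_phi4_mini are re-exported from torchtune.models.phi4, and that lora_phi4_mini keeps torchtune's usual lora_attn_modules / lora_rank / lora_alpha parameters; any name not visible in the diff above is an assumption.

import torch

from torchtune.models.phi4 import lora_phi4_mini, phi4_mini  # assumed re-export path

# Base Phi4 decoder with the new defaults from this commit
# (40 layers, 20 heads, embed_dim=5120, intermediate_dim=17920, 16K context).
# Note: instantiating a ~14B-parameter model needs tens of GB of memory.
model = phi4_mini()

# Smoke-test a forward pass: a batch of 1 with 16 token ids drawn from the
# 100_352-entry vocabulary; the logits should come back as [1, 16, 100_352].
tokens = torch.randint(0, 100_352, (1, 16))
with torch.no_grad():
    logits = model(tokens)
print(logits.shape)

# LoRA variant with adapters on the attention projections only. Parameter names
# here follow torchtune's usual LoRA builder convention and are assumptions;
# only quantize_base, apply_lora_to_mlp, and apply_lora_to_output appear in the
# diff above.
lora_model = lora_phi4_mini(
    lora_attn_modules=["q_proj", "v_proj"],
    apply_lora_to_mlp=False,
    apply_lora_to_output=False,
    lora_rank=8,
    lora_alpha=16,
)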
