Skip to content

Commit

Permalink
Merge branch 'qanthony/mup' of https://github.com/EleutherAI/nanoGPT-mup into qanthony/mup
Browse files Browse the repository at this point in the history
  • Loading branch information
Quentin-Anthony committed Sep 17, 2024
2 parents 775dbf9 + fa1dbf3 commit 7d8669f
Show file tree
Hide file tree
Showing 10 changed files with 16 additions and 14 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,4 @@ __pycache__/
input.txt
env/
venv/
coord_check/*/out/*
mutransfer_lr/*/out/*
mup_examples/*/*/out/*
2 changes: 1 addition & 1 deletion mup_examples/coord_check_shakespeare_char/mup/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ do
n_heads=$((width / head_size))
mup_base_width=256
mup_width_multiplier=$(echo "scale=8; $width/$mup_base_width" | bc -l)
out_dir="coord_check/mup/out/width${width}_depth2_seed${seed}"
out_dir="mup_examples/coord_check_shakespeare_char/mup/out/width${width}_depth2_seed${seed}"
python train.py \
--out_dir=$out_dir \
--eval_interval=1 \
Expand Down
2 changes: 1 addition & 1 deletion mup_examples/coord_check_shakespeare_char/sp/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ do
do
head_size=64
n_heads=$((width / head_size))
out_dir="coord_check/sp/out/width${width}_depth2_seed${seed}"
out_dir="mup_examples/coord_check_shakespeare_char/sp/out/width${width}_depth2_seed${seed}"
python train.py \
--out_dir=$out_dir \
--eval_interval=1 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ do
n_heads=$((width / head_size))
mup_base_width=256
mup_width_multiplier=$(echo "scale=8; $width/$mup_base_width" | bc -l)
out_dir="coord_check/sp_with_mup_hidden_init/out/width${width}_depth2_seed${seed}"
out_dir="mup_examples/coord_check_shakespeare_char/sp_with_mup_hidden_init/out/width${width}_depth2_seed${seed}"
python train.py \
--out_dir=$out_dir \
--eval_interval=1 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ do
n_heads=$((width / head_size))
mup_base_width=256
mup_width_multiplier=$(echo "scale=8; $width/$mup_base_width" | bc -l)
out_dir="coord_check/sp_with_mup_hidden_init_and_lr/out/width${width}_depth2_seed${seed}"
out_dir="mup_examples/coord_check_shakespeare_char/sp_with_mup_hidden_init_and_lr/out/width${width}_depth2_seed${seed}"
python train.py \
--out_dir=$out_dir \
--eval_interval=1 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ do
n_heads=$((width / head_size))
mup_base_width=256
mup_width_multiplier=$(echo "scale=8; $width/$mup_base_width" | bc -l)
out_dir="coord_check/sp_with_mup_hidden_init_and_lr_output_logits/out/width${width}_depth2_seed${seed}"
out_dir="mup_examples/coord_check_shakespeare_char/sp_with_mup_hidden_init_and_lr_output_logits/out/width${width}_depth2_seed${seed}"
python train.py \
--out_dir=$out_dir \
--eval_interval=1 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ do
n_heads=$((width / head_size))
mup_base_width=256
mup_width_multiplier=$(echo "scale=8; $width/$mup_base_width" | bc -l)
out_dir="coord_check/sp_with_mup_hidden_init_and_lr_partial_output_logits/out/width${width}_depth2_seed${seed}"
out_dir="mup_examples/coord_check_shakespeare_char/sp_with_mup_hidden_init_and_lr_partial_output_logits/out/width${width}_depth2_seed${seed}"
mup_output_alpha=$(echo "scale=8; sqrt($mup_width_multiplier)" | bc -l)
python train.py \
--out_dir=$out_dir \
Expand Down
2 changes: 1 addition & 1 deletion mup_examples/mutransfer_lr_shakespeare_char/mup/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ LAYERS=2

for width in 256 512 1024 2048
do
for lr in 0.0009765625 0.00048828125 0.000244140625 0.0001220703125 0.00006103515625
for lr in 0.125 0.0625 0.03125 0.015625 0.0078125 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625 0.0001220703125 0.00006103515625
do
for seed in 1 2 3
do
Expand Down
9 changes: 6 additions & 3 deletions mup_examples/mutransfer_lr_shakespeare_char/plot.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions mup_examples/mutransfer_lr_shakespeare_char/sp/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ LAUNCHER=python

LAYERS=2

for width in 1024 2048
for width in 256 512 1024 2048
do
for lr in 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625 0.0001220703125 0.00006103515625 0.00003051757812
for lr in 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625 0.0001220703125 0.00006103515625 0.00003051757812 0.00048828125 0.000244140625 0.0001220703125 0.00006103515625 0.00003051757812 0.00001525878906 0.000007629394531 0.000003814697266
do
for seed in 1 2 3
do
Expand Down

0 comments on commit 7d8669f

Please sign in to comment.