Commit scaling-0.3
zhangir-azerbayev committed Oct 21, 2023
1 parent cdf03e3 · commit c9da39d
Showing 3 changed files with 103 additions and 3 deletions.
configs/scaling-0.3/1-4B.yml (49 additions, 0 deletions)
@@ -0,0 +1,49 @@
{
"pipe_parallel_size": 1,
"model_parallel_size": 1,

"num_layers": 24,
"hidden_size": 2048,
"num_attention_heads": 16,
"seq_length": 2048,
"max_position_embeddings": 2048,
"pos_emb": "rotary",
"rotary_pct": 0.25,
"no_weight_tying": true,
"gpt_j_residual": true,
"output_layer_parallelism": "column",

"attention_config": [[["flash"], 24]],

"scaled_upper_triang_masked_softmax_fusion": true,
"bias_gelu_fusion": true,

"init_method": "small_init",
"output_layer_init_method": "wang_init",

"optimizer": {
"type": "Adam",
"params": {
"lr": 0.0002,
"betas": [0.9, 0.95],
"eps": 1.0e-8
}
},
"min_lr": 0.00002,

"zero_optimization": {
"stage": 1,
"allgather_partitions": true,
"allgather_bucket_size": 500000000,
"overlap_comm": true,
"reduce_scatter": true,
"reduce_bucket_size": 500000000,
"contiguous_gradients": true,
"cpu_offload": false
},

"train_micro_batch_size_per_gpu": 64,
"gas": 2,
"data_impl": "mmap",
"num_workers": 1
}
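
A quick sanity check on the file name: with num_layers 24 and hidden_size 2048, the standard decoder-only estimate of 12·L·H² for the transformer blocks, plus two untied embedding matrices (this config sets "no_weight_tying": true), comes to about 1.41B parameters. A minimal sketch of the arithmetic, assuming the GPT-NeoX default vocabulary of 50304, which is not stated anywhere in this diff:

# Hypothetical back-of-envelope parameter count for 1-4B.yml.
# vocab=50304 is an assumed GPT-NeoX default, not part of this commit.
layers=24; hidden=2048; vocab=50304
blocks=$(( 12 * layers * hidden * hidden ))   # attention + MLP weights, 12*L*H^2
embeds=$(( 2 * vocab * hidden ))              # untied input/output embeddings
echo $(( blocks + embeds ))                   # prints 1414004736, i.e. ~1.4B
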
configs/scaling-0.3/410M.yml (49 additions, 0 deletions)
@@ -0,0 +1,49 @@
{
"pipe_parallel_size": 1,
"model_parallel_size": 1,

"num_layers": 24,
"hidden_size": 1024,
"num_attention_heads": 16,
"seq_length": 2048,
"max_position_embeddings": 2048,
"pos_emb": "rotary",
"rotary_pct": 0.25,
"no_weight_tying": true,
"gpt_j_residual": true,
"output_layer_parallelism": "column",

"attention_config": [[["flash"], 24]],

"scaled_upper_triang_masked_softmax_fusion": true,
"bias_gelu_fusion": true,

"init_method": "small_init",
"output_layer_init_method": "wang_init",

"optimizer": {
"type": "Adam",
"params": {
"lr": 0.0003,
"betas": [0.9, 0.95],
"eps": 1.0e-8
}
},
"min_lr": 0.00003,

"zero_optimization": {
"stage": 1,
"allgather_partitions": true,
"allgather_bucket_size": 500000000,
"overlap_comm": true,
"reduce_scatter": true,
"reduce_bucket_size": 500000000,
"contiguous_gradients": true,
"cpu_offload": false
},

"train_micro_batch_size_per_gpu": 64,
"gas": 2,
"data_impl": "mmap",
"num_workers": 1
}
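
The 410M config is identical except for hidden_size (1024 instead of 2048) and the learning rate, which rises to 3e-4 for the smaller model; in both files min_lr is a tenth of the peak. The same hypothetical estimate confirms the name:

# Same assumed-vocab estimate for 410M.yml (hidden size halved).
layers=24; hidden=1024; vocab=50304
echo $(( 12*layers*hidden*hidden + 2*vocab*hidden ))   # prints 405012480, i.e. ~410M
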
slurm/scaling-0.3/scaling.sh (5 additions, 3 deletions)
@@ -1,6 +1,6 @@
 #!/bin/bash
 #SBATCH --job-name="eleutherscaling"
-#SBATCH --array=1
+#SBATCH --array=0-15
 # #SBATCH --account=dw87
 #SBATCH --comment="eleutherai"
 #SBATCH --qos=dw87
@@ -20,8 +20,10 @@
 
 # parameters, steps, warmup steps, eval interval
 declare -a args=(
-"70M,1024,100,256"
-"160M,2048,200,512"
+"1-4B,4096,400,1024" "1-4B,8192,800,1024" "1-4B,12288,1000,1024" "1-4B,16384,1000,1024"
+"410M,2048,200,512" "410M,3072,300,512" "410M,4096,400,512" "410M,6144,600,512"
+"160M,1536,150,512" "160M,2048,200,512" "160M,3072,300,512" "160M,4096,400,512"
+"70M,1024,100,512" "70M,1536,150,512" "70M,2048,200,512" "70M,3072,300,512"
 )
 export SAVE_BASE_DIR="/home/za2514/compute/scaling/saved-weights/scaling-0.3"

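The sweep now launches 16 array tasks (--array=0-15), one per args entry: four training-step budgets for each of the four model sizes, with warmup at roughly a tenth of total steps (capped at 1000). The rest of the script is collapsed in this view; a plausible sketch of how such an array script typically consumes the tuples follows, with all variable names hypothetical rather than taken from the commit:

# Hypothetical continuation; the actual script body is collapsed above.
# Each array task selects one "size,steps,warmup,eval_interval" tuple.
IFS=',' read -r MODEL_SIZE TRAIN_STEPS WARMUP_STEPS EVAL_INTERVAL \
    <<< "${args[$SLURM_ARRAY_TASK_ID]}"

CONFIG="configs/scaling-0.3/${MODEL_SIZE}.yml"   # e.g. 1-4B.yml or 410M.yml
SAVE_DIR="${SAVE_BASE_DIR}/${MODEL_SIZE}-${TRAIN_STEPS}steps"
echo "task ${SLURM_ARRAY_TASK_ID}: ${CONFIG}, ${TRAIN_STEPS} steps, warmup ${WARMUP_STEPS}"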