diff --git a/34b_launch_script.sh b/34b_launch_script.sh
index 3718c5862..2dd0166b8 100644
--- a/34b_launch_script.sh
+++ b/34b_launch_script.sh
@@ -10,9 +10,8 @@
 #SBATCH --open-mode=append
 #SBATCH --requeue
 
-# setup the environment using the script we created before
-source /fsx/proj-mathlm/conda_setup_deeperspeed.sh
-#source /fsx/quentin/setup.sh
+# set up the conda environment and the paths to the NCCL install.
+source /path/to/conda_setup_script.sh
 
 ds_report
 
@@ -36,4 +35,4 @@ export DLTS_HOSTFILE=path/to/hostfile/hosts_$SLURM_JOBID
 # launch distributed job. If using `"deepspeed_slurm": true` and `"launcher": "slurm"` on a SLURM cluster,
 # then NeoX will handle the creation of a distributed run across 256 gpus.
 python $TRAIN_PATH/deepy.py $TRAIN_PATH/train.py \
-    --conf_dir /path/to/math-lm/pretraining llemma_34b.yml data_mixture.yml
\ No newline at end of file
+    --conf_dir /path/to/math-lm/pretraining llemma_34b.yml data_mixture.yml
diff --git a/configs/data_mixture.yml b/configs/data_mixture.yml
index 7c6a1b9f1..637ece59c 100644
--- a/configs/data_mixture.yml
+++ b/configs/data_mixture.yml
@@ -1,6 +1,6 @@
 {
-  "train-data-paths": ["/fsx/proj-mathlm/proof-pile_llama/train/arxiv-rp/arxiv-rp_text_document", "/fsx/proj-mathlm/open-web-math-v1.2_llama/train/open-web-math/open-web-math_text_document", "/fsx/proj-mathlm/code-with-proofsteps_llama/train/code-with-proofsteps/code-with-proofsteps_text_document", "/fsx/proj-mathlm/proof-pile_llama/train/pile-sample/pile-sample_text_document", "/fsx/proj-mathlm/code-rp_llama/train/code-rp/code-rp_text_document"],
+  "train-data-paths": ["/path/to/proof-pile_llama_tokenizer/train/arxiv-rp/arxiv-rp_text_document", "/path/to/open-web-math-v1.2_llama/train/open-web-math/open-web-math_text_document", "/path/to/code-with-proofsteps_llama/train/code-with-proofsteps/code-with-proofsteps_text_document", "/path/to/proof-pile_llama_tokenizer/train/pile-sample/pile-sample_text_document", "/path/to/code-rp_llama_tokenizer/train/code-rp/code-rp_text_document"],
   "train-data-weights": [2, 4, 1, 0.147368, 0.221053],
-  "valid-data-paths": ["/fsx/proj-mathlm/proof-pile_llama/validation/arxiv-rp/arxiv-rp_text_document", "/fsx/proj-mathlm/open-web-math-v1.2_llama/validation/open-web-math/open-web-math_text_document", "/fsx/proj-mathlm/code-with-proofsteps_llama/validation/code-with-proofsteps/code-with-proofsteps_text_document"],
-  "test-data-paths": ["/fsx/proj-mathlm/proof-pile_llama/test/arxiv-rp/arxiv-rp_text_document", "/fsx/proj-mathlm/open-web-math-v1.2_llama/test/open-web-math/open-web-math_text_document", "/fsx/proj-mathlm/code-with-proofsteps_llama/test/code-with-proofsteps/code-with-proofsteps_text_document"],
-}
\ No newline at end of file
+  "valid-data-paths": ["/path/to/proof-pile_llama_tokenizer/validation/arxiv-rp/arxiv-rp_text_document", "/path/to/open-web-math-v1.2_llama/validation/open-web-math/open-web-math_text_document", "/path/to/code-with-proofsteps_llama/validation/code-with-proofsteps/code-with-proofsteps_text_document"],
+  "test-data-paths": ["/path/to/proof-pile_llama_tokenizer/test/arxiv-rp/arxiv-rp_text_document", "/path/to/open-web-math-v1.2_llama/test/open-web-math/open-web-math_text_document", "/path/to/code-with-proofsteps_llama/test/code-with-proofsteps/code-with-proofsteps_text_document"],
+}
diff --git a/configs/llemma_34b.yml b/configs/llemma_34b.yml
index 88a714575..46e1396cc 100644
--- a/configs/llemma_34b.yml
+++ b/configs/llemma_34b.yml
@@ -98,11 +98,11 @@
 
   "use_wandb": true,
-  "wandb_group": "34b-codellama-5e-5lr",
-  "wandb_project": "math-lm",
+  "wandb_group": "llemma_34b_replication",
+  "wandb_project": "your-project-name",
   "wandb_team": "your-teamname-here",
   "wandb_host": "https://api.wandb.ai",
 
   "launcher": "slurm",
   "deepspeed_slurm": true
-}
\ No newline at end of file
+}
diff --git a/configs/llemma_7b.yml b/configs/llemma_7b.yml
index c77c1c841..fa61af90f 100644
--- a/configs/llemma_7b.yml
+++ b/configs/llemma_7b.yml
@@ -95,11 +95,11 @@
   "checkpoint_validation_with_forward_pass": true,
 
   "use_wandb": true,
-  "wandb_group": "codellama_200btok_mp2_32node_2gas_deeperspeed",
-  "wandb_project": "math-lm",
+  "wandb_group": "llemma_7b_replication",
+  "wandb_project": "your-project-name",
   "wandb_team": "your-teamname-here",
   "wandb_host": "https://api.wandb.ai",
 
   "launcher": "slurm",
   "deepspeed_slurm": true
-}
\ No newline at end of file
+}