From a5b7236b269da58276c819098015169d24c3e6d9 Mon Sep 17 00:00:00 2001
From: Sami Jaghouar <sami.jaghouar@hotmail.fr>
Date: Thu, 12 Sep 2024 10:53:52 +0000
Subject: [PATCH] run_training.sh: add fail_rank_drop and skip_load_from_peers to usage example

---
 open_diloco/run_training.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/open_diloco/run_training.sh b/open_diloco/run_training.sh
index f6e30a0..b0871d3 100755
--- a/open_diloco/run_training.sh
+++ b/open_diloco/run_training.sh
@@ -8,7 +8,7 @@
 
 
 # you can either pass a fixed initial peer or set it to auto and the script will start a dht server for you
-## ./run_training.sh 4 1 auto  --per-device-train-batch-size 8 --total-batch-size 128 --lr 1e-2 --path-model ../tests/models/llama-2m-fresh --project debug  --no-torch-compile --hv.local-steps 100  --fake-data --hv.matchmaking_time 2
+## ./run_training.sh 4 1 auto  --per-device-train-batch-size 8 --total-batch-size 128 --lr 1e-2 --path-model ../tests/models/llama-2m-fresh --project debug  --no-torch-compile --hv.local-steps 100  --fake-data --hv.matchmaking_time 2 --hv.fail_rank_drop --hv.skip_load_from_peers
 
 # Function to get CUDA devices based on the number of GPUs and index
 function get_cuda_devices() {