From 1e029724deabbc93b66aaabffa7edf1a8fc729c5 Mon Sep 17 00:00:00 2001 From: Sami Jaghouar Date: Tue, 24 Sep 2024 02:27:40 +0000 Subject: [PATCH] fix it --- open_diloco/simulate_multi_node.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/open_diloco/simulate_multi_node.sh b/open_diloco/simulate_multi_node.sh index c5def4a..fde4efa 100755 --- a/open_diloco/simulate_multi_node.sh +++ b/open_diloco/simulate_multi_node.sh @@ -57,7 +57,7 @@ mkdir -p logs for i in $(seq 0 $(($N - 1 ))) do > logs/log$i - CUDA_VISIBLE_DEVICES=$(get_cuda_devices $NUM_GPU $i) uv run torchrun --nproc_per_node=$NUM_GPU --node-rank $i --rdzv-endpoint localhost:9999 --nnodes=$N $@ > logs/log$i 2>&1 & + CUDA_VISIBLE_DEVICES=$(get_cuda_devices $NUM_GPU $i) torchrun --nproc_per_node=$NUM_GPU --node-rank $i --rdzv-endpoint localhost:9999 --nnodes=$N $@ > logs/log$i 2>&1 & child_pids+=($!) done