Merge pull request #25 from knuedd/develop

Test scripts for SLURM functionality
timcallow · Jan 7, 2025 · 8d729f1 · 8d729f1
2 parents 0b8f43b + 8997812
commit 8d729f1
Show file tree

Hide file tree

Showing 4 changed files with 238 additions and 0 deletions.
diff --git a/tests/README.md b/tests/README.md
@@ -0,0 +1,31 @@
+# Tests for the datalad Slurm extension
+
+The following test scripts can be executed manually and should run correctly or produce errors that should be handled as errors.
+
+Since it needs to work on datalad repositories which are also git repositories, and because a working Slurm environment is required, this is not (yet) part of automated CI tests ... let's see later if this would be feasible via git CI anyway.
+
+
+
+## In general
+
+Each test should be run as:
+
+`./test_x.sh <dir>`, where `<dir>` is some (temporary) directory to store the test results.
+
+All tests will create their own temporary datalad repo inside `<dir>` and work inside that. The repos can be removed afterwards by running `chmod -R u+w datalad-slurm-test*/; rm -Rf datalad-slurm-test*/` inside `<dir>`.
+
+The `slurm_test*.template.sh` files need to be modified to match the local slurm environment.
+
+## Test 01
+
+Creates many job dirs with a job script in each, schedules all jobs with `datalad schedule`, waits until all of them have run through, then finishes all jobs with `datalad finish`.
+
+This should run without any errors.
+
+## Test 02
+
+Creates many job dirs with job scripts in them, as in Test 01. However, the jobs have conflicting output directories, so datalad should refuse to schedule some of them.
+
+This should produce some errors by datalad:
+* The first bunch of jobs should run fine including a clean `datalad finish`
+* The second bunch of jobs should not get scheduled because datalad sees the conflict and refuses to schedule them.
diff --git a/tests/slurm_test01.template.sh b/tests/slurm_test01.template.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#SBATCH --job-name="DLtest01"         # name of the job
+#SBATCH --partition=casus_genoa       # partition to be used (site-specific)
+#SBATCH -A casus                      # account to charge (site-specific)
+#SBATCH --time=0:05:00                # walltime limit (5 minutes)
+#SBATCH --ntasks=1                    # number of tasks
+#SBATCH --cpus-per-task=1             # number of CPUs per task
+#SBATCH --output=log.slurm-%j.out     # job log, %j expands to the job ID
+
+# Dummy job for the datalad Slurm tests: writes a timestamped text file
+# line by line and then creates a compressed copy of it.
+echo "started"
+
+OUTPUT="output_test_$(date -Is | tr -d ':').txt"
+
+# simulate some text output (takes at least 50 seconds)
+for i in $(seq 1 50); do
+    echo "$i" | tee -a "$OUTPUT"
+    sleep 1
+done
+
+# simulate some binary output which will become an annex file (-k keeps the text file)
+bzip2 -k "$OUTPUT"
+
+echo "ended"
diff --git a/tests/test_01_many_jobs.sh b/tests/test_01_many_jobs.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+
+set -e # abort on errors
+
+# Test datalad 'schedule' and 'finish' functionality
+#   - create some job dirs and job scripts and 'commit' them
+#   - then 'datalad schedule' all jobs from their job dirs
+#   - wait until all of them are finished, then run 'datalad finish'
+#
+# Expected results: should run without any errors
+#
+# Usage: test_01_many_jobs.sh <dir>
+#   <dir>: directory in which the temporary test repo is created
+
+if [[ -z "$1" ]] ; then
+    # diagnostics go to stderr; a valid exit status lies within 0..255
+    echo "no temporary directory for tests given, abort" >&2
+    echo "" >&2
+    echo "... call as $0 <dir>" >&2
+    exit 1
+fi
+
+D=$1
+
+echo "start"
+
+# directory containing this script and the job script template
+B=$(dirname "$0")
+
+echo "from src dir $B"
+
+## create a test repo (timestamped name, ':' removed from the ISO date)
+
+TESTDIR="$D/datalad-slurm-test-01_$(date -Is | tr -d ':')"
+
+datalad create -c text2git "$TESTDIR"
+
+
+### generic part for all the tests ending here, specific parts follow ###
+
+
+cp "$B/slurm_test01.template.sh" "$TESTDIR/"
+cd "$TESTDIR"
+
+# numeric suffixes of the job directories
+TARGETS=({17..21})
+
+for i in "${TARGETS[@]}" ; do
+    DIR="test_01_output_dir_$i"
+    mkdir -p "$DIR"
+    cp slurm_test01.template.sh "$DIR/slurm_test01.sh"
+done
+
+# commit the job dirs and scripts before scheduling
+datalad save -m "add test job dirs and scripts"
+
+for i in "${TARGETS[@]}" ; do
+    DIR="test_01_output_dir_$i"
+    # subshell: the main script's working directory stays unchanged;
+    # the job directory itself is passed as the job's output (-o)
+    ( cd "$DIR" && datalad schedule -o "$PWD" sbatch slurm_test01.sh )
+done
+
+# poll the queue until no job with the template's job name is listed;
+# NOTE: any other job of this user with the same name also keeps the loop waiting
+while squeue -u "$USER" | grep -q "DLtest01" ; do
+    echo "    ... wait for jobs to finish"
+    sleep 60
+done
+
+datalad finish --list-open-jobs
+
+echo "finishing completed jobs:"
+datalad finish
+
+echo " ### git log in this repo ### "
+echo ""
+git log
+
+
+
diff --git a/tests/test_02_conflicting_jobs.sh b/tests/test_02_conflicting_jobs.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+
+set +e # do NOT abort on errors
+
+# Test datalad 'schedule' and 'finish' functionality
+#   - create some job dirs and job scripts and 'commit' them
+#   - then 'datalad schedule' all jobs from their job dirs
+#   - then 'datalad schedule' more jobs from the same set of job dirs
+#   - wait until all of them are finished, then run 'datalad finish'
+#
+# Expected results: should handle the first set of jobs fine until the end,
+# but refuse to schedule the second set of jobs
+#
+# Usage: test_02_conflicting_jobs.sh <dir>
+#   <dir>: directory in which the temporary test repo is created
+
+if [[ -z "$1" ]] ; then
+    # diagnostics go to stderr; a valid exit status lies within 0..255
+    echo "no temporary directory for tests given, abort" >&2
+    echo "" >&2
+    echo "... call as $0 <dir>" >&2
+    exit 1
+fi
+
+D=$1
+
+echo "start"
+
+# directory containing this script and the job script template
+B=$(dirname "$0")
+
+echo "from src dir $B"
+
+## create a test repo (timestamped name, ':' removed from the ISO date)
+
+TESTDIR="$D/datalad-slurm-test-02_$(date -Is | tr -d ':')"
+
+# errors are tolerated further down, but not during setup: without the
+# test repo every later command would act on the caller's directory
+datalad create -c text2git "$TESTDIR" || exit 1
+
+
+### generic part for all the tests ending here, specific parts follow ###
+
+
+cp "$B/slurm_test01.template.sh" "$TESTDIR/" || exit 1
+cd "$TESTDIR" || exit 1
+
+# numeric suffixes of the job directories
+TARGETS=({17..21})
+
+for i in "${TARGETS[@]}" ; do
+    DIR="test_02_output_dir_$i"
+    mkdir -p "$DIR"
+    # two copies of the same job script per directory
+    cp slurm_test01.template.sh "$DIR/slurm_test01.sh"
+    cp slurm_test01.template.sh "$DIR/slurm_test02.sh"
+done
+
+# commit the job dirs and scripts before scheduling
+datalad save -m "add test job dirs and scripts"
+
+echo "    --> schedule some jobs"
+
+for i in "${TARGETS[@]}" ; do
+    DIR="test_02_output_dir_$i"
+    # subshell: a failing 'cd' cannot shift the main script's working directory;
+    # the job directory itself is passed as the job's output (-o)
+    ( cd "$DIR" && datalad schedule -o "$PWD" sbatch slurm_test01.sh )
+done
+
+# short pause before submitting the conflicting set
+sleep 5
+
+echo "    --> now try to schedule conflicting jobs"
+
+for i in "${TARGETS[@]}" ; do
+    DIR="test_02_output_dir_$i"
+    # same output directory as the jobs scheduled above
+    ( cd "$DIR" && datalad schedule -o "$PWD" sbatch slurm_test02.sh )
+done
+
+
+# both job scripts are copies of the template, so all jobs share one name;
+# NOTE: any other job of this user with the same name also keeps the loop waiting
+while squeue -u "$USER" | grep -q "DLtest01" ; do
+    echo "    ... wait for jobs to finish"
+    sleep 60
+done
+
+datalad finish --list-open-jobs
+
+echo "finishing completed jobs:"
+datalad finish
+
+echo " ### git log in this repo ### "
+echo ""
+git log
+
+
+