generated from datalad/datalad-extension-template
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from knuedd/develop
Test scripts for SLURM functionality
- Loading branch information
Showing
4 changed files
with
238 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Tests for the datalad Slurm extension | ||
|
||
The following test scripts can be executed manually. Each should either run without errors or produce the errors that its section below describes as expected. | ||
|
||
Because the tests operate on datalad repositories (which are also git repositories) and require a working Slurm environment, they are not (yet) part of the automated CI tests. Whether they can be run via CI will be evaluated later. | ||
|
||
|
||
|
||
## In general | ||
|
||
Each test should be run as: | ||
|
||
`./test_x.sh <dir>`, where `<dir>` is some (temporary) directory to store the test results. | ||
|
||
Each test creates its own temporary datalad repo inside `<dir>` and works inside it. The repos can be removed afterwards by running `chmod -R u+w datalad-slurm-test*/; rm -Rf datalad-slurm-test*/` from within `<dir>`. | ||
|
||
The `slurm_test*.template.sh` files need to be modified to match the local slurm environment. | ||
|
||
## Test 01 | ||
|
||
Creates many job dirs, each containing a job script, then schedules all jobs with `datalad schedule`, waits until all of them have run through, and finally runs `datalad finish` for all jobs. | ||
|
||
This should run without any errors. | ||
|
||
## Test 02 | ||
|
||
Creates many job dirs with job scripts in them, as in Test 01. However, the jobs have conflicting output directories, so datalad should refuse to schedule some of them. | ||
|
||
This should produce some errors by datalad: | ||
* The first bunch of jobs should run fine including a clean `datalad finish` | ||
* The second bunch of jobs should not get scheduled because datalad sees the conflict and refuses to schedule them. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash
#
# Slurm job script template used by the datalad-slurm test scripts.
# It writes 50 numbered lines (one per second) to a timestamped text file
# in the submission directory and then compresses a copy of that file.
#
# The #SBATCH values below are site-specific and must be adapted to the
# local Slurm installation before the tests are run. The job name is what
# the test drivers grep for in the squeue output, so keep it in sync.
#
#SBATCH --job-name="DLtest01"      # name of the job
#SBATCH --partition=casus_genoa    # partition to be used (defq, gpu or intel)
#SBATCH -A casus
#SBATCH --time=0:05:00             # walltime (up to 96 hours)
#SBATCH --ntasks=1                 # number of tasks
#SBATCH --cpus-per-task=1          # number of CPUs per task
#SBATCH --output=log.slurm-%j.out

echo "started"

# Timestamped output name; colons are stripped from the ISO-8601 timestamp.
OUTPUT="output_test_$(date -Is | tr -d ':').txt"

# simulate some text output
for i in {1..50}; do
  echo "$i" | tee -a "$OUTPUT"
  sleep 1s
done

# simulate some binary output which will become an annex file
# (-k keeps the uncompressed text file next to the .bz2)
bzip2 -k "$OUTPUT"

echo "ended"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#!/usr/bin/env bash

set -e # abort on errors

# Test datalad 'schedule' and 'finish' functionality
# - create some job dirs and job scripts and 'commit' them
# - then 'datalad schedule' all jobs from their job dirs
# - wait until all of them are finished, then run 'datalad finish'
#
# Usage: <this script> <dir>
#   <dir>  existing (temporary) directory in which the test repo is created
#
# Expected results: should run without any errors

if [[ -z "$1" ]]; then
  # Usage errors go to stderr; exit with a valid non-zero status.
  echo "no temporary directory for tests given, abort" >&2
  echo "" >&2
  echo "... call as $0 <dir>" >&2
  exit 1
fi

D=$1

echo "start"

# Directory containing this script; the job template is expected next to it.
B=$(dirname -- "$0")

echo "from src dir $B"

## create a test repo

# Timestamped repo name; colons are stripped from the ISO-8601 timestamp.
TESTDIR="$D/datalad-slurm-test-01_$(date -Is | tr -d ':')"

datalad create -c text2git "$TESTDIR"

### generic part for all the tests ending here, specific parts follow ###

cp -- "$B/slurm_test01.template.sh" "$TESTDIR/"
cd -- "$TESTDIR"

# Numeric suffixes of the job directories to create.
TARGETS=({17..21})

# Create one job dir per target, each with its own copy of the job script.
for i in "${TARGETS[@]}"; do
  DIR="test_01_output_dir_$i"
  mkdir -p "$DIR"
  cp slurm_test01.template.sh "$DIR/slurm_test01.sh"
done

datalad save -m "add test job dirs and scripts"

# Schedule every job from inside its own job dir. The subshell confines the
# directory change; with 'set -e' a failure inside it still aborts the script.
for i in "${TARGETS[@]}"; do
  DIR="test_01_output_dir_$i"
  (
    cd "$DIR"
    datalad schedule -o "$PWD" sbatch slurm_test01.sh
  )
done

# Poll the queue until none of this user's jobs named "DLtest01" (the job
# name set in slurm_test01.template.sh) are listed any more.
while [[ "$(squeue -u "$USER" | grep -c "DLtest01")" -ne 0 ]]; do
  echo " ... wait for jobs to finish"
  sleep 1m
done

datalad finish --list-open-jobs

echo "finishing completed jobs:"
datalad finish

echo " ### git log in this repo ### "
echo ""
git log
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/bin/env bash

set +e # do NOT abort on errors: the conflicting schedule calls are expected to fail

# Test datalad 'schedule' and 'finish' functionality
# - create some job dirs and job scripts and 'commit' them
# - then 'datalad schedule' all jobs from their job dirs
# - then 'datalad schedule' more jobs from the same set of job dirs
# - wait until all of them are finished, then run 'datalad finish'
#
# Usage: <this script> <dir>
#   <dir>  existing (temporary) directory in which the test repo is created
#
# Expected results: should handle the first set of jobs fine until the end,
# but refuse to schedule the second set of jobs

if [[ -z "$1" ]]; then
  # Usage errors go to stderr; exit with a valid non-zero status.
  echo "no temporary directory for tests given, abort" >&2
  echo "" >&2
  echo "... call as $0 <dir>" >&2
  exit 1
fi

D=$1

echo "start"

# Directory containing this script; the job template is expected next to it.
B=$(dirname -- "$0")

echo "from src dir $B"

## create a test repo

# Timestamped repo name; colons are stripped from the ISO-8601 timestamp.
TESTDIR="$D/datalad-slurm-test-02_$(date -Is | tr -d ':')"

# Because errexit is off, the setup steps are checked explicitly: if any of
# them failed silently, everything below would run in the caller's current
# directory instead of a fresh test repo.
datalad create -c text2git "$TESTDIR" || exit 1

### generic part for all the tests ending here, specific parts follow ###

cp -- "$B/slurm_test01.template.sh" "$TESTDIR/" || exit 1
cd -- "$TESTDIR" || exit 1

# Numeric suffixes of the job directories to create.
TARGETS=({17..21})

# Each job dir gets two copies of the same template, so both scripts target
# the same output directory.
for i in "${TARGETS[@]}"; do
  DIR="test_02_output_dir_$i"
  mkdir -p "$DIR" || exit 1
  cp slurm_test01.template.sh "$DIR/slurm_test01.sh" || exit 1
  cp slurm_test01.template.sh "$DIR/slurm_test02.sh" || exit 1
done

datalad save -m "add test job dirs and scripts" || exit 1

echo " --> schedule some jobs"

# First bunch: expected to be scheduled. The subshell confines the directory
# change, so a failed 'cd' can never make the script wander out of the repo.
for i in "${TARGETS[@]}"; do
  DIR="test_02_output_dir_$i"
  (
    cd "$DIR" || exit 1
    datalad schedule -o "$PWD" sbatch slurm_test01.sh
  )
done

sleep 5s

echo " --> now try to schedule conflicting jobs"

# Second bunch: same output directories as the first bunch. Errors from
# 'datalad schedule' are the expected outcome here and must not abort.
for i in "${TARGETS[@]}"; do
  DIR="test_02_output_dir_$i"
  (
    cd "$DIR" || exit 1
    datalad schedule -o "$PWD" sbatch slurm_test02.sh
  )
done

# Poll the queue until none of this user's jobs named "DLtest01" (the job
# name set in slurm_test01.template.sh) are listed any more.
while [[ "$(squeue -u "$USER" | grep -c "DLtest01")" -ne 0 ]]; do
  echo " ... wait for jobs to finish"
  sleep 1m
done

datalad finish --list-open-jobs

echo "finishing completed jobs:"
datalad finish

echo " ### git log in this repo ### "
echo ""
git log
|
||
|
||
|