-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add container for slurm 23.02.1 / leap:15.4 (for MCH tasnam)
- Loading branch information
1 parent
dc65d7c
commit 0354725
Showing
5 changed files
with
384 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# Slurm test container based on openSUSE Leap 15.4; the platform is fixed to linux/amd64. | ||
FROM --platform=linux/amd64 opensuse/leap:15.4 | ||
|
||
# Build-time settings: Slurm release to build, install prefix and configuration directory. | ||
ARG SLURM_VERSION=23.02.1 | ||
ARG SLURM_ROOT=/opt/slurm-${SLURM_VERSION} | ||
ARG SLURM_CONFDIR=${SLURM_ROOT}/etc | ||
|
||
# Persist the build arguments as environment variables of the running container | ||
# (entrypoint.sh reads SLURM_ROOT and SLURM_CONFDIR). The key=value form is used | ||
# instead of the legacy space-separated "ENV key value" syntax. | ||
ENV SLURM_VERSION=${SLURM_VERSION} | ||
ENV SLURM_ROOT=${SLURM_ROOT} | ||
ENV SLURM_CONFDIR=${SLURM_CONFDIR} | ||
|
||
|
||
# Build and runtime dependencies for Slurm plus a few interactive tools; sudo is | ||
# needed by entrypoint.sh to start munged as the munge user. | ||
# Everything is installed in one layer, and the zypper caches are removed in that | ||
# same layer so they do not remain in the image. | ||
RUN zypper install -y \ | ||
munge \ | ||
munge-devel \ | ||
libnuma1 \ | ||
libnuma-devel \ | ||
librrd8 \ | ||
readline-devel \ | ||
hwloc \ | ||
hwloc-devel \ | ||
hdf5 \ | ||
hdf5-devel \ | ||
lz4 \ | ||
liblz4-devel \ | ||
libz1 \ | ||
zlib-devel \ | ||
freeipmi \ | ||
freeipmi-devel \ | ||
dbus-1 \ | ||
dbus-1-devel \ | ||
make \ | ||
gcc \ | ||
gcc-c++ \ | ||
curl \ | ||
tar \ | ||
bzip2 \ | ||
python3 \ | ||
vim \ | ||
ca-certificates \ | ||
less \ | ||
mpich \ | ||
mpich-devel \ | ||
sudo \ | ||
lua53 \ | ||
lua53-devel \ | ||
libmount-devel \ | ||
&& zypper clean --all | ||
|
||
# Create the slurm account (without a home directory) together with the log and | ||
# spool directories; both spool directories are handed over to that account. | ||
RUN useradd -M slurm \ | ||
&& mkdir -p /var/log/slurm /var/spool/slurmctld /var/spool/slurmd \ | ||
&& chown slurm /var/spool/slurmctld /var/spool/slurmd \ | ||
&& chmod u+rwx /var/spool/slurmctld /var/spool/slurmd | ||
|
||
|
||
# Copy the build helper into the image and compile Slurm with it. | ||
COPY install_slurm.sh . | ||
|
||
# The arguments are quoted so that an empty --build-arg value cannot shift the | ||
# positional parameters the script expects (version, prefix, sysconfdir, then | ||
# extra configure flags). | ||
RUN ./install_slurm.sh "${SLURM_VERSION}" "${SLURM_ROOT}" "${SLURM_CONFDIR}" --enable-multiple-slurmd | ||
|
||
# Place cgroup.conf and the slurm.conf template in the Slurm configuration directory. | ||
RUN mkdir -p ${SLURM_CONFDIR} | ||
COPY cgroup.conf slurm.conf.in ${SLURM_CONFDIR}/ | ||
|
||
# entrypoint.sh is the container entrypoint; CMD supplies its default argument. | ||
COPY entrypoint.sh . | ||
ENTRYPOINT ["./entrypoint.sh"] | ||
CMD ["bash"] | ||
|
||
#COPY run_slurm_examples example.job mpi_example.job plugin.cpp mpi_hello.c . | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# cgroup.conf for the container image (the Dockerfile copies it into SLURM_CONFDIR). | ||
CgroupAutomount=yes | ||
# Core and RAM constraints are both switched off. | ||
ConstrainCores=no | ||
ConstrainRAMSpace=no | ||
# Mount point of the cgroup hierarchy and the cgroup plugin version to use. | ||
CgroupMountpoint=/sys/fs/cgroup | ||
CgroupPlugin=cgroup/v1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#!/bin/bash | ||
# | ||
# Container entrypoint: starts helper daemons, writes slurm.conf from its template, | ||
# launches slurmctld plus one slurmd per emulated node, then execs the given command. | ||
|
||
# Launch D-Bus. NOTE(review): the output of dbus-launch is not captured, so nothing | ||
# below learns the bus address it reports -- confirm this is intended. | ||
dbus-launch | ||
# Start the munge daemon as the munge user. | ||
sudo -u munge munged | ||
|
||
# Source the MPICH environment script into this shell. | ||
. /usr/lib64/mpi/gcc/mpich/bin/mpivars.sh | ||
|
||
: "${SLURM_CONF_IN=$SLURM_CONFDIR/slurm.conf.in}" | ||
: "${SLURM_CONF=$SLURM_CONFDIR/slurm.conf}" | ||
|
||
# Default number of slurm nodes | ||
: "${SLURM_NUMNODES=3}" | ||
|
||
# Default slurm controller | ||
: "${SLURMCTLD_HOST=$HOSTNAME}" | ||
: "${SLURMCTLD_ADDR=127.0.0.1}" | ||
|
||
# Default node info | ||
: "${NODE_HOST=$HOSTNAME}" | ||
: "${NODE_ADDR=127.0.0.1}" | ||
: "${NODE_BASEPORT=6001}" | ||
|
||
# Default hardware profile | ||
: "${NODE_HW=CPUs=4}" | ||
|
||
# Generate node names and associated ports | ||
NODE_NAMES=$(printf "nd[%05i-%05i]" 1 $SLURM_NUMNODES) | ||
NODE_PORTS=$(printf "%i-%i" $NODE_BASEPORT $(($NODE_BASEPORT+$SLURM_NUMNODES-1))) | ||
|
||
|
||
# Print the effective settings, and the values derived from them, before anything | ||
# is started. Each here-document is piped through "column -t" for aligned output. | ||
echo "INFO:" | ||
echo "INFO: Creating $SLURM_CONF with" | ||
echo "INFO: " | ||
column -t <<-EOF | ||
INFO: SLURMCTLD_HOST=$SLURMCTLD_HOST SLURMCTLD_ADDR=$SLURMCTLD_ADDR | ||
INFO: NODE_HOST=$NODE_HOST NODE_ADDR=$NODE_ADDR NODE_BASEPORT=$NODE_BASEPORT | ||
INFO: NODE_HW=$NODE_HW | ||
INFO: SLURM_NUMNODES=$SLURM_NUMNODES | ||
EOF | ||
echo "INFO: " | ||
echo "INFO: Derived values:" | ||
echo "INFO:" | ||
column -t <<-EOF | ||
INFO: NODE_NAMES=$NODE_NAMES | ||
INFO: NODE_PORTS=$NODE_PORTS | ||
EOF | ||
echo "INFO:" | ||
echo "INFO: Override any of the non-derived values by setting the respective environment variable" | ||
echo "INFO: when starting Docker." | ||
echo "INFO:" | ||
|
||
# Make the Slurm installation reachable from this script and its child processes. | ||
export PATH=$SLURM_ROOT/bin:$PATH | ||
# Append the previous value only when it is non-empty: a trailing ':' would add an | ||
# empty entry, which the dynamic loader treats as the current directory. | ||
export LD_LIBRARY_PATH=$SLURM_ROOT/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} | ||
# NOTE(review): verify that the man pages are installed under $SLURM_ROOT/man. | ||
export MANPATH=$SLURM_ROOT/man:$MANPATH | ||
|
||
# Write slurm.conf: the template with its SLURMCTLDHOST / SLURMCTLDADDR placeholders | ||
# substituted, followed by the generated node and partition lines, which sed reads | ||
# from stdin through the trailing "-". The file names are quoted so that a path | ||
# containing whitespace stays a single argument. | ||
( | ||
echo "NodeName=${NODE_NAMES} NodeHostname=${NODE_HOST} NodeAddr=${NODE_ADDR} Port=${NODE_PORTS} State=UNKNOWN ${NODE_HW}" | ||
echo "PartitionName=dkr Nodes=ALL Default=YES MaxTime=INFINITE State=UP" | ||
) \ | ||
| sed -e "s/SLURMCTLDHOST/${SLURMCTLD_HOST}/" \ | ||
-e "s/SLURMCTLDADDR/${SLURMCTLD_ADDR}/" \ | ||
"$SLURM_CONF_IN" - \ | ||
> "$SLURM_CONF" | ||
|
||
# Expand the bracketed node-name range into individual host names. The argument is | ||
# quoted because it contains [ ], which the shell would otherwise treat as a glob pattern. | ||
NODE_NAME_LIST=$(scontrol show hostnames "$NODE_NAMES") | ||
|
||
# Make every emulated node name resolve to NODE_ADDR. A line is appended only when | ||
# it is not already present, so running the script again does not add duplicates. | ||
# NODE_NAME_LIST is left unquoted on purpose: it is split into one word per node. | ||
for n in $NODE_NAME_LIST | ||
do | ||
grep -qxF -e "$NODE_ADDR $n" /etc/hosts || echo "$NODE_ADDR $n" >> /etc/hosts | ||
done | ||
|
||
echo | ||
echo "Starting Slurm services..." | ||
echo | ||
|
||
"$SLURM_ROOT/sbin/slurmctld" | ||
|
||
# One slurmd per emulated node, each started under its own node name (-N). | ||
for n in $NODE_NAME_LIST | ||
do | ||
"$SLURM_ROOT/sbin/slurmd" -N "$n" | ||
done | ||
|
||
echo | ||
sinfo | ||
echo | ||
echo | ||
|
||
# Replace this shell with the command passed to the entrypoint. | ||
exec "$@" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#!/bin/bash -x | ||
# | ||
# Usage: install_slurm.sh <slurm-version> <install-prefix> <sysconf-dir> [configure-args] | ||
# | ||
# Downloads the Slurm source tarball for <slurm-version>, then configures, builds and | ||
# installs it under <install-prefix> with <sysconf-dir> as its configuration directory. | ||
# Passing NO_BUILD as the only configure argument stops after unpacking the sources. | ||
# | ||
|
||
SLURM_VERSION=$1 | ||
SLURM_ROOT=$2 | ||
SLURM_CONFDIR=$3 | ||
|
||
# All three leading arguments are mandatory; diagnostics go to stderr. | ||
if [ -z "$SLURM_VERSION" ] || [ -z "$SLURM_ROOT" ] || [ -z "$SLURM_CONFDIR" ] | ||
then | ||
echo "Usage: install_slurm.sh <slurm-version> <install-prefix> <sysconf-dir> [configure-args]" >&2 | ||
echo "No Slurm version, install-prefix or sysconf-dir specified on command line. Aborting." >&2 | ||
exit 1 | ||
fi | ||
|
||
# Everything after the three mandatory arguments is handed to configure. | ||
shift 3 | ||
ARGS=$* | ||
|
||
slurm_tar_file=slurm-${SLURM_VERSION}.tar.bz2 | ||
slurm_url=https://download.schedmd.com/slurm/${slurm_tar_file} | ||
|
||
# | ||
# Download slurm tarball and unpack it | ||
# | ||
if true; then | ||
|
||
mkdir -p /opt/src || exit 1 | ||
( | ||
cd /opt/src | ||
|
||
if ! stat $slurm_tar_file; then | ||
echo "=== downloading slurm ${SLURM_VERSION} from ${slurm_url}" | ||
curl --fail --output ${slurm_tar_file} ${slurm_url} || exit 1 | ||
fi | ||
|
||
echo "=== unpacking $slurm_tar_file" | ||
tar -xjf ${slurm_tar_file} || exit 1 | ||
) | ||
|
||
fi | ||
|
||
if [ "$ARGS" = "NO_BUILD" ]; | ||
then | ||
exit 0 | ||
fi | ||
|
||
# | ||
# Remove any old build directory. | ||
# Run configure, make, make install | ||
# | ||
|
||
build_dir=/opt/build/slurm-${SLURM_VERSION} | ||
src_dir=/opt/src/slurm-${SLURM_VERSION} | ||
|
||
# rm -rf succeeds silently when the directory does not exist, so no prior check is needed. | ||
rm -rf "$build_dir" | ||
mkdir -p "$build_dir" || exit 1 | ||
( | ||
cd "$build_dir" || exit 1 | ||
# Record the available configure options in the build log. | ||
"$src_dir/configure" --help | ||
# $ARGS is intentionally unquoted so that it splits into separate configure flags. | ||
# A failed configure aborts here instead of letting make run without a Makefile. | ||
"$src_dir/configure" \ | ||
--prefix="${SLURM_ROOT}" \ | ||
--sysconfdir="${SLURM_CONFDIR}" \ | ||
--disable-dependency-tracking \ | ||
$ARGS || exit 1 | ||
|
||
make -j4 && make install | ||
) || exit 1 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
# | ||
# Example slurm.conf file. Please run configurator.html | ||
# (in doc/html) to build a configuration file customized | ||
# for your environment. | ||
# | ||
# | ||
# slurm.conf file generated by configurator.html. | ||
# Put this file on all nodes of your cluster. | ||
# See the slurm.conf man page for more information. | ||
# | ||
ClusterName=cluster | ||
SlurmctldHost=SLURMCTLDHOST(SLURMCTLDADDR) | ||
#SlurmctldHost= | ||
# | ||
#DisableRootJobs=NO | ||
#EnforcePartLimits=NO | ||
#Epilog= | ||
#EpilogSlurmctld= | ||
#FirstJobId=1 | ||
#MaxJobId=67043328 | ||
#GresTypes= | ||
#GroupUpdateForce=0 | ||
#GroupUpdateTime=600 | ||
#JobFileAppend=0 | ||
#JobRequeue=1 | ||
#JobSubmitPlugins=lua | ||
#KillOnBadExit=0 | ||
#LaunchType=launch/slurm | ||
#Licenses=foo*4,bar | ||
#MailProg=/bin/mail | ||
#MaxJobCount=10000 | ||
#MaxStepCount=40000 | ||
#MaxTasksPerNode=512 | ||
MpiDefault=pmi2 | ||
#MpiParams=ports=#-# | ||
#PluginDir= | ||
#PlugStackConfig= | ||
#PrivateData=jobs | ||
#ProctrackType=proctrack/cgroup | ||
ProctrackType=proctrack/linuxproc | ||
#Prolog= | ||
#PrologFlags= | ||
#PrologSlurmctld= | ||
#PropagatePrioProcess=0 | ||
#PropagateResourceLimits= | ||
#PropagateResourceLimitsExcept= | ||
#RebootProgram= | ||
ReturnToService=1 | ||
SlurmctldPidFile=/var/run/slurmctld.pid | ||
SlurmctldPort=6817 | ||
SlurmdPidFile=/var/run/slurmd.%n.pid | ||
SlurmdPort=6818 | ||
SlurmdSpoolDir=/var/spool/slurmd.%n | ||
SlurmUser=slurm | ||
#SlurmdUser=root | ||
#SrunEpilog= | ||
#SrunProlog= | ||
StateSaveLocation=/var/spool/slurmctld | ||
SwitchType=switch/none | ||
#TaskEpilog= | ||
TaskPlugin=task/affinity | ||
#TaskProlog= | ||
#TopologyPlugin=topology/tree | ||
#TmpFS=/tmp | ||
#TrackWCKey=no | ||
#TreeWidth= | ||
#UnkillableStepProgram= | ||
#UsePAM=0 | ||
# | ||
# | ||
# TIMERS | ||
#BatchStartTimeout=10 | ||
#CompleteWait=0 | ||
#EpilogMsgTime=2000 | ||
#GetEnvTimeout=2 | ||
#HealthCheckInterval=0 | ||
#HealthCheckProgram= | ||
InactiveLimit=0 | ||
KillWait=30 | ||
#MessageTimeout=10 | ||
#ResvOverRun=0 | ||
MinJobAge=300 | ||
#OverTimeLimit=0 | ||
SlurmctldTimeout=120 | ||
SlurmdTimeout=300 | ||
#UnkillableStepTimeout=60 | ||
#VSizeFactor=0 | ||
Waittime=0 | ||
# | ||
# | ||
# SCHEDULING | ||
#DefMemPerCPU=0 | ||
#MaxMemPerCPU=0 | ||
#SchedulerTimeSlice=30 | ||
SchedulerType=sched/backfill | ||
SelectType=select/cons_tres | ||
SelectTypeParameters=CR_CPU | ||
# | ||
# | ||
# JOB PRIORITY | ||
#PriorityFlags= | ||
#PriorityType=priority/basic | ||
#PriorityDecayHalfLife= | ||
#PriorityCalcPeriod= | ||
#PriorityFavorSmall= | ||
#PriorityMaxAge= | ||
#PriorityUsageResetPeriod= | ||
#PriorityWeightAge= | ||
#PriorityWeightFairshare= | ||
#PriorityWeightJobSize= | ||
#PriorityWeightPartition= | ||
#PriorityWeightQOS= | ||
# | ||
# | ||
# LOGGING AND ACCOUNTING | ||
#AccountingStorageEnforce=0 | ||
#AccountingStorageHost= | ||
#AccountingStoragePass= | ||
#AccountingStoragePort= | ||
AccountingStorageType=accounting_storage/none | ||
#AccountingStorageUser= | ||
#AccountingStoreFlags= | ||
#JobCompHost= | ||
#JobCompLoc= | ||
#JobCompPass= | ||
#JobCompPort= | ||
JobCompType=jobcomp/none | ||
#JobCompUser= | ||
#JobContainerType=job_container/none | ||
JobAcctGatherFrequency=30 | ||
JobAcctGatherType=jobacct_gather/none | ||
SlurmctldDebug=debug2 | ||
SlurmctldLogFile=/var/log/slurm/slurmctld.log | ||
SlurmdDebug=debug2 | ||
SlurmdLogFile=/var/log/slurm/slurmd.%n.log | ||
#SlurmSchedLogFile= | ||
#SlurmSchedLogLevel= | ||
#DebugFlags= | ||
# | ||
# | ||
# POWER SAVE SUPPORT FOR IDLE NODES (optional) | ||
#SuspendProgram= | ||
#ResumeProgram= | ||
#SuspendTimeout= | ||
#ResumeTimeout= | ||
#ResumeRate= | ||
#SuspendExcNodes= | ||
#SuspendExcParts= | ||
#SuspendRate= | ||
#SuspendTime= | ||
# | ||
# | ||
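# COMPUTE NODES | ||
# The NodeName and PartitionName lines are appended by entrypoint.sh when it generates slurm.conf; with the defaults they look like: | ||
#NodeName=nd[00001-00003] NodeHostname=DOCKER_HOSTNAME NodeAddr=127.0.0.1 Port=6001-6003 State=UNKNOWN CPUs=4 | ||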
#PartitionName=dkr Nodes=ALL Default=YES MaxTime=INFINITE State=UP |