From fee6553c5c6fbd4ab150081b4846e657adb14294 Mon Sep 17 00:00:00 2001 From: Pierre Neyron Date: Mon, 23 Dec 2024 11:40:55 +0100 Subject: [PATCH] [oarsh] add pam_oar_adpopt --- sources/core/tools/oarsh-legacy/README.md | 77 +++++++++++ sources/core/tools/oarsh-legacy/pam_oar_adopt | 123 ++++++++++++++++++ sources/core/tools/oarsh/pam_oar_adopt | 123 ++++++++++++++++++ 3 files changed, 323 insertions(+) create mode 100644 sources/core/tools/oarsh-legacy/README.md create mode 100755 sources/core/tools/oarsh-legacy/pam_oar_adopt create mode 100755 sources/core/tools/oarsh/pam_oar_adopt diff --git a/sources/core/tools/oarsh-legacy/README.md b/sources/core/tools/oarsh-legacy/README.md new file mode 100644 index 00000000..581cdea6 --- /dev/null +++ b/sources/core/tools/oarsh-legacy/README.md @@ -0,0 +1,77 @@ +OARSH +===== +`oarsh` is the connector of OAR, which can be used in place of `ssh` in order to connect to reserved nodes. + +oarsh and oarsh_shell +--------------------- +`oarsh` overloads `ssh`, with the same command line options. + +`oarsh` underneath connects to nodes using the oar user via `ssh` (port `6667`). + +The oar user uses a special shell called `oarsh_shell`, which does the OAR machinery. + +`oarsh` will place the processes it creates in the job's cgroup. + +`oarsh` also supports "sub-jobs" or "tasks", allowing partitioning the job resources into smaller sets to execute on, e.g. per core or GPU while in a job containing several cores or GPUs. This is achieved using the `OAR_USER_CPUSET` and `OAR_USER_GPUDEVICE` environment variables. + +One can alias or symlink `ssh` to `oarsh` (e.g. '~/bin/ssh -> /usr/bin/oarsh'), in order to use `oarsh` as `ssh` seamlessly. + +`oarsh_shell` is also used by `oarsub` when creating an interactive job. + +ssh and PAM +----------- + +PAM can be configured to have users' `ssh` (real `ssh`, not via `oarsh`) connect to nodes and place the created processes in the job's cgroup. 
+ +This uses `pam_exec.so` with the `pam_oar_adopt` script. + +If a user has reserved a node, PAM will find out the job's cgroup and place the process in it. It will also load the job's environment variables. + +If a user tries to connect to a node that they did not reserve or reserved multiple times (e.g. 2 different jobs each reserving a subset of the node's cores), nothing will be done (`ssh` may fail if configured so via `pam_access.so`). + +### PAM configuration example (Debian): + +#### `/etc/pam.d/common-account` +``` +account sufficient pam_exec.so quiet debug stdout /usr/sbin/pam_oar_adopt -a +account sufficient pam_access.so accessfile=/etc/security/access.conf +account required pam_access.so accessfile=/var/lib/oar/access.conf + +account sufficient pam_ldap.so +account required pam_unix.so +``` + +#### `/etc/pam.d/common-session` +``` +# here are the per-package modules (the "Primary" block) +session [default=1] pam_permit.so +# here's the fallback if no module succeeds +session requisite pam_deny.so +# prime the stack with a positive return value if there isn't one already; +# this avoids us returning an error just because nothing sets a success code +# since the modules above will each just jump around +session required pam_permit.so +# and here are more per-package modules (the "Additional" block) +session required pam_unix.so +session [success=ok default=ignore] pam_ldap.so minimum_uid=1000 +session optional pam_systemd.so +session required pam_exec.so stdout /usr/local/sbin/pam_oar_adopt -s +session optional pam_env.so readenv=1 envfile=/var/lib/oar/pam.env +``` + +#### `/etc/pam.d/common-session-noninteractive` +``` +# here are the per-package modules (the "Primary" block) +session [default=1] pam_permit.so +# here's the fallback if no module succeeds +session requisite pam_deny.so +# prime the stack with a positive return value if there isn't one already; +# this avoids us returning an error just because nothing sets a success code +# since the modules above will each 
just jump around
+session required pam_permit.so
+# and here are more per-package modules (the "Additional" block)
+session required pam_unix.so
+session [success=ok default=ignore] pam_ldap.so minimum_uid=1000
+session required pam_exec.so seteuid stdout /usr/local/sbin/pam_oar_adopt -s
+session optional pam_env.so readenv=1 envfile=/var/lib/oar/pam.env
+```
diff --git a/sources/core/tools/oarsh-legacy/pam_oar_adopt b/sources/core/tools/oarsh-legacy/pam_oar_adopt
new file mode 100755
index 00000000..b0e89425
--- /dev/null
+++ b/sources/core/tools/oarsh-legacy/pam_oar_adopt
@@ -0,0 +1,169 @@
+#!/bin/sh
+#
+# pam_oar_adopt is a PAM helper script (run through pam_exec.so) that adopts
+# processes launched under ssh connections made by users. The processes are
+# moved inside the job cgroup, if the user owns all cores of the node in one
+# single OAR job. If the user has several jobs on the node, or one job with
+# only a part of the available cores, an error is thrown. In that case,
+# `oarsh` must be used.
+#
+# Usage: pam_oar_adopt -a   (PAM account stack)
+#        pam_oar_adopt -s   (PAM session stack)
+#
+set -eu
+
+CGROUP_MOUNT_POINT="/dev/oar_cgroups_links"
+OAR_CPUSETS_BASE="${CGROUP_MOUNT_POINT}/cpuset/oar"
+
+# Print the names of the job cgroups of user $1, each followed by a space.
+# Only directories named <user>_<digits> are kept, so that user "bob" never
+# matches the cgroups of a user named "bob_smith".
+get_user_cgroups() {
+    for cgroup_dir in "${OAR_CPUSETS_BASE}/${1}"_*; do
+        cgroup_name=${cgroup_dir##*/}
+        cgroup_suffix=${cgroup_name#"${1}_"}
+        case "${cgroup_suffix}" in
+            '' | *[!0-9]*) continue ;;
+        esac
+        if [ -d "${cgroup_dir}" ]; then
+            printf '%s ' "${cgroup_name}"
+        fi
+    done
+}
+
+# Set USER_CGROUPS to the job cgroups of user $1 (space separated, empty when
+# there is none) and ALL_CPUSETS to the content of the cpuset.cpus file of the
+# OAR base cpuset (empty when that file cannot be read).
+get_vars() {
+    USER_CGROUPS=$(get_user_cgroups "${1}")
+    USER_CGROUPS=${USER_CGROUPS% }
+    ALL_CPUSETS=$(cat "${OAR_CPUSETS_BASE}/cpuset.cpus" 2> /dev/null || true)
+}
+
+# Exit successfully (no verification) unless /etc/oar/pam_activated exists.
+test_pam_activation() {
+    # This file is created by g5k-postinstall when required (i.e. node is
+    # running a non user deployed std env).
+    if [ ! -f "/etc/oar/pam_activated" ]; then
+        exit 0
+    fi
+}
+
+# Account stack: exit 0 only if the user may connect to the node with ssh.
+pam_account() {
+    if [ -z "${PAM_USER+x}" ]; then
+        echo "Please launch this module via PAM"
+        exit 1
+    fi
+
+    # We exit if the pam service is su, we don't want to have the error
+    # message when using su.
+    if [ "${PAM_SERVICE}" = "su-l" ]; then
+        exit 0
+    fi
+
+    # Exit if the user id is lower than 1000 (system user), there is no need
+    # to do the OAR cgroups machinery in that case. An unknown user has no uid
+    # and goes on with the checks below.
+    user_uid=$(getent passwd "${PAM_USER}" | awk -F: '{ print $3 }')
+    if [ -n "${user_uid}" ] && [ "${user_uid}" -lt 1000 ]; then
+        exit 0
+    fi
+
+    get_vars "${PAM_USER}"
+    test_pam_activation
+
+    # Four cases:
+    # - the connecting user is oar, root or vagrant: we fail silently (since
+    #   we are in 'sufficient' mode)
+    # - the user has no cgroups (= no jobs) on node
+    # - the user has more than one cgroup or one but without all cores
+    # - the user has one cgroup with all cores
+    case "${PAM_USER}" in
+        oar | root | vagrant) exit 1 ;;
+    esac
+    if [ -z "${USER_CGROUPS}" ]; then
+        echo "No running job for user ${PAM_USER} on this node." >&2
+        exit 1
+    fi
+
+    # The comparison fails closed: several cgroups, or an unreadable or empty
+    # cpu list, never grant access.
+    job_cpus_file="${OAR_CPUSETS_BASE}/${USER_CGROUPS}/cpuset.cpus"
+    case "${USER_CGROUPS}" in
+        *' '*) job_cpus="" ;;
+        *) job_cpus=$(cat "${job_cpus_file}" 2> /dev/null || true) ;;
+    esac
+    if [ -z "${job_cpus}" ] || [ "${job_cpus}" != "${ALL_CPUSETS}" ]; then
+        cat << EOF >&2
+Cannot connect to node using 'ssh' because not all its CPU cores are assigned to the job which reserves it.
+Reserve the whole node, or use 'oarsh' instead.
+EOF
+        exit 1
+    fi
+    exit 0
+}
+
+# Session stack: when a session is opened, link the job environment file for
+# pam_env and move the parent process of this script into the job cgroup.
+pam_session() {
+    if [ -z "${PAM_TYPE+x}" ]; then
+        echo "Please launch this module via PAM"
+        exit 1
+    fi
+
+    # Exit if not a login
+    if [ "${PAM_TYPE}" != "open_session" ]; then
+        exit 0
+    fi
+
+    G5K_USER=${PAM_RUSER:-$PAM_USER}
+    get_vars "${G5K_USER}"
+
+    # We could not find a running OAR job for this user on this node. It
+    # probably means that the user connecting is either root or oar (for
+    # example because of oarsh). We do nothing in that case.
+    if [ -z "${USER_CGROUPS}" ]; then
+        exit 0
+    fi
+
+    # With several jobs there is no single cgroup to adopt into: fail before
+    # touching the symlink or any tasks file.
+    case "${USER_CGROUPS}" in
+        *' '*)
+            echo "Several jobs of user ${G5K_USER} run on this node, use 'oarsh' instead." >&2
+            exit 1
+            ;;
+    esac
+
+    # To have job's environment variables, we create a symlink to the already
+    # created (by oarsh) environment file. pam_env will then load it.
+    ln -fs "/var/lib/oar/${USER_CGROUPS}.env" /var/lib/oar/pam.env
+
+    PIDS="$(ps -o ppid= $$)"
+    for pid in $PIDS; do
+        for cgroup in "${CGROUP_MOUNT_POINT}"/*; do
+            echo "${pid}" > "${cgroup}/oar/${USER_CGROUPS}/tasks"
+        done
+    done
+}
+
+if [ $# -eq 0 ]; then
+    echo "Please provide mode"
+    exit 1
+fi
+
+while getopts ":as" opt; do
+    case $opt in
+        "s")
+            pam_session
+            ;;
+        "a")
+            pam_account
+            ;;
+        *)
+            echo "Unknown mode"
+            exit 1
+            ;;
+    esac
+done
diff --git a/sources/core/tools/oarsh/pam_oar_adopt b/sources/core/tools/oarsh/pam_oar_adopt
new file mode 100755
index 00000000..b0e89425
--- /dev/null
+++ b/sources/core/tools/oarsh/pam_oar_adopt
@@ -0,0 +1,169 @@
+#!/bin/sh
+#
+# pam_oar_adopt is a PAM helper script (run through pam_exec.so) that adopts
+# processes launched under ssh connections made by users. The processes are
+# moved inside the job cgroup, if the user owns all cores of the node in one
+# single OAR job. If the user has several jobs on the node, or one job with
+# only a part of the available cores, an error is thrown. In that case,
+# `oarsh` must be used.
+#
+# Usage: pam_oar_adopt -a   (PAM account stack)
+#        pam_oar_adopt -s   (PAM session stack)
+#
+set -eu
+
+CGROUP_MOUNT_POINT="/dev/oar_cgroups_links"
+OAR_CPUSETS_BASE="${CGROUP_MOUNT_POINT}/cpuset/oar"
+
+# Print the names of the job cgroups of user $1, each followed by a space.
+# Only directories named <user>_<digits> are kept, so that user "bob" never
+# matches the cgroups of a user named "bob_smith".
+get_user_cgroups() {
+    for cgroup_dir in "${OAR_CPUSETS_BASE}/${1}"_*; do
+        cgroup_name=${cgroup_dir##*/}
+        cgroup_suffix=${cgroup_name#"${1}_"}
+        case "${cgroup_suffix}" in
+            '' | *[!0-9]*) continue ;;
+        esac
+        if [ -d "${cgroup_dir}" ]; then
+            printf '%s ' "${cgroup_name}"
+        fi
+    done
+}
+
+# Set USER_CGROUPS to the job cgroups of user $1 (space separated, empty when
+# there is none) and ALL_CPUSETS to the content of the cpuset.cpus file of the
+# OAR base cpuset (empty when that file cannot be read).
+get_vars() {
+    USER_CGROUPS=$(get_user_cgroups "${1}")
+    USER_CGROUPS=${USER_CGROUPS% }
+    ALL_CPUSETS=$(cat "${OAR_CPUSETS_BASE}/cpuset.cpus" 2> /dev/null || true)
+}
+
+# Exit successfully (no verification) unless /etc/oar/pam_activated exists.
+test_pam_activation() {
+    # This file is created by g5k-postinstall when required (i.e. node is
+    # running a non user deployed std env).
+    if [ ! -f "/etc/oar/pam_activated" ]; then
+        exit 0
+    fi
+}
+
+# Account stack: exit 0 only if the user may connect to the node with ssh.
+pam_account() {
+    if [ -z "${PAM_USER+x}" ]; then
+        echo "Please launch this module via PAM"
+        exit 1
+    fi
+
+    # We exit if the pam service is su, we don't want to have the error
+    # message when using su.
+    if [ "${PAM_SERVICE}" = "su-l" ]; then
+        exit 0
+    fi
+
+    # Exit if the user id is lower than 1000 (system user), there is no need
+    # to do the OAR cgroups machinery in that case. An unknown user has no uid
+    # and goes on with the checks below.
+    user_uid=$(getent passwd "${PAM_USER}" | awk -F: '{ print $3 }')
+    if [ -n "${user_uid}" ] && [ "${user_uid}" -lt 1000 ]; then
+        exit 0
+    fi
+
+    get_vars "${PAM_USER}"
+    test_pam_activation
+
+    # Four cases:
+    # - the connecting user is oar, root or vagrant: we fail silently (since
+    #   we are in 'sufficient' mode)
+    # - the user has no cgroups (= no jobs) on node
+    # - the user has more than one cgroup or one but without all cores
+    # - the user has one cgroup with all cores
+    case "${PAM_USER}" in
+        oar | root | vagrant) exit 1 ;;
+    esac
+    if [ -z "${USER_CGROUPS}" ]; then
+        echo "No running job for user ${PAM_USER} on this node." >&2
+        exit 1
+    fi
+
+    # The comparison fails closed: several cgroups, or an unreadable or empty
+    # cpu list, never grant access.
+    job_cpus_file="${OAR_CPUSETS_BASE}/${USER_CGROUPS}/cpuset.cpus"
+    case "${USER_CGROUPS}" in
+        *' '*) job_cpus="" ;;
+        *) job_cpus=$(cat "${job_cpus_file}" 2> /dev/null || true) ;;
+    esac
+    if [ -z "${job_cpus}" ] || [ "${job_cpus}" != "${ALL_CPUSETS}" ]; then
+        cat << EOF >&2
+Cannot connect to node using 'ssh' because not all its CPU cores are assigned to the job which reserves it.
+Reserve the whole node, or use 'oarsh' instead.
+EOF
+        exit 1
+    fi
+    exit 0
+}
+
+# Session stack: when a session is opened, link the job environment file for
+# pam_env and move the parent process of this script into the job cgroup.
+pam_session() {
+    if [ -z "${PAM_TYPE+x}" ]; then
+        echo "Please launch this module via PAM"
+        exit 1
+    fi
+
+    # Exit if not a login
+    if [ "${PAM_TYPE}" != "open_session" ]; then
+        exit 0
+    fi
+
+    G5K_USER=${PAM_RUSER:-$PAM_USER}
+    get_vars "${G5K_USER}"
+
+    # We could not find a running OAR job for this user on this node. It
+    # probably means that the user connecting is either root or oar (for
+    # example because of oarsh). We do nothing in that case.
+    if [ -z "${USER_CGROUPS}" ]; then
+        exit 0
+    fi
+
+    # With several jobs there is no single cgroup to adopt into: fail before
+    # touching the symlink or any tasks file.
+    case "${USER_CGROUPS}" in
+        *' '*)
+            echo "Several jobs of user ${G5K_USER} run on this node, use 'oarsh' instead." >&2
+            exit 1
+            ;;
+    esac
+
+    # To have job's environment variables, we create a symlink to the already
+    # created (by oarsh) environment file. pam_env will then load it.
+    ln -fs "/var/lib/oar/${USER_CGROUPS}.env" /var/lib/oar/pam.env
+
+    PIDS="$(ps -o ppid= $$)"
+    for pid in $PIDS; do
+        for cgroup in "${CGROUP_MOUNT_POINT}"/*; do
+            echo "${pid}" > "${cgroup}/oar/${USER_CGROUPS}/tasks"
+        done
+    done
+}
+
+if [ $# -eq 0 ]; then
+    echo "Please provide mode"
+    exit 1
+fi
+
+while getopts ":as" opt; do
+    case $opt in
+        "s")
+            pam_session
+            ;;
+        "a")
+            pam_account
+            ;;
+        *)
+            echo "Unknown mode"
+            exit 1
+            ;;
+    esac
+done