From 5f3a321c60db6426a3db0864a89355e7e1c37299 Mon Sep 17 00:00:00 2001 From: Altan Orhon Date: Tue, 7 Nov 2023 11:57:40 -0800 Subject: [PATCH] Revert "Updated ORAS preloading" This reverts commit f7d0e8382a7dcb9c232d692779fa4ce5fd061cad. --- hyakvnc | 891 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 443 insertions(+), 448 deletions(-) diff --git a/hyakvnc b/hyakvnc index 4de5f21..1e0d83b 100755 --- a/hyakvnc +++ b/hyakvnc @@ -9,19 +9,19 @@ fi # Check Bash version 4.4 or greater: case "${BASH_VERSION:-0}" in - 4*) if [[ "${BASH_VERSINFO[1]:-0}" -lt 4 ]]; then - echo "Requires Bash version > 4.x" - exit 1 - fi ;; +4*) if [[ "${BASH_VERSINFO[1]:-0}" -lt 4 ]]; then + echo "Requires Bash version > 4.x" + exit 1 +fi ;; - *) ;; +*) ;; esac # Only enable these shell behaviours if we're not being sourced if ! (return 0 2>/dev/null); then [[ -n "${XDEBUG:-}" ]] && set -x # %% Set XDEBUG to print commands as they are executed - set -o pipefail # Use last non-zero exit code in a pipeline - set -o errtrace # Ensure the error trap handler is inherited + set -o pipefail # Use last non-zero exit code in a pipeline + set -o errtrace # Ensure the error trap handler is inherited # set -o errexit # Exit on error # shopt -qs inherit_errexit # Ensure subshells exit on error fi @@ -37,7 +37,7 @@ HYAKVNC_CONFIG_FILE="${HYAKVNC_DIR}/hyakvnc-config.env" # %% Configuration file # Load the hyakvnc configuration from the config file # This is high up in the file so that settings can be overridden by the user's config # Arguments: None -function hyakvnc_load_config() { +function hyakvnc_load_config { [[ -r "${HYAKVNC_CONFIG_FILE:-}" ]] || return 0 # Return if config file doesn't exist # Read each line of the parsed config file and export the variable: @@ -115,7 +115,7 @@ Log_Level_Colors=(["FATAL"]=5 ["ERROR"]=1 ["WARN"]=3 ["INFO"]=4 ["DEBUG"]=6 ["TR # check_log_level() # Check if the current log level is high enough to log a message # Arguments: -function check_log_level() { +function check_log_level { local level levelno refloglevel refloglevelno level="${1:-INFO}" refloglevel="${2:-${HYAKVNC_LOG_LEVEL:-INFO}}" @@ -141,7 +141,7 @@ function check_log_level() { # $HYAKVNC_LOG_LEVEL - The log level to use for interactive output (default: INFO) # $HYAKVNC_LOG_FILE - The log file to use (default: $HYAKVNC_DIR/hyakvnc.log) # $HYAKVNC_LOG_FILE_LEVEL - The log level to use for log file output (default: DEBUG) -function log() { +function log { local level levelno colorno curlevelno curlogfilelevelno funcname logfilefuncname curloglevel curlogfilelevel [[ $# -lt 1 ]] && return 1 level="${1:-}" @@ -216,7 +216,7 @@ function hyakvnc_pull_updates() { # Check if a hyakvnc update is available # Arguments: None # Returns: 0 if an update is available, 1 if none or if an error occurred -function hyakvnc_check_updates() { +function hyakvnc_check_updates { log DEBUG "Checking for updates... " # Check if git is installed: check_command git ERROR || return 1 @@ -360,7 +360,7 @@ function ghcr_get_oras_sif() { # apply the update. # Arguments: None # Returns: 0 if an update is available and the user wants to update, 1 if none or if an error occurred -function hyakvnc_autoupdate() { +function hyakvnc_autoupdate { if [[ "${HYAKVNC_CHECK_UPDATE_FREQUENCY:-0}" == "-1" ]]; then log DEBUG "Skipping update check" return 1 @@ -372,16 +372,16 @@ function hyakvnc_autoupdate() { local find_m_arg=() case "${update_frequency_unit:=d}" in - d) - find_m_arg+=(-mtime "+${update_frequency_value:=0}") - ;; - m) - find_m_arg+=(-mmin "+${update_frequency_value:=0}") - ;; - *) - log ERROR "Invalid update frequency unit: ${update_frequency_unit}. Please use [d]ays or [m]inutes." - return 1 - ;; + d) + find_m_arg+=(-mtime "+${update_frequency_value:=0}") + ;; + m) + find_m_arg+=(-mmin "+${update_frequency_value:=0}") + ;; + *) + log ERROR "Invalid update frequency unit: ${update_frequency_unit}. Please use [d]ays or [m]inutes." + return 1 + ;; esac log DEBUG "Checking if ${HYAKVNC_REPO_DIR}/.last_update_check is older than ${update_frequency_value}${update_frequency_unit}..." @@ -400,36 +400,36 @@ function hyakvnc_autoupdate() { } if [[ -t 0 ]]; then # Check if we're running interactively - while true; do # Ask user if they want to update + while true; do # Ask user if they want to update local choice read -r -p "Would you like to update hyakvnc? [y/n] [x to disable]: " choice case "${choice}" in - y | Y | yes | Yes) - log INFO "Updating hyakvnc..." - hyakvnc_pull_updates || { - log WARN "Didn't update hyakvnc" - return 1 - } - log INFO "Successfully updated hyakvnc. Restarting..." - echo - exec "${0}" "${@}" # Restart hyakvnc - ;; - n | N | no | No) - log INFO "Not updating hyakvnc" - return 1 - ;; - x | X) - log INFO "Disabling update checks" - export HYAKVNC_CHECK_UPDATE_FREQUENCY="-1" - if [[ -n "${HYAKVNC_CONFIG_FILE:-}" ]]; then - touch "${HYAKVNC_CONFIG_FILE}" && echo 'HYAKVNC_CHECK_UPDATE_FREQUENCY=-1' >>"${HYAKVNC_CONFIG_FILE}" - log INFO "Set HYAKVNC_CHECK_UPDATE_FREQUENCY=-1 in ${HYAKVNC_CONFIG_FILE}" - fi + y | Y | yes | Yes) + log INFO "Updating hyakvnc..." + hyakvnc_pull_updates || { + log WARN "Didn't update hyakvnc" return 1 - ;; - *) - echo "Please enter y, n, or x" - ;; + } + log INFO "Successfully updated hyakvnc. Restarting..." + echo + exec "${0}" "${@}" # Restart hyakvnc + ;; + n | N | no | No) + log INFO "Not updating hyakvnc" + return 1 + ;; + x | X) + log INFO "Disabling update checks" + export HYAKVNC_CHECK_UPDATE_FREQUENCY="-1" + if [[ -n "${HYAKVNC_CONFIG_FILE:-}" ]]; then + touch "${HYAKVNC_CONFIG_FILE}" && echo 'HYAKVNC_CHECK_UPDATE_FREQUENCY=-1' >>"${HYAKVNC_CONFIG_FILE}" + log INFO "Set HYAKVNC_CHECK_UPDATE_FREQUENCY=-1 in ${HYAKVNC_CONFIG_FILE}" + fi + return 1 + ;; + *) + echo "Please enter y, n, or x" + ;; esac done else @@ -448,7 +448,7 @@ function hyakvnc_autoupdate() { # Arguments: # - - The command to check # - - Passed to log if the command is not available (optional) -function check_command() { +function check_command { if [[ -z "${1:-}" ]] || ! command -v "${1}" >/dev/null 2>&1; then [[ $# -gt 1 ]] && log "${@:2}" return 1 @@ -461,14 +461,14 @@ function check_command() { # check_slurm_running { # Check if SLURM is running # Arguments: None -function check_slurm_running() { +function check_slurm_running { sinfo >/dev/null 2>&1 || return 1 } # expand_slurm_node_range() # Expand a SLURM node range to a list of nodes # Arguments: -function expand_slurm_node_range() { +function expand_slurm_node_range { [[ -z "${1:-}" ]] && return 1 result=$(scontrol show hostnames --oneliner "${1}" | grep -oE '^.+$' | tr ' ' '\n') || return 1 echo "${result}" && return 0 @@ -477,7 +477,7 @@ function expand_slurm_node_range() { # get_slurm_job_info() # Get info about a SLURM job, given a list of job IDs # Arguments: [] -function get_slurm_job_info() { +function get_slurm_job_info { [[ $# -eq 0 ]] && { log ERROR "User or Job ID must be specified" return 1 @@ -504,7 +504,7 @@ function get_slurm_job_info() { # get_squeue_job_status() # Get the status of a SLURM job, given a job ID # Arguments: -function get_squeue_job_status() { +function get_squeue_job_status { local jobid="${1:-}" [[ -z "${jobid}" ]] && { log ERROR "Job ID must be specified" @@ -519,7 +519,7 @@ function get_squeue_job_status() { # get_slurm_hyak_qos() # Return the correct QOS on Hyak for the given partition on hyak # Arguments: -function get_slurm_hyak_qos() { +function get_slurm_hyak_qos { # Logic copied from hyakalloc's hyakqos.py:QosResource.__init__(): local qos_name qos_suffix qos_name="${1:-}" @@ -540,7 +540,7 @@ function get_slurm_hyak_qos() { # hyakvnc_config_init() # Initialize the hyakvnc configuration # Arguments: None -function hyakvnc_config_init() { +function hyakvnc_config_init { mkdir -p "${HYAKVNC_DIR}/jobs" "${HYAKVNC_SLURM_OUTPUT_DIR}" || { log ERROR "Failed to create HYAKVNC jobs directory ${HYAKVNC_DIR}/jobs" return 1 @@ -599,22 +599,22 @@ function hyakvnc_config_init() { # stop_hyakvnc_session() # Stop a Hyak VNC session, given a job ID # Arguments: [ -c | --cancel ] [ --no-rm ] -function stop_hyakvnc_session() { +function stop_hyakvnc_session { local jobid should_cancel no_rm while true; do case ${1:-} in - -c | --cancel) - shift - should_cancel=1 - ;; - --no-rm) # Don't remove the job directory - shift - no_rm=1 - ;; - *) - jobid="${1:-}" - break - ;; + -c | --cancel) + shift + should_cancel=1 + ;; + --no-rm) # Don't remove the job directory + shift + no_rm=1 + ;; + *) + jobid="${1:-}" + break + ;; esac done @@ -662,40 +662,40 @@ function stop_hyakvnc_session() { # # The generated connection string should look like this, depending on the the OS: # ssh -f -L 6111:'/mmfs1/home/altan/.hyakvnc/jobs/14930429/socket.uds' -J altan@klone.hyak.uw.edu altan@g3071 sleep 10; vncviewer localhost:6111 -function print_connection_info() { +function print_connection_info { local jobid jobdir node socket_path viewer_port launch_hostname ssh_host viewer_port="${HYAKVNC_LOCALHOST_PORT:-5901}" ssh_host="${HYAKVNC_SSH_HOST:-klone.hyak.uw.edu}" # Parse arguments: while true; do case ${1:-} in - -j | --jobid) - shift - jobid="${1:-}" - shift - ;; - -p | --viewer-port) - shift - viewer_port="${1:-viewer_port}" - shift - ;; - -n | --node) - shift - node="${1:-}" - shift - ;; - -s | --ssh-host) - shift - ssh_host="${1:-}" - shift - ;; - -*) - log ERROR "Unknown option for print_connection_info: ${1:-}\n" - return 1 - ;; - *) - break - ;; + -j | --jobid) + shift + jobid="${1:-}" + shift + ;; + -p | --viewer-port) + shift + viewer_port="${1:-viewer_port}" + shift + ;; + -n | --node) + shift + node="${1:-}" + shift + ;; + -s | --ssh-host) + shift + ssh_host="${1:-}" + shift + ;; + -*) + log ERROR "Unknown option for print_connection_info: ${1:-}\n" + return 1 + ;; + *) + break + ;; esac done @@ -792,7 +792,7 @@ EOF # cleanup_launched_jobs_and_exit() # Cancel any jobs that were launched and exit -function cleanup_launched_jobs_and_exit() { +function cleanup_launched_jobs_and_exit { local jobdir jobid log WARN "Interrupted. Cleaning up and exiting!" # Cancel any jobs that were launched: @@ -802,7 +802,7 @@ function cleanup_launched_jobs_and_exit() { scancel "${jobid}" || log ERROR "scancel failed to cancel job ${jobid}" [[ -d "${jobdir}" ]] && rm -rf "${jobdir}" && log DEBUG "Removed job directory ${jobdir}" done - kill -TERM %tail 2>/dev/null # Stop following the SLURM log file + kill -TERM %tail 2>/dev/null # Stop following the SLURM log file trap - SIGINT SIGTERM SIGHUP SIGABRT SIGQUIT ERR EXIT # Remove traps exit 1 } @@ -816,8 +816,8 @@ function cleanup_launched_jobs_and_exit() { # - output_path: Directory or path to save the image to (optional) # Returns: 0 if successful, 1 if not or if an error occurred # Prints: The token to stdout -function ghcr_get_oras_sif() { - check_command curl || return 1 # Check if curl is installed +function ghcr_get_oras_sif { + check_command curl || return 1 # Check if curl is installed check_command python3 || return 1 # Check if python3 is installed local url output_path [[ -z "${url:=${1:-}}" ]] && { @@ -833,21 +833,21 @@ function ghcr_get_oras_sif() { # Check that the URL is an ORAS GitHub Container Registry URL: local address image_ref repo image_tag case "${url}" in - oras://ghcr.io/*) - address="${url#oras://}" - image_ref="${address#ghcr.io/}" - repo="${image_ref%%:*}" - [[ -z "${repo}" ]] && { - log ERROR "Failed to parse repository from URL \"${url}\"" - return 1 - } - [[ ${image_ref} == *:* ]] && image_tag="${image_ref##*:}" - image_tag="${image_tag:-latest}" - ;; - *) # Not a GitHub Container Registry URL - log ERROR "URL \"${url}\" is not a GitHub Container Registry URL for an ORAS image" + oras://ghcr.io/*) + address="${url#oras://}" + image_ref="${address#ghcr.io/}" + repo="${image_ref%%:*}" + [[ -z "${repo}" ]] && { + log ERROR "Failed to parse repository from URL \"${url}\"" return 1 - ;; + } + [[ ${image_ref} == *:* ]] && image_tag="${image_ref##*:}" + image_tag="${image_tag:-latest}" + ;; + *) # Not a GitHub Container Registry URL + log ERROR "URL \"${url}\" is not a GitHub Container Registry URL for an ORAS image" + return 1 + ;; esac # Get a token for the repository (required to get the manifest, but freely available by this request): @@ -913,7 +913,7 @@ function ghcr_get_oras_sif() { # ## Command: create # help_create() -function help_create() { +function help_create { cat <>"${HOME}/.zshenv" && log INFO "Added APPTAINER_CACHEDIR to ~/.zshenv" - else - echo "export APPTAINER_CACHEDIR=\"${newcachedir}\"" >>"${ZDOTDIR:-${HOME}}/.zshrc" && log INFO "Added APPTAINER_CACHEDIR to ${ZDOTDIR:-~}/.zshrc" - fi - # Check if using Bash: - elif [[ -n "${BASH_VERSION:-}" ]]; then - echo "export APPTAINER_CACHEDIR=\"${newcachedir}\"" >>"${HOME}/.bashrc" && log INFO "Added APPTAINER_CACHEDIR to ~/.bashrc" - # Write to ~/.profile if we can't determine shell type: + y | Y) + # Check if using ZSH: + if [[ -n "${ZSH_VERSION:-}" ]]; then + if [[ -w "${HOME}/.zshenv}" ]]; then + echo "export APPTAINER_CACHEDIR=\"${newcachedir}\"" >>"${HOME}/.zshenv" && log INFO "Added APPTAINER_CACHEDIR to ~/.zshenv" else - log INFO "Could not determine shell type. Adding APPTAINER_CACHEDIR to ~/.profile." - echo "export APPTAINER_CACHEDIR=\"${newcachedir}\"" >>"${HOME}/.profile" && log INFO "Added APPTAINER_CACHEDIR to ~/.profile" + echo "export APPTAINER_CACHEDIR=\"${newcachedir}\"" >>"${ZDOTDIR:-${HOME}}/.zshrc" && log INFO "Added APPTAINER_CACHEDIR to ${ZDOTDIR:-~}/.zshrc" fi - break - ;; - - n | N) - log WARN "Not adding APPTAINER_CACHEDIR to your shell's startup file. You may need to do this again in the future." - break - ;; - *) - log ERROR "Invalid choice ${choice2:-}." - ;; + # Check if using Bash: + elif [[ -n "${BASH_VERSION:-}" ]]; then + echo "export APPTAINER_CACHEDIR=\"${newcachedir}\"" >>"${HOME}/.bashrc" && log INFO "Added APPTAINER_CACHEDIR to ~/.bashrc" + # Write to ~/.profile if we can't determine shell type: + else + log INFO "Could not determine shell type. Adding APPTAINER_CACHEDIR to ~/.profile." + echo "export APPTAINER_CACHEDIR=\"${newcachedir}\"" >>"${HOME}/.profile" && log INFO "Added APPTAINER_CACHEDIR to ~/.profile" + fi + break + ;; + + n | N) + log WARN "Not adding APPTAINER_CACHEDIR to your shell's startup file. You may need to do this again in the future." + break + ;; + *) + log ERROR "Invalid choice ${choice2:-}." + ;; esac done fi @@ -1161,20 +1161,15 @@ function cmd_create() { # Preload ORAS images if requested: if [[ "${HYAKVNC_APPTAINER_GHCR_ORAS_PRELOAD:-1}" == 1 ]]; then - case "${HYAKVNC_APPTAINER_CONTAINER}" in - oras://*) - local oras_cache_dir oras_image_path - oras_cache_dir="${APPTAINER_CACHEDIR:-${HOME}/.apptainer/cache}/cache/oras" - if mkdir -p "${oras_cache_dir}"; then - log INFO "Preloading ORAS image for \"${HYAKVNC_APPTAINER_CONTAINER}\"" - oras_image_path="$(ghcr_get_oras_sif "${HYAKVNC_APPTAINER_CONTAINER}" "${APPTAINER_CACHEDIR}/cache/oras" || true)" - [[ -z "${oras_image_path:-}" ]] && log ERROR "hyakvnc failed to preload ORAS image for \"${HYAKVNC_APPTAINER_CONTAINER:-}\" on its own. Apptainer will try to download the image by itself. If you don't want to preload ORAS images, use the --no-ghcr-oras-preload option." - else - log ERROR "Failed to create directory ${oras_cache_dir}." - fi - ;; - *) ;; - esac + local oras_cache_dir oras_image_path + oras_cache_dir="${APPTAINER_CACHEDIR:-${HOME}/.apptainer/cache}/cache/oras" + if mkdir -p "${oras_cache_dir}"; then + log INFO "Preloading ORAS image for \"${HYAKVNC_APPTAINER_CONTAINER}\"" + oras_image_path="$(ghcr_get_oras_sif "${HYAKVNC_APPTAINER_CONTAINER}" "${APPTAINER_CACHEDIR}/cache/oras" || true)" + [[ -z "${oras_image_path:-}" ]] && log ERROR "hyakvnc failed to preload ORAS image for \"${HYAKVNC_APPTAINER_CONTAINER:-}\" on its own. Apptainer will try to download the image by itself. If you don't want to preload ORAS images, use the --no-ghcr-oras-preload option." + else + log ERROR "Failed to create directory ${oras_cache_dir}." + fi fi export HYAKVNC_SLURM_JOB_NAME="${HYAKVNC_SLURM_JOB_PREFIX}${container_name}" @@ -1209,10 +1204,10 @@ function cmd_create() { [[ -n "${HYAKVNC_APPTAINER_ADD_ARGS:-}" ]] && apptainer_start_args+=("${HYAKVNC_APPTAINER_ADD_ARGS[@]}") case "${HYAKVNC_APPTAINER_CLEANENV:-}" in - 1 | true | yes | y | Y | TRUE | YES) - apptainer_start_args+=("--cleanenv") - ;; - *) ;; + 1 | true | yes | y | Y | TRUE | YES) + apptainer_start_args+=("--cleanenv") + ;; + *) ;; esac # Final command should look like: @@ -1272,19 +1267,19 @@ function cmd_create() { sleep 1 squeue_result=$(squeue --job "${launched_jobid}" --format "%T" --noheader || true) case "${squeue_result:-}" in - SIGNALING | PENDING | CONFIGURING | STAGE_OUT | SUSPENDED | REQUEUE_HOLD | REQUEUE_FED | RESV_DEL_HOLD | STOPPED | RESIZING | REQUEUED) - log TRACE "Job ${launched_jobid} is in a state that could potentially run: ${squeue_result}" - sleep 1 - continue - ;; - RUNNING) - log DEBUG "Job ${launched_jobid} is ${squeue_result}" - break - ;; - *) - log ERROR "Job ${launched_jobid} is in unexpected state ${squeue_result}" - exit 1 - ;; + SIGNALING | PENDING | CONFIGURING | STAGE_OUT | SUSPENDED | REQUEUE_HOLD | REQUEUE_FED | RESV_DEL_HOLD | STOPPED | RESIZING | REQUEUED) + log TRACE "Job ${launched_jobid} is in a state that could potentially run: ${squeue_result}" + sleep 1 + continue + ;; + RUNNING) + log DEBUG "Job ${launched_jobid} is ${squeue_result}" + break + ;; + *) + log ERROR "Job ${launched_jobid} is in unexpected state ${squeue_result}" + exit 1 + ;; esac done @@ -1312,20 +1307,20 @@ function cmd_create() { fi case "${HYAKVNC_APPTAINER_CONTAINER}" in - library://* | docker://* | shub://* | oras://* | http://* | https://*) - local protocol="${HYAKVNC_APPTAINER_CONTAINER#*://}" - if [[ -n "${protocol:-}" ]]; then - # Wait for the container to start downloading: - log INFO "Downloading ${HYAKVNC_APPTAINER_CONTAINER}..." - until grep -q -iE '(Download|cached).*image' "${jobdir}/slurm.log"; do - sleep 1 - done - # Wait for the container to stop downloading: - # shellcheck disable=SC2016 - srun --jobid "${launched_jobid}" --output /dev/null sh -c 'while pgrep -u $USER -fia '"'"'^.*apptainer.*jobs/'"${launched_jobid}"'.*'"${protocol}""'"' | grep -v "^$$"; do sleep 1; done' || log WARN "Couldn't poll for container download process for ${HYAKVNC_APPTAINER_CONTAINER}" - fi - ;; - *) ;; + library://* | docker://* | shub://* | oras://* | http://* | https://*) + local protocol="${HYAKVNC_APPTAINER_CONTAINER#*://}" + if [[ -n "${protocol:-}" ]]; then + # Wait for the container to start downloading: + log INFO "Downloading ${HYAKVNC_APPTAINER_CONTAINER}..." + until grep -q -iE '(Download|cached).*image' "${jobdir}/slurm.log"; do + sleep 1 + done + # Wait for the container to stop downloading: + # shellcheck disable=SC2016 + srun --jobid "${launched_jobid}" --output /dev/null sh -c 'while pgrep -u $USER -fia '"'"'^.*apptainer.*jobs/'"${launched_jobid}"'.*'"${protocol}""'"' | grep -v "^$$"; do sleep 1; done' || log WARN "Couldn't poll for container download process for ${HYAKVNC_APPTAINER_CONTAINER}" + fi + ;; + *) ;; esac log INFO "Waiting for VNC server to start..." @@ -1363,7 +1358,7 @@ function cmd_create() { # ## COMMAND: status # help_status() -function help_status() { +function help_status { cat <