Skip to content

Commit

Permalink
Merge branch 'NOAA-EMC:develop' into aws-forecast-only
Browse files Browse the repository at this point in the history
  • Loading branch information
weihuang-jedi authored Aug 5, 2024
2 parents f900893 + d599fff commit f599cd7
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 23 deletions.
143 changes: 125 additions & 18 deletions ci/scripts/utils/launch_java_agent.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,70 @@
#!/bin/env bash

set -e

# ==============================================================================
# Script Name: launch_java_agent.sh
#
# Description:
# This script automates the process of launching a Jenkins agent
# on a specified machine. It ensures that the necessary
# prerequisites are met, such as the availability of JAVA_HOME,
# the Jenkins agent launch directory, and proper authentication
# with GitHub.
#
# It then proceeds to check if the Jenkins node is online and
# decides whether to launch the Jenkins agent based on the node's
# status. The agent is launched in the background,
# and its PID is logged for reference.
#
# Prerequisites:
# JAVA_HOME must be set to a valid JDK installation.
# Jenkins agent launch directory must exist and be specified.
# GitHub CLI (gh) must be installed and authenticated for messaging
# from the Jenkins controller to GitHub PR via shell commands.
# Jenkins agent launch directory must exist and be specified.
# TODO: Must use GitHub CLI v2.25.1 (newer versions have issues)
# https://github.com/cli/cli/releases/download/v2.25.1/gh_2.25.1_linux_amd64.tar.gz
# Jenkins controller URL and authentication token must be provided.
# jenkins-secret-file:
# Must be present in the Jenkins agent launch directory.
# This file contains the secret key for the Jenkins agent
# established by the Jenkins administrator for each Node.
# jenkins_token:
# Must be present in the Jenkins agent launch directory.
# This file contains the user authentication token for the Jenkins controller
# to use the Remote API. This token can be generated by the user
# on the Jenkins controller.
# controller_user:
# Must be set to the Jenkins controller username corresponding to the jenkins_token.
#
# Usage: ./launch_java_agent.sh [-n] [-f] [-h]
# The optional -n (now) flag makes the script launch the Jenkins
# agent without waiting before checking the node status again.
# The optional -f (force) flag launches the Jenkins agent regardless of the node status.
#
# ==============================================================================

# ------------------------------------------------------------------------------
# Command-line options:
#   -f  sets force_launch="True" (launch without querying the node status)
#   -n  sets skip_wait="True"    (skip the wait before re-checking the node)
#   -h  prints the usage text and exits with status 0
# The leading ':' in the optstring selects getopts' silent error mode: for an
# unrecognized option, flag is set to '?' and the offending character is stored
# in OPTARG, so the error message reports OPTARG rather than flag.
# ------------------------------------------------------------------------------
force_launch="False"
skip_wait="False"
while getopts ":fnh" flag; do
  case "${flag}" in
    f) force_launch="True" ;;
    n) skip_wait="True" ;;
    h)
      echo "Usage: ./launch_java_agent.sh [-n] [-f] [-h]
Two mutually exclusive optional arguments:
-n (now) causes the script to launch the Jenkins agent without waiting before trying again.
-f (force) forces the script to launch the Jenkins agent regardless of its connection status."
      exit 0
      ;;
    *)
      # Diagnostics go to stderr so they are not mixed with normal output.
      echo "Unknown flag: -${OPTARG:-${flag}}" >&2
      exit 1
      ;;
  esac
done

# Base URL of the Jenkins controller; used for the agent.jar download, the
# node status query, and the agent JNLP launch URL.
controller_url="https://jenkins.epic.oarcloud.noaa.gov"
controller_user="terry.mcguinness"
# NOTE(review): the unconditional assignment on the previous line makes the
# default expansion below a no-op; presumably that line is the pre-change
# version shown by the diff view and only this one should remain - confirm.
controller_user=${controller_user:-"terry.mcguinness"}
# Name of the file that holds the user's Jenkins Remote API token.
controller_user_auth_token="jenkins_token"

# Absolute path of the directory three levels above this script's location.
HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
# Host name, used in status and log messages.
host=$(hostname)

Expand All @@ -13,12 +75,10 @@ host=$(hostname)
source "${HOMEgfs}/ush/detect_machine.sh"
case ${MACHINE_ID} in
hera | orion | hercules | wcoss2)
echo "Launch Jenkins Java Controler on ${MACHINE_ID}"
;;
echo "Launch Jenkins Java Controler on ${MACHINE_ID}";;
*)
echo "Unsupported platform. Exiting with error."
exit 1
;;
exit 1;;
esac

LOG=lanuched_agent-$(date +%Y%m%d%M).log
Expand All @@ -43,9 +103,16 @@ echo "JAVA VERSION: "
${JAVA} -version

export GH="${HOME}/bin/gh"
command -v "${GH}"
[[ -f "${GH}" ]] || echo "gh is not installed in ${HOME}/bin"
${GH} --version

# Verify that the GitHub CLI is authenticated before going any further.
# The first field of the "Token:" line printed by `gh auth status -t` is
# captured; the GitHub CLI prefixes that line with a check mark when the token
# is valid (behavior of the gh release pinned by this script - confirm when
# upgrading gh). Anything else, including no Token line at all, is a failure.
# The pipeline is allowed to fail (|| true) so that `set -e` does not abort
# before the explicit error message below can be printed.
# The binary validated above (${GH}) is used rather than whatever `gh` happens
# to be first on PATH.
check_mark=$("${GH}" auth status -t 2>&1 | grep "Token:" | awk '{print $1}') || true
if [[ "${check_mark}" != "✓" ]]; then
  echo "gh not authenticating with emcbot token"
  exit 1
fi
echo "gh authenticating with emcbot TOKEN ok"

if [[ -d "${JENKINS_AGENT_LANUCH_DIR}" ]]; then
echo "Jenkins Agent Lanuch Directory: ${JENKINS_AGENT_LANUCH_DIR}"
else
Expand All @@ -56,22 +123,62 @@ cd "${JENKINS_AGENT_LANUCH_DIR}"

# Download agent.jar from the controller when no local copy exists.
# --fail makes curl return non-zero on an HTTP error instead of saving the
# server's error page. The transfer is written to a temporary name and only
# renamed on success, so a failed or partial download is never left behind as
# agent.jar (an existing agent.jar suppresses all future downloads).
if [[ ! -f agent.jar ]]; then
  if ! curl --fail --silent --show-error --output agent.jar.download "${controller_url}/jnlpJars/agent.jar"; then
    rm -f agent.jar.download
    echo "ERROR: failed to download agent.jar from ${controller_url}" >&2
    exit 1
  fi
  mv agent.jar.download agent.jar
  echo "Updated agent.jar downloaded"
fi

# Guard: the Remote API token file must exist in the current directory.
# Its contents are loaded into JENKINS_TOKEN for the controller queries.
[[ -f "${controller_user_auth_token}" ]] || {
  echo "User Jenkins authetication TOKEN to the controller for using the Remote API does not exist"
  exit 1
}
JENKINS_TOKEN=$(< "${controller_user_auth_token}")

# Generate parse.py, a small helper that loads the JSON document in the file
# named by its first argument and prints the value of the top-level "offline"
# key, or the string 'True' when the key is absent. A JSON boolean is printed
# in Python's spelling (True / False), which is what callers compare against.
# The heredoc delimiter is quoted so the Python source is written literally,
# and the body of the `with` statement is indented as Python requires.
cat << 'EOF' > parse.py
#!/usr/bin/env python3
import json,sys
with open(sys.argv[1], 'r') as file:
    data = json.load(file)
print(data.get('offline','True'))
EOF
chmod u+x parse.py

JENKINS_TOKEN=$(cat jenkins_token)
#######################################
# Query the Jenkins controller's Remote API for this machine's node and print
# the node's "offline" value as rendered by ./parse.py.
# Globals (read): controller_user, JENKINS_TOKEN, controller_url, MACHINE_ID
# Outputs:  the parse.py result on stdout; diagnostics on stderr.
# Side effects: rewrites the file curl_response in the current directory.
# Exits:    with status 1 when the controller returns an empty response.
#           When called inside $(...), only the substitution subshell exits;
#           its non-zero status is what the caller sees.
#######################################
check_node_online() {
  local response
  rm -f curl_response
  # A curl failure is tolerated here (|| true) so that the empty-response
  # check below can report it with an explicit message.
  response=$(curl --silent -u "${controller_user}:${JENKINS_TOKEN}" "${controller_url}/computer/${MACHINE_ID^}-EMC/api/json?pretty=true") || true
  if [[ -z "${response}" ]]; then
    # stderr, not stdout: callers capture stdout as the node status, which
    # would otherwise swallow this message.
    echo "ERROR: Jenkins controller not reachable. Exiting with error." >&2
    exit 1
  fi
  printf '%s' "${response}" > curl_response
  ./parse.py curl_response
}

#######################################
# Start the Jenkins agent in the background under nohup and record the
# command line and the background PID in the log file.
# Globals (read):    host, JAVA, controller_url, MACHINE_ID, JENKINS_WORK_DIR, LOG
# Globals (written): nohup_PID - PID of the background nohup process
# Outputs: one status line on stdout; everything else goes to ${LOG}, which
#          is truncated at the start of each launch.
# The command is held in an array so each argument stays a single word even
# if a path contains whitespace (assumes JAVA is one executable path with no
# embedded options - confirm against where JAVA is set).
#######################################
lauch_agent () {
  local -a agent_cmd
  echo "Launching Jenkins Agent on ${host}"
  agent_cmd=(
    nohup "${JAVA}" -jar agent.jar
    -jnlpUrl "${controller_url}/computer/${MACHINE_ID^}-EMC/jenkins-agent.jnlp"
    -secret @jenkins-secret-file
    -workDir "${JENKINS_WORK_DIR}"
  )
  echo -e "Launching Jenkins Agent on ${host} with the command:\n${agent_cmd[*]}" > "${LOG}" 2>&1
  "${agent_cmd[@]}" >> "${LOG}" 2>&1 &
  nohup_PID=$!
  echo "Java agent running on PID: ${nohup_PID}" >> "${LOG}" 2>&1
}

# Forced launch: start the agent right away and stop, without querying the
# node status on the controller.
case "${force_launch}" in
  True)
    lauch_agent
    exit
    ;;
  *) ;;
esac

#
offline=$(curl --silent -u "${controller_user}:${JENKINS_TOKEN}" "${controller_url}/computer/${MACHINE_ID^}-EMC/api/json?pretty=true" | grep '\"offline\"' | awk '{gsub(/,/,"");print $3}') || true
echo "Jenkins Agent offline setting: ${offline}"
offline=$(set -e; check_node_online)

if [[ "${offline}" == "true" ]]; then
echo "Jenkins Agent is offline. Lanuching Jenkins Agent on ${host}"
command="nohup ${JAVA} -jar agent.jar -jnlpUrl ${controller_url}/computer/${MACHINE_ID^}-EMC/jenkins-agent.jnlp -secret @jenkins-secret-file -workDir ${JENKINS_WORK_DIR}"
echo -e "Lanuching Jenkins Agent on ${host} with the command:\n${command}" >& "${LOG}"
${command} >> "${LOG}" 2>&1 &
nohup_PID=$!
echo "Java agent running on PID: ${nohup_PID}" >> "${LOG}" 2>&1
echo "Java agent running on PID: ${nohup_PID}"
if [[ "${offline}" != "False" ]]; then
if [[ "${skip_wait}" != "True" ]]; then
echo "Jenkins Agent is offline. Waiting 5 more minutes to check again in the event it is a temp network issue"
sleep 300
offline=$(set -e; check_node_online)
fi
if [[ "${offline}" != "False" ]]; then
lauch_agent
else
echo "Jenkins Agent is online (nothing done)"
fi
else
echo "Jenkins Agent is online (nothing done)"
fi
4 changes: 2 additions & 2 deletions parm/config/gfs/config.resources
Original file line number Diff line number Diff line change
Expand Up @@ -890,8 +890,8 @@ case ${step} in
threads_per_task=1
walltime_gdas="03:00:00"
walltime_gfs="06:00:00"
ntasks=4
tasks_per_node=4
ntasks=1
tasks_per_node=1
export memory="80G"
;;

Expand Down
2 changes: 1 addition & 1 deletion ush/parsing_namelists_FV3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ cat >> input.nml <<EOF
iopt_tbot = ${iopt_tbot:-"2"}
iopt_stc = ${iopt_stc:-"1"}
iopt_trs = ${iopt_trs:-"2"}
iopt_diag = ${iopt_diag:-"1"}
iopt_diag = ${iopt_diag:-"2"}
debug = ${gfs_phys_debug:-".false."}
nstf_name = ${nstf_name}
nst_anl = ${nst_anl}
Expand Down
2 changes: 1 addition & 1 deletion ush/parsing_namelists_FV3_nest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ cat >> "${nml_file}" <<EOF
iopt_tbot = ${iopt_tbot:-"2"}
iopt_stc = ${iopt_stc:-"1"}
iopt_trs = ${iopt_trs:-"2"}
iopt_diag = ${iopt_diag:-"1"}
iopt_diag = ${iopt_diag:-"2"}
debug = ${gfs_phys_debug:-".false."}
nstf_name = ${nstf_name}
nst_anl = ${nst_anl}
Expand Down
2 changes: 1 addition & 1 deletion workflow/rocoto/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Tasks:
'prepsnowobs', 'snowanl',
'fcst',
'atmanlupp', 'atmanlprod', 'atmupp', 'goesupp',
'atmosprod', 'oceanprod', 'iceprod',
'atmos_prod', 'ocean_prod', 'ice_prod',
'verfozn', 'verfrad', 'vminmon',
'metp',
'tracker', 'genesis', 'genesis_fsu',
Expand Down

0 comments on commit f599cd7

Please sign in to comment.