Skip to content

Commit

Permalink
Update Java Agent launching script for Jenkins connections (#2762)
Browse files Browse the repository at this point in the history
Made updates to the Jenkins Launching Script for robustness and less
ambiguous documentation:
- Clearer distinction between required user token for the remote api and
the systems token for launching
- Added pre-checks: `gh` is authenticating, named compliant token and
secret file exists
- More robust JSON-based parser of the remote API response for checking
the state of the Node connection
- For `cron` use a 5 minute pause and recheck was added before
re-launching of the java agent
- Added concise header documentation of requirements and purpose 
---------

Co-authored-by: David Huber <[email protected]>
Co-authored-by: Walter Kolczynski - NOAA <[email protected]>
  • Loading branch information
3 people authored Aug 2, 2024
1 parent b73b1fd commit 0706c59
Showing 1 changed file with 125 additions and 18 deletions.
143 changes: 125 additions & 18 deletions ci/scripts/utils/launch_java_agent.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,70 @@
#!/bin/env bash

# Exit as soon as an unhandled command returns a non-zero status.
set -e

# ==============================================================================
# Script Name: launch_java_agent.sh
#
# Description:
# This script automates the process of launching a Jenkins agent
# on a specified machine. It ensures that the necessary
# prerequisites are met, such as the availability of JAVA_HOME,
# the Jenkins agent launch directory, and proper authentication
# with GitHub.
#
# It then proceeds to check if the Jenkins node is online and
# decides whether to launch the Jenkins agent based on the node's
# status. The agent is launched in the background,
# and its PID is logged for reference.
#
# Prerequisites:
# JAVA_HOME must be set to a valid JDK installation.
# Jenkins agent launch directory must exist and be specified.
# GitHub CLI (gh) must be installed and authenticated for messaging
# from the Jenkins controller to GitHub PR via shell commands.
# Jenkins agent launch directory must exist and be specified.
# TODO: Must use GitHub CLI v2.25.1 (newer versions have issues)
# https://github.com/cli/cli/releases/download/v2.25.1/gh_2.25.1_linux_amd64.tar.gz
# Jenkins controller URL and authentication token must be provided.
# jenkins-secret-file:
# Must be present in the Jenkins agent launch directory.
# This file contains the secret key for the Jenkins agent
# established by the Jenkins administrator for each Node.
# jenkins_token:
# Must be present in the Jenkins agent launch directory.
# This file contains the user authentication token for the Jenkins controller
# to use the Remote API. This token can be generated by the user
# on the Jenkins controller.
# controller_user:
# Must be set to the Jenkins controller username corresponding to the jenkins_token.
#
# Usage: ./launch_java_agent.sh [-n] [-f]
# The optional '-n' (now) argument makes the script launch the Jenkins
# agent without waiting before trying again.
# The optional '-f' (force) argument forces the script to launch the Jenkins agent regardless of the node status.
#
# ==============================================================================

# Parse command-line flags.
#   -f  launch the agent without consulting the node status
#   -n  skip the wait-and-recheck pause before launching
#   -h  print usage and exit 0
# Any other flag is reported on stderr and the script exits 1.
force_launch="False"
skip_wait="False"
while getopts ":fnh" flag; do
  case "${flag}" in
    f) force_launch="True" ;;
    n) skip_wait="True" ;;
    h)
      # Continuation lines stay at column 0: they are part of the printed string.
      echo "Usage: ./launch_java_agent.sh [-n] [-f]
Two mutually exclusive optional arguments:
-n (now) causes the script to launch the Jenkins agent without waiting before trying again.
-f (force) forces the script to launch the Jenkins regarless of its connection status."
      exit 0
      ;;
    *)
      # The leading ':' in the optstring selects silent error reporting:
      # flag is '?' and the offending option character is in OPTARG.
      echo "Unknown flag: -${OPTARG}" >&2
      exit 1
      ;;
  esac
done

# Jenkins controller endpoint queried through the Remote API.
controller_url="https://jenkins.epic.oarcloud.noaa.gov"
# Honor a controller_user supplied by the caller's environment; only fall back
# to the default account when none is set. (An unconditional assignment here
# would make the default expansion pointless and override the caller.)
controller_user=${controller_user:-"terry.mcguinness"}
# Name of the file that holds the user's Remote API token.
controller_user_auth_token="jenkins_token"

# Directory three levels above this script's location.
HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
host=$(hostname)

Expand All @@ -13,12 +75,10 @@ host=$(hostname)
# Identify the platform and refuse to run anywhere but the supported machines.
# NOTE(review): detect_machine.sh is presumably what sets MACHINE_ID - confirm.
source "${HOMEgfs}/ush/detect_machine.sh"
case "${MACHINE_ID}" in
  hera | orion | hercules | wcoss2)
    echo "Launch Jenkins Java Controler on ${MACHINE_ID}"
    ;;
  *)
    echo "Unsupported platform. Exiting with error."
    exit 1
    ;;
esac

# Timestamp is year-month-day-hour-minute. Without the hour, launches at the
# same minute of different hours on one day would share (and truncate) a log.
LOG=lanuched_agent-$(date +%Y%m%d%H%M).log
Expand All @@ -43,9 +103,16 @@ echo "JAVA VERSION: "
"${JAVA}" -version

# GitHub CLI binary, expected under the user's home directory.
export GH="${HOME}/bin/gh"
if [[ ! -f "${GH}" ]]; then
  # Stop here: every later gh call would fail without the binary.
  echo "gh is not installed in ${HOME}/bin"
  exit 1
fi
"${GH}" --version

# Rely on the exit status of `gh auth status` (non-zero when authentication has
# problems) instead of scraping the marker in front of the "Token:" line. The
# scraped value is non-empty exactly when a token line is present, so testing
# it against the empty string rejected authenticated sessions and accepted
# missing ones. The status output is discarded so the token is never printed.
if ! "${GH}" auth status > /dev/null 2>&1; then
  echo "gh not authenticating with emcbot token"
  exit 1
fi
echo "gh authenticating with emcbot TOKEN ok"

if [[ -d "${JENKINS_AGENT_LANUCH_DIR}" ]]; then
echo "Jenkins Agent Lanuch Directory: ${JENKINS_AGENT_LANUCH_DIR}"
else
Expand All @@ -56,22 +123,62 @@ cd "${JENKINS_AGENT_LANUCH_DIR}"

# Fetch agent.jar from the controller only when no local copy exists.
if [[ ! -f agent.jar ]]; then
  # --fail makes curl return non-zero on an HTTP error instead of saving the
  # server's error page under the name agent.jar.
  if ! curl --fail --silent --show-error -O "${controller_url}/jnlpJars/agent.jar"; then
    echo "ERROR: unable to download agent.jar from ${controller_url}" >&2
    # Drop any partial download so the next run retries cleanly.
    rm -f agent.jar
    exit 1
  fi
  echo "Updated agent.jar downloaded"
fi

# The user's Remote API token file must already exist; bail out otherwise,
# then load its contents into JENKINS_TOKEN.
[[ -f "${controller_user_auth_token}" ]] || {
  echo "User Jenkins authetication TOKEN to the controller for using the Remote API does not exist"
  exit 1
}
JENKINS_TOKEN=$(< "${controller_user_auth_token}")

# Write a small helper that loads the JSON file named by its first argument and
# prints the value of its "offline" key (Python's True/False), or the string
# 'True' when the key is absent.
# The delimiter is quoted so the shell never expands anything in the Python
# text, and the body of the `with` statement is indented as Python requires.
cat << 'EOF' > parse.py
#!/usr/bin/env python3
import json,sys
with open(sys.argv[1], 'r') as file:
    data = json.load(file)
print(data.get('offline','True'))
EOF
chmod u+x parse.py

# Read the token from the configured file name rather than a hard-coded one;
# falls back to "jenkins_token" when controller_user_auth_token is not set.
JENKINS_TOKEN=$(cat "${controller_user_auth_token:-jenkins_token}")
#######################################
# Query the Jenkins Remote API for this machine's node and print its state.
# Globals:   controller_user, JENKINS_TOKEN, controller_url, MACHINE_ID (read)
# Outputs:   stdout - whatever ./parse.py prints for the saved response;
#            stderr - diagnostics
# Returns:   exits 1 when the controller returns no data; otherwise the exit
#            status of ./parse.py
#######################################
check_node_online() {
  local response
  rm -f curl_response
  # A failed transfer is tolerated here and detected below as an empty response.
  response=$(curl --silent -u "${controller_user}:${JENKINS_TOKEN}" "${controller_url}/computer/${MACHINE_ID^}-EMC/api/json?pretty=true") || true
  if [[ -z "${response}" ]]; then
    # Report on stderr: callers capture stdout as the node state, so a message
    # written there would be swallowed instead of shown.
    echo "ERROR: Jenkins controller not reachable. Exiting with error." >&2
    exit 1
  fi
  printf '%s' "${response}" > curl_response
  ./parse.py curl_response
}

#######################################
# Start the Jenkins Java agent in the background and record its PID.
# Globals:   JAVA, controller_url, MACHINE_ID, JENKINS_WORK_DIR, host, LOG (read)
#            nohup_PID (written) - PID of the background job
# Outputs:   a one-line notice on stdout; the command line, the agent's own
#            output and the PID go to the file named by LOG (truncated first)
#######################################
lauch_agent () {
  # Build the command as an array so values containing spaces (for example the
  # java path or the work directory) stay single arguments.
  local -a agent_cmd=(
    nohup "${JAVA}" -jar agent.jar
    -jnlpUrl "${controller_url}/computer/${MACHINE_ID^}-EMC/jenkins-agent.jnlp"
    -secret @jenkins-secret-file
    -workDir "${JENKINS_WORK_DIR}"
  )
  echo "Launching Jenkins Agent on ${host}"
  echo -e "Launching Jenkins Agent on ${host} with the command:\n${agent_cmd[*]}" > "${LOG}" 2>&1
  "${agent_cmd[@]}" >> "${LOG}" 2>&1 &
  nohup_PID=$!
  echo "Java agent running on PID: ${nohup_PID}" >> "${LOG}" 2>&1
}

# A forced launch starts the agent straight away and ends the script with the
# launcher's status; otherwise fall through to the node status check.
[[ "${force_launch}" != "True" ]] || {
  lauch_agent
  exit
}

# Decide whether to launch, based on the node state reported by the controller.
# check_node_online prints "False" when the node's "offline" field is false;
# every other value is treated as offline. The superseded grep/awk probe and
# its inline launch are dropped: they compared against lowercase "true", which
# the JSON helper never prints, and left the if/fi nesting unbalanced.
offline=$(set -e; check_node_online) || exit 1

if [[ "${offline}" != "False" ]]; then
  if [[ "${skip_wait}" != "True" ]]; then
    # Give a transient network problem time to clear before relaunching.
    echo "Jenkins Agent is offline. Waiting 5 more minutes to check again in the event it is a temp network issue"
    sleep 300
    offline=$(set -e; check_node_online) || exit 1
  fi
  if [[ "${offline}" != "False" ]]; then
    lauch_agent
  else
    echo "Jenkins Agent is online (nothing done)"
  fi
else
  echo "Jenkins Agent is online (nothing done)"
fi

0 comments on commit 0706c59

Please sign in to comment.