diff --git a/.github/workflows/container_maintenance.yml b/.github/workflows/container_maintenance.yml index 3a28f355e94..29705d06208 100644 --- a/.github/workflows/container_maintenance.yml +++ b/.github/workflows/container_maintenance.yml @@ -18,42 +18,14 @@ on: env: PLATFORMS: linux/amd64,linux/arm64 NUM_PAST_RELEASES: 3 - # TODO: change to "develop" in final PR - DEVELOP_BRANCH: 10478-version-base-img jobs: - discover: - name: Discover Release Matrix - runs-on: ubuntu-latest - permissions: - contents: read - packages: read - # TODO: re-enable for final PR - # Only run in upstream repo - avoid unnecessary runs in forks and only for scheduled - #if: ${{ github.repository_owner == 'IQSS' }} - outputs: - branches: ${{ steps.matrix.outputs.branches }} - current_release: ${{ steps.matrix.outputs.current_release }} - steps: - - name: Build branch matrix options - id: matrix - run: | - echo "branches=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | \ - jq '[ .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, "${{ env.DEVELOP_BRANCH }}" ]')" | tr -d "\n" | tr -s " " | \ - tee -a "$GITHUB_OUTPUT" - echo "current_release=$(curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | jq '.[0].tag_name' )" | tee -a "$GITHUB_OUTPUT" - build: name: Build image runs-on: ubuntu-latest permissions: contents: read packages: read - needs: discover - strategy: - fail-fast: false - matrix: - branch: ${{ fromJson(needs.discover.outputs.branches) }} # TODO: re-enable for final PR # Only run in upstream repo - avoid unnecessary runs in forks #if: ${{ github.repository_owner == 'IQSS' }} @@ -64,7 +36,6 @@ jobs: # Necessary as the checked out release branch might not contain the action as files uses: gdcc/wip-dataverse-base-image/.github/actions/setup-maven@10478-version-base-img with: - git-reference: ${{ matrix.branch }} pom-paths: modules/container-base/pom.xml # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and @@ -79,93 +50,21 @@ jobs: with: platforms: ${{ env.PLATFORMS }} - # Try to retrieve backport patches for this git ref (but don't fail if there aren't any) - # and try to apply them if present - - name: Get and apply backported patches - # There might be no patches - ignore errors - continue-on-error: true - run: | - mkdir -p "${GITHUB_WORKSPACE}/patches" - curl -sSL "https://github.com/${GITHUB_REPOSITORY}/archive/${DEVELOP_BRANCH}.tar.gz" | \ - tar -zxf - -C "${GITHUB_WORKSPACE}/patches" --wildcards "*/modules/container-base/src/backports/${{ matrix.branch }}" --strip-components=6 - find "${GITHUB_WORKSPACE}/patches" -type f -name '*.patch' -print0 | xargs -0 -n1 patch -p1 -s -i - - # Determine the base image name we are going to use from here on - - name: Determine base image name - run: | - if [[ "${{ matrix.branch }}" = "${{ env.DEVELOP_BRANCH }}" ]]; then - echo "BASE_IMAGE=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -q -DforceStdout )" | tee -a "${GITHUB_ENV}" - echo "BASE_IMAGE_UPCOMING=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout )" | tee -a "${GITHUB_ENV}" - else - echo "BASE_IMAGE=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout )" | tee -a "${GITHUB_ENV}" - fi - - # Figure out if a rebuild is necessary because either there is an updated Java image or our installed packages need updates - - name: Check for recent Temurin image updates - id: temurin-check - # TODO: change to upstream location in final PR - uses: gdcc/wip-dataverse-base-image/.github/actions/check-newer-parent-image@10478-version-base-img - with: - parent: "$( mvn help:evaluate -Pct -f modules/container-base -Dexpression=java.image -q -DforceStdout )" - derived: "${{ env.BASE_IMAGE }}" - # TODO: if we introduce more flavors as a matrix, we need to adapt the install command to check for updates - - name: Check for package updates in base image - id: package-check - if: ${{ steps.temurin-check.outputs.is-more-recent == 'false' }} + # Discover the releases we want to maintain + - name: Discover maintained releases + id: discover run: | - PKGS="$( grep "ARG PKGS" modules/container-base/src/main/docker/Dockerfile | cut -f2 -d= | tr -d '"' )" - if [[ ! $( docker run --rm -u 0 "${BASE_IMAGE}" sh -c "apt update >&2 && apt install -s ${PKGS}" | grep "0 upgraded" ) ]]; then - echo "Base image $BASE_IMAGE needs updates for our custom installed packages" - echo "newer_packages=true" >> "${GITHUB_OUTPUT}" - else - echo "Base image $BASE_IMAGE has no updates for our custom installed packages" - echo "newer_packages=false" >> "${GITHUB_OUTPUT}" - fi + DEVELOPMENT_BRANCH=$( curl -f -sS https://api.github.com/repos/${{ github.repository }} | jq -r '.default_branch' ) + echo "DEVELOPMENT_BRANCH=$DEVELOPMENT_BRANCH" | tee -a "$GITHUB_ENV" + echo "branches=$( curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | jq -r " .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, \"${DEVELOPMENT_BRANCH}\" " )" | tee -a "${GITHUB_OUTPUT}" + - # TODO: In a future version of this script, we might want to include checking for other security updates, - # not just updates to the packages we installed. - # grep security /etc/apt/sources.list > /tmp/security.list - # apt-get update -oDir::Etc::Sourcelist=/tmp/security.list - # apt-get dist-upgrade -y -oDir::Etc::Sourcelist=/tmp/security.list -oDir::Etc::SourceParts=/bin/false -s - - - name: Calculate revision number for immutable tag (on release branches only) - if: ${{ matrix.branch != env.DEVELOP_BRANCH }} - id: revision-tag - # TODO: change to upstream location in final PR - uses: gdcc/wip-dataverse-base-image/.github/actions/get-image-revision@10478-version-base-img - with: - image-ref: ${{ env.BASE_IMAGE }} - tag-options-prefix: "-Dbase.image.tag.suffix='' -Ddocker.tags.revision=" - - name: Configure update of "latest" tag for development branch - id: develop-tag - if: ${{ matrix.branch == env.DEVELOP_BRANCH }} + # Execute matrix build for the discovered branches + - name: Execute build matrix script + id: execute run: | - echo "tag-options=-Ddocker.tags.develop=unstable -Ddocker.tags.upcoming=${BASE_IMAGE_UPCOMING#*:}" | tee -a "${GITHUB_OUTPUT}" - - name: Deploy multi-arch base container image to Docker Hub - if: ${{ steps.temurin-check.outputs.is-more-recent == 'true' || steps.package-check.outputs.newer_packages == 'true' || inputs.force_build }} - id: build - run: | - mvn -f modules/container-base -Pct deploy -Ddocker.noCache -Ddocker.platforms=${{ env.PLATFORMS }} \ - -Ddocker.imagePropertyConfiguration=override ${{ steps.develop-tag.outputs.tag-options }} ${{ steps.revision-tag.outputs.tag-options }} - echo "rebuild=true" | tee -a "${GITHUB_OUTPUT}" - - # TODO: this is here to not drop the knowledge about matrix output workarounds (for now) - # - if: always() - # name: Save status (workaround for matrix outputs) - # run: | - # # steps.build.outcome is the status BEFORE continue-on-error - # echo "STATUS_$( echo "${{ matrix.branch }}" | tr ".:;,-/ " "_" )=${{ steps.build.outcome }}" | tee -a "${GITHUB_ENV}" - - # TODO: As part of issue #10618 we will need to create this action, shipping updated app images - #- name: Rebuild application container - # if: ${{ steps.build.outputs.rebuild }} - # uses: ./.github/actions/deploy-app-container - # with: - # registry: "" - # registry_token: "" - # ref: "" - # base_image: "" - # base_image_tag: "" + echo "force_build=${{ inputs.force_build }}" + .github/workflows/scripts/maintenance-job.sh ${{ steps.discover.outputs.branches }} # TODO: This job should become part of the matrix as an action, so we don't need to fiddle with matrix outputs hacks #push-app-img: diff --git a/.github/workflows/scripts/maintenance-job.sh b/.github/workflows/scripts/maintenance-job.sh new file mode 100755 index 00000000000..a3b8c72bb52 --- /dev/null +++ b/.github/workflows/scripts/maintenance-job.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +# A matrix-like job to maintain a number of releases as well as the latest snap of Dataverse. + +# PREREQUISITES: +# - You have Java, Maven, QEMU and Docker all setup and ready to go +# - You obviously checked out the develop branch, otherwise you'd not be executing this script +# - You added all the branch names you want to run maintenance for as arguments +# Optional, but recommended: +# - You added a DEVELOPMENT_BRANCH env var to your runner/job env with the name of the development branch +# - You added a FORCE_BUILD=0|1 env var to indicate if the base image build should be forced +# - You added a PLATFORMS env var with all the target platforms you want to build for + +# NOTE: +# This script is a culmination of Github Action steps into a single script. +# The reason to put all of this in here is due to the complexity of the Github Action and the limitation of the +# matrix support in Github actions, where outputs cannot be aggregated or otherwise used further. + +set -euo pipefail + +# Get all the inputs +# If not within a runner, just print to stdout (duplicating the output in case of tee usage, but that's ok for testing) +GITHUB_OUTPUT=${GITHUB_OUTPUT:-"/proc/self/fd/1"} +GITHUB_ENV=${GITHUB_ENV:-"/proc/self/fd/1"} +GITHUB_WORKSPACE=${GITHUB_WORKSPACE:-"$(pwd)"} +GITHUB_SERVER_URL=${GITHUB_SERVER_URL:-"https://github.com"} +GITHUB_REPOSITORY=${GITHUB_REPOSITORY:-"IQSS/dataverse"} + +MAINTENANCE_WORKSPACE="${GITHUB_WORKSPACE}/maintenance-job" + +DEVELOPMENT_BRANCH="${DEVELOPMENT_BRANCH:-"develop"}" +FORCE_BUILD="${FORCE_BUILD:-"0"}" +PLATFORMS="${PLATFORMS:-"linux/amd64,linux/arm64"}" + +# Setup and validation +if [[ -z "$*" ]]; then + >&2 echo "You must give a list of branch names as arguments" + exit 1; +fi + +source "$( dirname "$0" )/utils.sh" + +# Delete old stuff if present +rm -rf "$MAINTENANCE_WORKSPACE" +mkdir -p "$MAINTENANCE_WORKSPACE" + +# Cache the image tags we maintain in this array (same order as branches array!) +# This list will be used to build the support matrix within the Docker Hub image description +SUPPORTED_ROLLING_TAGS=() + +for BRANCH in "$@"; do + echo "::group::Running maintenance for $BRANCH" + + # 0. Determine if this is a development branch and the most current release + IS_DEV=0 + if [[ "$BRANCH" = "$DEVELOPMENT_BRANCH" ]]; then + IS_DEV=1 + fi + IS_CURRENT_RELEASE=0 + if [[ "$BRANCH" = $( curl -f -sS "https://api.github.com/repos/$GITHUB_REPOSITORY/releases" | jq '.[0].tag_name' ) ]]; then + IS_CURRENT_RELEASE=1 + fi + + # 1. Let's get the maintained sources + git clone -c advice.detachedHead=false --depth 1 --branch "$BRANCH" "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}" "$MAINTENANCE_WORKSPACE/$BRANCH" + # Switch context + cd "$MAINTENANCE_WORKSPACE/$BRANCH" + + # 2. Now let's apply the patches (we have them checked out in $GITHUB_WORKSPACE, not necessarily in this local checkout) + find "${GITHUB_WORKSPACE}/modules/container-base/src/backports/$BRANCH" -type f -name '*.patch' -print0 | xargs -0 -n1 patch -p1 -s -i + + # 3. Determine the base image ref (/:) + BASE_IMAGE_REF="" + # For the dev branch we want to full flexi stack tag, to detect stack upgrades requiring new build + if (( IS_DEV )); then + BASE_IMAGE_REF=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -q -DforceStdout ) + else + BASE_IMAGE_REF=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout ) + fi + + # 4. Check for Temurin image updates + JAVA_IMAGE_REF=$( mvn help:evaluate -Pct -f modules/container-base -Dexpression=java.image -q -DforceStdout ) + NEWER_JAVA_IMAGE=0 + if check_newer_parent "$JAVA_IMAGE_REF" "$BASE_IMAGE_REF"; then + NEWER_JAVA_IMAGE=1 + fi + + # 5. Check for package updates in base image + PKGS="$( grep "ARG PKGS" modules/container-base/src/main/docker/Dockerfile | cut -f2 -d= | tr -d '"' )" + NEWER_PKGS=0 + # Don't bother with package checks if the java image is newer already + if ! (( NEWER_JAVA_IMAGE )); then + if check_newer_pkgs "$BASE_IMAGE_REF" "$PKGS"; then + NEWER_PKGS=1 + fi + fi + + # 6. Get current immutable revision tag if not on the dev branch + REV=$( current_revision "$BASE_IMAGE_REF" ) + CURRENT_REV_TAG="${BASE_IMAGE_REF#*:}-r$REV" + NEXT_REV_TAG="${BASE_IMAGE_REF#*:}-r$(( REV + 1 ))" + + # 7. Let's put together what tags we want added to this build run + TAG_OPTIONS="" + if ! (( IS_DEV )); then + TAG_OPTIONS="-Dbase.image=$BASE_IMAGE_REF -Ddocker.tags.revision=$NEXT_REV_TAG" + + # In case of the current release, add the "latest" tag as well. Also add to list of rolling tags. + if (( IS_CURRENT_RELEASE )); then + TAG_OPTIONS="$TAG_OPTIONS -Ddocker.tags.latest=latest" + SUPPORTED_ROLLING_TAGS+=("[\"latest\", \"${BASE_IMAGE_REF#*:}\"]") + else + SUPPORTED_ROLLING_TAGS+=("[\"${BASE_IMAGE_REF#*:}\"]") + fi + else + UPCOMING_TAG=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image.tag -Dbase.image.tag.suffix="" -q -DforceStdout ) + TAG_OPTIONS="-Ddocker.tags.develop=unstable -Ddocker.tags.upcoming=$UPCOMING_TAG" + + SUPPORTED_ROLLING_TAGS+=("[\"unstable\", \"$UPCOMING_TAG\", \"${BASE_IMAGE_REF#*:}\"]") + fi + echo "Determined these additional Maven tag options: $TAG_OPTIONS" + + # 7. Let's build the base image if necessary + NEWER_BASE_IMAGE=0 + if (( NEWER_JAVA_IMAGE + NEWER_PKGS + FORCE_BUILD > 0 )); then + mvn -Pct -f modules/container-base deploy -Ddocker.noCache -Ddocker.platforms="${PLATFORMS}" \ + -Ddocker.imagePropertyConfiguration=override $TAG_OPTIONS + NEWER_BASE_IMAGE=1 + fi + + if (( NEWER_BASE_IMAGE )); then + echo "Built a new base image, should continue with application images now..." + # TODO: rebuild the app images here + fi + + echo "::endgroup::" +done diff --git a/.github/workflows/scripts/utils.sh b/.github/workflows/scripts/utils.sh new file mode 100644 index 00000000000..e700f685470 --- /dev/null +++ b/.github/workflows/scripts/utils.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +set -euo pipefail + +function check_newer_parent() { + PARENT_IMAGE="$1" + # Get namespace, default to "library" if not found + PARENT_IMAGE_NS="${PARENT_IMAGE%/*}" + if [[ "$PARENT_IMAGE_NS" = "${PARENT_IMAGE}" ]]; then + PARENT_IMAGE_NS="library" + fi + PARENT_IMAGE_REPO="${PARENT_IMAGE%:*}" + PARENT_IMAGE_TAG="${PARENT_IMAGE#*:}" + + PARENT_IMAGE_LAST_UPDATE="$( curl -sS "https://hub.docker.com/v2/namespaces/${PARENT_IMAGE_NS}/repositories/${PARENT_IMAGE_REPO}/tags/${PARENT_IMAGE_TAG}" | jq -r .last_updated )" + if [[ "$PARENT_IMAGE_LAST_UPDATE" = "null" ]]; then + echo "::error title='Invalid PARENT Image'::Could not find ${PARENT_IMAGE} in the registry" + exit 1 + fi + + DERIVED_IMAGE="$2" + # Get namespace, default to "library" if not found + DERIVED_IMAGE_NS="${DERIVED_IMAGE%/*}" + if [[ "${DERIVED_IMAGE_NS}" = "${DERIVED_IMAGE}" ]]; then + DERIVED_IMAGE_NS="library" + fi + DERIVED_IMAGE_REPO="$( echo "${DERIVED_IMAGE%:*}" | cut -f2 -d/ )" + DERIVED_IMAGE_TAG="${DERIVED_IMAGE#*:}" + + DERIVED_IMAGE_LAST_UPDATE="$( curl -sS "https://hub.docker.com/v2/namespaces/${DERIVED_IMAGE_NS}/repositories/${DERIVED_IMAGE_REPO}/tags/${DERIVED_IMAGE_TAG}" | jq -r .last_updated )" + if [[ "$DERIVED_IMAGE_LAST_UPDATE" = "null" || "$DERIVED_IMAGE_LAST_UPDATE" < "$PARENT_IMAGE_LAST_UPDATE" ]]; then + echo "Parent image $PARENT_IMAGE has a newer release ($PARENT_IMAGE_LAST_UPDATE), which is more recent than $DERIVED_IMAGE ($DERIVED_IMAGE_LAST_UPDATE)" + return 0 + else + echo "Parent image $PARENT_IMAGE ($PARENT_IMAGE_LAST_UPDATE) is older than $DERIVED_IMAGE ($DERIVED_IMAGE_LAST_UPDATE)" + return 1 + fi +} + +function check_newer_pkgs() { + IMAGE="$1" + PKGS="$2" + + docker run --rm -u 0 "${IMAGE}" sh -c "apt update >/dev/null 2>&1 && apt install -s ${PKGS}" | tee /proc/self/fd/2 | grep -q "0 upgraded" + + if [[ $? ]]; then + echo "Base image $IMAGE needs updates for our custom installed packages" + return 0 + else + echo "Base image $IMAGE has no updates for our custom installed packages" + return 1 + fi + + # TODO: In a future version of this script, we might want to include checking for other security updates, + # not just updates to the packages we installed. + # grep security /etc/apt/sources.list > /tmp/security.list + # apt-get update -oDir::Etc::Sourcelist=/tmp/security.list + # apt-get dist-upgrade -y -oDir::Etc::Sourcelist=/tmp/security.list -oDir::Etc::SourceParts=/bin/false -s + +} + +function current_revision() { + IMAGE="$1" + IMAGE_NS_REPO="${IMAGE%:*}" + IMAGE_TAG="${IMAGE#*:}" + + if [[ "$IMAGE_TAG" = "$IMAGE_NS_REPO" ]]; then + >&2 echo "You must provide an image reference in the format [/]:" + exit 1 + fi + + case "$IMAGE_NS_REPO" in + */*) :;; # namespace/repository syntax, leave as is + *) IMAGE_NS_REPO="library/$IMAGE_NS_REPO";; # bare repository name (docker official image); must convert to namespace/repository syntax + esac + + # Without such a token we may run into rate limits + # OB 2024-09-16: for some reason using this token stopped working. Let's go without and see if we really fall into rate limits. + # token=$( curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:$IMAGE_NS_REPO:pull" ) + + ALL_TAGS="$( + i=0 + while [ $? == 0 ]; do + i=$((i+1)) + # OB 2024-09-16: for some reason using this token stopped working. Let's go without and see if we really fall into rate limits. + # RESULT=$( curl -s -H "Authorization: Bearer $token" "https://registry.hub.docker.com/v2/repositories/$IMAGE_NS_REPO/tags/?page=$i&page_size=100" ) + RESULT=$( curl -s "https://registry.hub.docker.com/v2/repositories/$IMAGE_NS_REPO/tags/?page=$i&page_size=100" ) + if [[ $( echo "$RESULT" | jq '.message' ) != "null" ]]; then + # If we run into an error on the first attempt, that means we have a problem. + if [[ "$i" == "1" ]]; then + >&2 echo "Error when retrieving tag data: $( echo "$RESULT" | jq '.message' )" + exit 2 + # Otherwise it will just mean we reached the last page already + else + break + fi + else + echo "$RESULT" | jq -r '."results"[]["name"]' + # DEBUG: + #echo "$RESULT" | >&2 jq -r '."results"[]["name"]' + fi + done + )" + + # Note: if a former tag could not be found, it just might not exist already. Start new series with rev 0 + echo "$ALL_TAGS" | grep "${IMAGE_TAG}-r" | sed -e "s#${IMAGE_TAG}-r##" | sort -h | tail -n1 || echo "-1" +}