Address inconsistencies in DataScaling infrastructure #1138
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: 'CPU tests'

on:
  # Allow manual runs
  workflow_dispatch:

  # Pull requests that target master or develop. `ready_for_review` is listed
  # so that a draft PR is picked up once it is marked as ready for review.
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    branches: [master, develop]

  # Pushes to master or develop, and pushes of git tags matching v*
  push:
    branches: [master, develop]
    tags: ['v*']
# Workflow-level environment variables
env:
  # Docker image name, without registry prefix or tag
  IMAGE_NAME: 'mala_conda_cpu'
  # Host name of the container registry
  IMAGE_REGISTRY: 'ghcr.io'
  # Relative directory in which the Docker image archive is saved and cached
  DOCKER_CACHE_PATH: 'cache/docker'
jobs:
  # Builds the CPU Docker image, decides which tag identifies it
  # (`latest` or the workflow run ID) and archives it when it is new.
  build-docker-image-cpu:
    # Skip draft pull requests
    if: ${{ !github.event.pull_request.draft }}
    runs-on: ubuntu-22.04
    # Values written to $GITHUB_ENV by the steps below, made available to
    # every job that lists this job in `needs`
    outputs:
      image-repo: ${{ env.IMAGE_REPO }}
      docker-tag: ${{ env.DOCKER_TAG }}
    steps:
      - name: Check out repository
        uses: actions/checkout@v4
        with:
          fetch-depth: "1"

      - name: Set environment variables
        run: |
          # Registry path uses the repository owner with uppercase letters lowered
          owner_lc=$(tr '[A-Z]' '[a-z]' <<< "$GITHUB_REPOSITORY_OWNER")
          IMAGE_REPO="$IMAGE_REGISTRY/$owner_lc"
          # Print the value and append it to $GITHUB_ENV so later steps of this job see it
          echo "IMAGE_REPO=$IMAGE_REPO" | tee -a "$GITHUB_ENV"

      - name: Restore cache
        id: cache-docker
        uses: actions/cache@v4
        with:
          # Cache entry is keyed by the workflow run ID
          key: ${{ github.run_id }}
          path: ${{ env.DOCKER_CACHE_PATH }}

      - name: Check existence of Docker tar archive
        id: check_file
        run: |
          # Report through a step output whether the cached archive is present
          if [[ -f "$DOCKER_CACHE_PATH/docker-image.tar.gz" ]]; then
            FILE_EXISTS=true
          else
            FILE_EXISTS=false
          fi
          echo "FILE_EXISTS=$FILE_EXISTS" >> "$GITHUB_OUTPUT"

      - name: Pull latest image from container registry
        # A failed pull is tolerated
        run: docker pull --quiet "$IMAGE_REPO/$IMAGE_NAME" || true

      - name: Build temporary Docker image
        run: |
          # Layer-cache source: the pulled `latest` image, unless an archive
          # was restored from the cache, in which case that image is loaded and used
          CACHE_FROM="$IMAGE_REPO/$IMAGE_NAME:latest"
          if [[ "${{ steps.check_file.outputs.FILE_EXISTS }}" == 'true' ]]; then
            docker load -i "$DOCKER_CACHE_PATH/docker-image.tar.gz"
            CACHE_FROM="$IMAGE_NAME:$GITHUB_RUN_ID"
          fi
          DOCKER_BUILDKIT=0 docker build . --file Dockerfile --tag "$IMAGE_NAME:local" --cache-from="$CACHE_FROM" --build-arg DEVICE=cpu
          # List the local and the registry images
          docker images --filter=reference="$IMAGE_NAME" --filter=reference="$IMAGE_REPO/$IMAGE_NAME"
          # Full image IDs of the registry `latest` image and of the fresh build
          IMAGE_ID_OLD=$(docker images --format "{{.ID}}" --no-trunc --filter=reference="$IMAGE_REPO/$IMAGE_NAME:latest")
          IMAGE_ID_NEW=$(docker images --format "{{.ID}}" --no-trunc --filter=reference="$IMAGE_NAME:local")
          # Equal IDs: keep using `latest`; otherwise the run ID becomes the tag
          if [[ "$IMAGE_ID_OLD" == "$IMAGE_ID_NEW" ]]; then
            echo "Image IDs are equal"
            DOCKER_TAG=latest
          else
            echo "Image IDs are different"
            DOCKER_TAG=$GITHUB_RUN_ID
          fi
          # Persist the IDs (read by the next step's `if`) and the tag (job output)
          {
            echo "IMAGE_ID_OLD=$IMAGE_ID_OLD"
            echo "IMAGE_ID_NEW=$IMAGE_ID_NEW"
            echo "DOCKER_TAG=$DOCKER_TAG"
          } >> "$GITHUB_ENV"

      - name: Tag and save temporary built Docker image
        # Only when the build differs from the registry's `latest` image
        if: ${{ env.IMAGE_ID_OLD != env.IMAGE_ID_NEW }}
        run: |
          mkdir -p "$DOCKER_CACHE_PATH"
          # The workflow run ID serves as the temporary Docker tag
          docker tag "$IMAGE_NAME:local" "$IMAGE_NAME:$GITHUB_RUN_ID"
          # Write the image to a tar archive, then compress it with pigz
          docker save "$IMAGE_NAME:$GITHUB_RUN_ID" -o "$DOCKER_CACHE_PATH/docker-image.tar"
          pigz -f -v --fast "$DOCKER_CACHE_PATH/docker-image.tar"
# ---- job: cpu-tests (entry of `jobs`) ----
# Starts a container from the image selected by build-docker-image-cpu,
# installs mala into it, downloads the test data and runs pytest.
  cpu-tests:
    needs: build-docker-image-cpu
    # Same runner image as the other jobs of this workflow; the ubuntu-20.04
    # hosted runner label has been retired by GitHub
    runs-on: ubuntu-22.04
    env:
      IMAGE_REPO: ${{ needs.build-docker-image-cpu.outputs.image-repo }}
      DOCKER_TAG: ${{ needs.build-docker-image-cpu.outputs.docker-tag }}
    steps:
      # DOCKER_TAG != 'latest': the image comes from the cached archive
      - name: "Prepare environment: Restore cache"
        if: env.DOCKER_TAG != 'latest'
        uses: actions/cache@v4
        id: cache-docker
        with:
          path: ${{ env.DOCKER_CACHE_PATH }}
          key: ${{ github.run_id }}
      - name: "Prepare environment: Load Docker image from cache"
        if: env.DOCKER_TAG != 'latest'
        run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz --quiet
      # DOCKER_TAG == 'latest': the image comes from the registry
      - name: "Prepare environment: Pull latest image from container registry"
        if: env.DOCKER_TAG == 'latest'
        run: |
          docker pull $IMAGE_REPO/$IMAGE_NAME:latest --quiet
          # Give it the local name that `docker run` below refers to
          docker image tag $IMAGE_REPO/$IMAGE_NAME:latest $IMAGE_NAME:latest
      - name: "Prepare environment: Run Docker container"
        run: |
          # Make the github workspace (i.e. checked out mala repository) available inside Docker container by binding it to /home via -v
          docker run -i -d --rm --name mala-cpu -v ${{ github.workspace }}:/home -w /home $IMAGE_NAME:$DOCKER_TAG /bin/bash &
          # Wait for Docker container to come online
          wait
          # Check running docker container
          docker ps
          # Check if docker container is running
          [[ $(docker inspect --format '{{json .State.Running}}' mala-cpu) == 'true' ]]
      - name: Check out repository (mala)
        uses: actions/checkout@v4
        with:
          fetch-depth: '1'
      - name: Install mala package
        # Exec all commands inside the mala-cpu container
        shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
        run: |
          # export Docker image Conda environment for a later comparison
          conda env export -n mala-cpu > env_before.yml
          # install mala package
          pip --no-cache-dir install -e .[opt,test] --no-build-isolation
      - name: Check if Conda environment meets the specified requirements
        shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
        run: |
          # export Conda environment _with_ mala package installed in it (and extra dependencies)
          conda env export -n mala-cpu > env_after.yml
          # if comparison fails, `install/mala_cpu_[base]_environment.yml` needs to be aligned with
          # `requirements.txt` and/or extra dependencies are missing in the Docker Conda environment
          if diff --brief env_before.yml env_after.yml
          then
            echo "Files env_before.yml and env_after.yml do not differ."
          else
            # `--color=always` is the GNU diff spelling; `--color-always` is rejected as an unknown option
            diff --side-by-side --color=always env_before.yml env_after.yml
          fi
      - name: Download test data repository from RODARE
        # The script below is fed to the Python interpreter inside the container
        shell: 'bash -c "docker exec -i mala-cpu python < {0}"'
        run: |
          import requests, shutil, zipfile
          # This DOI represents all versions, and will always resolve to the latest one
          DOI = "https://doi.org/10.14278/rodare.2900"
          # Resolve DOI and get record ID and the associated API URL
          # (only the final URL of this response is used)
          response = requests.get(DOI)
          *_, record_id = response.url.split("/")
          api_url = f"https://rodare.hzdr.de/api/records/{record_id}"
          # Download record from API and get the first file
          response = requests.get(api_url)
          # Fail with the HTTP error instead of a JSON decoding error
          response.raise_for_status()
          record = response.json()
          size = record["files"][0]["size"]
          download_link = record["files"][0]["links"]["self"]
          print(size, "bytes", "--", download_link)
          # TODO: implement some sort of auto retry for failed HTTP requests
          response = requests.get(download_link)
          # Do not write an HTTP error page to disk as if it were the archive
          response.raise_for_status()
          # Saving downloaded content to a file
          with open("test-data.zip", mode="wb") as file:
              file.write(response.content)
          # Get top level directory name (first entry of the archive listing)
          with zipfile.ZipFile("test-data.zip") as archive:
              dir_name = archive.namelist()[0]
          shutil.unpack_archive("test-data.zip", ".")
          print(f"Rename {dir_name} to mala_data")
          shutil.move(dir_name, "mala_data")
      - name: Test mala
        shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
        # Runs every test not marked "examples"; coverage of `mala` below 60 % fails the run
        run: MALA_DATA_REPO=$(pwd)/mala_data pytest --cov=mala --cov-fail-under=60 -m "not examples" --disable-warnings
# ---- job: retag-docker-image-cpu (entry of `jobs`) ----
# Tags the tested image and pushes all of its tags to the container registry.
  retag-docker-image-cpu:
    needs: [cpu-tests, build-docker-image-cpu]
    runs-on: ubuntu-22.04
    permissions:
      packages: write
    env:
      IMAGE_REPO: ${{ needs.build-docker-image-cpu.outputs.image-repo }}
      DOCKER_TAG: ${{ needs.build-docker-image-cpu.outputs.docker-tag }}
    # Trigger on pushes to master or develop (this includes _merged_ PRs) only if Docker image has changed and on git tag pushes
    # NOTE: The `env` context is not available for workflow key `jobs.<job_id>.if`.
    if: |
      ((contains(github.ref_name, 'develop') || contains(github.ref_name, 'master')) && needs.build-docker-image-cpu.outputs.docker-tag != 'latest')
      || startsWith(github.ref, 'refs/tags/')
    steps:
      # DOCKER_TAG != 'latest': the image comes from the cached archive
      - name: "Prepare environment: Restore cache"
        if: env.DOCKER_TAG != 'latest'
        uses: actions/cache@v4
        id: cache-docker
        with:
          path: ${{ env.DOCKER_CACHE_PATH }}
          key: ${{ github.run_id }}
      - name: "Prepare environment: Load Docker image from cache"
        if: env.DOCKER_TAG != 'latest'
        run: docker load -i $DOCKER_CACHE_PATH/docker-image.tar.gz --quiet
      # DOCKER_TAG == 'latest': the image comes from the registry
      - name: "Prepare environment: Pull latest image from container registry"
        if: env.DOCKER_TAG == 'latest'
        run: docker pull $IMAGE_REPO/$IMAGE_NAME:latest --quiet
      - name: Tag Docker image
        run: |
          # Execute on change of Docker image
          if [[ "$DOCKER_TAG" != 'latest' ]]; then
            docker tag $IMAGE_NAME:$GITHUB_RUN_ID $IMAGE_REPO/$IMAGE_NAME:latest
            docker tag $IMAGE_NAME:$GITHUB_RUN_ID $IMAGE_REPO/$IMAGE_NAME:${GITHUB_REF_NAME}-${GITHUB_SHA:0:7}
          fi
          # Execute on push of git tag.
          # The ref is read from the runner-provided environment variables instead of
          # being pasted into the script text through an expression.
          if [[ "$GITHUB_REF" == refs/tags/* ]]; then
            # Drop a leading "v" from the git tag name
            GIT_TAG="${GITHUB_REF_NAME#v}"
            echo "GIT_TAG=$GIT_TAG"
            docker tag "$IMAGE_REPO/$IMAGE_NAME:latest" "$IMAGE_REPO/$IMAGE_NAME:$GIT_TAG"
          fi
      - name: Log into container registry
        # Authentication required for pushing images
        env:
          # Passed through the environment so the token is not part of the script text
          REGISTRY_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: echo "$REGISTRY_TOKEN" | docker login "$IMAGE_REGISTRY" -u "$GITHUB_ACTOR" --password-stdin
      - name: Push Docker image
        run: |
          # The default shell runs without pipefail, so the pipeline's status would be
          # grep's and a failed `docker push` would go unnoticed
          set -o pipefail
          # grep only trims per-layer progress lines from the log
          docker push $IMAGE_REPO/$IMAGE_NAME --all-tags | grep -v -E 'Waiting|Layer already|Preparing|Pushed'