From 81ebba39ad62a347996c0b457f858f0e2a9e3bd5 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 5 Dec 2024 17:33:49 -0800 Subject: [PATCH] Wait for docker build (#6013) This is a short-term mitigation for https://github.com/pytorch/pytorch/issues/141885 in which any changes touching `.ci/docker` would cause all the builds to fail until the Docker build workflow finishes building the images. At the moment, we don't have a good way to tell the build workflow to wait for the new Docker image, so my fix here attempts to inject a delay when the action is called by `_linux_build`. It will wait up to 90 minutes for the Docker build to finish. ### Testing https://github.com/pytorch/pytorch/pull/142177 --- .../actions/calculate-docker-image/action.yml | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/actions/calculate-docker-image/action.yml b/.github/actions/calculate-docker-image/action.yml index 74c88e14d4..cea29cff14 100644 --- a/.github/actions/calculate-docker-image/action.yml +++ b/.github/actions/calculate-docker-image/action.yml @@ -87,6 +87,7 @@ runs: DOCKER_IMAGE: ${{ steps.calculate-image.outputs.docker-image }} DOCKER_TAG: ${{ steps.calculate-image.outputs.docker-tag }} DOCKER_REGISTRY: ${{ inputs.docker-registry }} + DOCKER_PUSH: ${{ inputs.push }} run: | set +e set -x @@ -101,10 +102,25 @@ runs: retry login "${DOCKER_REGISTRY}" - # Check if image already exists, if it does then skip building it - if docker manifest inspect "${DOCKER_IMAGE}"; then - exit 0 - fi + START_TIME=$(date +%s) + # Wait up to 90 minutes + while [[ $(( $(date +%s) - 5400 )) -lt $START_TIME ]]; do + # Check if image already exists, if it does then skip building it + if docker manifest inspect "${DOCKER_IMAGE}"; then + exit 0 + fi + + # NB: This flag is used by Docker build workflow to push the image to ECR, so we can + # use this to differentiate between the Docker build and regular build jobs. 
For the + # latter, it will wait for the Docker images to become available before continuing + if [ "${DOCKER_PUSH:-false}" == "true" ]; then + # It's a Docker build job, let's build the image + break + else + # It's a regular build job, wait for the image to become available + sleep 300 + fi + done # NB: This part requires a full checkout. Otherwise, the merge base will # be empty. The default action would be to continue rebuild the image