# .github/workflows/benchmark_large.yml

# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Executes large benchmarks that are expected to take multiple hours.

# Workflow display name. It is also what `github.workflow` expands to in the
# concurrency group defined in this file.
name: Benchmark Large

on:
  # Manual trigger; accepts a per-device shard-count mapping.
  workflow_dispatch:
    inputs:
      shard-count:
        type: string
        # Please keep this default value in sync with the jobs.build_e2e_test_artifacts.with.shard-count field below
        default: a2-highgpu-1g=1,c2-standard-60=2,default=1
        description: |
          A device-name to integer mapping as a comma separated list.
          Allows to assign a distinct shard count for each target device.
          The reserved keyword `default` assigns a shard count to all target devices
          that are not explicitly listed.
  schedule:
    # Twice a day, at 09:00 UTC and 21:00 UTC.
    - cron: "0 09,21 * * *"

concurrency:
  # A newly started run cancels queued and in-progress runs that share its
  # group.
  cancel-in-progress: true
  # The group is the workflow name (so that different workflows never collide)
  # followed by the PR number when the triggering event is a pull request
  # (presubmit), and by the commit hash otherwise (postsubmit).
  group: "${{ github.workflow }}-${{ github.event.number || github.sha }}"

env:
  # Deliberately defined in env rather than as an output of setup: the path
  # embeds the run attempt, which has to be the attempt currently executing and
  # not whichever attempt the setup step last ran in.
  GCS_DIR: "gs://iree-github-actions-${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}-artifacts/${{ github.run_id }}/${{ github.run_attempt }}"

# Jobs are organized into groups and topologically sorted by dependencies
jobs:
  # Calls the reusable setup workflow. The jobs below read its outputs
  # (enabled-jobs, runner-group, runner-env, write-caches).
  setup:
    uses: ./.github/workflows/setup.yml

  # Calls the reusable build_all workflow, but only when setup lists
  # `build_all` among the enabled jobs.
  build_all:
    uses: ./.github/workflows/build_all.yml
    needs:
      - setup
    if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'build_all')
    with:
      # Runner selection and cache policy are all decided by setup.
      write-caches: ${{ needs.setup.outputs.write-caches }}
      runner-env: ${{ needs.setup.outputs.runner-env }}
      runner-group: ${{ needs.setup.outputs.runner-group }}

  # Calls the reusable build_benchmark_tools workflow, gated on setup's
  # enabled-jobs list. It receives the install directory produced by build_all.
  build_benchmark_tools:
    uses: ./.github/workflows/build_benchmark_tools.yml
    needs:
      - setup
      - build_all
    if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'build_benchmark_tools')
    with:
      # Install directory outputs forwarded from build_all.
      install-dir: ${{ needs.build_all.outputs.install-dir }}
      install-dir-archive: ${{ needs.build_all.outputs.install-dir-archive }}
      install-dir-gcs-artifact: ${{ needs.build_all.outputs.install-dir-gcs-artifact }}
      # Runner selection forwarded from setup.
      runner-env: ${{ needs.setup.outputs.runner-env }}
      runner-group: ${{ needs.setup.outputs.runner-group }}

  # Calls the reusable build_e2e_test_artifacts workflow for the large
  # benchmark presets, gated on setup's enabled-jobs list. It receives the
  # install directory produced by build_all.
  build_e2e_test_artifacts:
    needs: [setup, build_all]
    if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'build_e2e_test_artifacts')
    uses: ./.github/workflows/build_e2e_test_artifacts.yml
    with:
      runner-group: ${{ needs.setup.outputs.runner-group }}
      runner-env: ${{ needs.setup.outputs.runner-env }}
      install-dir: ${{ needs.build_all.outputs.install-dir }}
      install-dir-archive: ${{ needs.build_all.outputs.install-dir-archive }}
      install-dir-gcs-artifact: ${{ needs.build_all.outputs.install-dir-gcs-artifact }}
      benchmark-presets: cuda-large,comp-stats-large,x86_64-large
      build-default-benchmark-suites: 0
      # Please keep the shard count default value in sync with
      # on.workflow_dispatch.inputs.shard-count.default.
      # The input is declared as `shard-count` (with a hyphen), so it must be
      # read under that exact name; any other spelling evaluates to empty and
      # the fallback would always win. The fallback itself is what scheduled
      # runs use, since they carry no inputs.
      shard-count: ${{ inputs && inputs.shard-count || 'a2-highgpu-1g=1,c2-standard-60=2,default=1' }}

  # Calls the reusable benchmark_compilation workflow, gated on setup's
  # enabled-jobs list. It consumes the artifacts and build log produced by
  # build_e2e_test_artifacts.
  compilation_benchmarks:
    uses: ./.github/workflows/benchmark_compilation.yml
    needs:
      - setup
      - build_e2e_test_artifacts
    if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'compilation_benchmarks')
    with:
      # Artifact and build-log outputs forwarded from build_e2e_test_artifacts.
      e2e-test-artifacts-dir: ${{ needs.build_e2e_test_artifacts.outputs.e2e-test-artifacts-dir }}
      e2e-test-artifacts-gcs-artifact-dir: ${{ needs.build_e2e_test_artifacts.outputs.e2e-test-artifacts-gcs-artifact-dir }}
      e2e-test-artifacts-build-log: ${{ needs.build_e2e_test_artifacts.outputs.e2e-test-artifacts-build-log }}
      e2e-test-artifacts-build-log-gcs-artifact: ${{ needs.build_e2e_test_artifacts.outputs.e2e-test-artifacts-build-log-gcs-artifact }}
      # Runner selection forwarded from setup.
      runner-env: ${{ needs.setup.outputs.runner-env }}
      runner-group: ${{ needs.setup.outputs.runner-group }}

  # Calls the reusable benchmark_execution workflow, gated on setup's
  # enabled-jobs list. It consumes the benchmark tools and the e2e test
  # artifacts built by the two jobs it depends on.
  execution_benchmarks:
    uses: ./.github/workflows/benchmark_execution.yml
    needs:
      - setup
      - build_benchmark_tools
      - build_e2e_test_artifacts
    if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'execution_benchmarks')
    with:
      # env.GCS_DIR is not passed here: the called workflow keeps its own
      # duplicate of it. See the note there on why this is.
      benchmark-tools-gcs-artifact-dir: ${{ needs.build_benchmark_tools.outputs.benchmark-tools-gcs-artifact-dir }}
      e2e-test-artifacts-dir: ${{ needs.build_e2e_test_artifacts.outputs.e2e-test-artifacts-dir }}
      e2e-test-artifacts-gcs-artifact-dir: ${{ needs.build_e2e_test_artifacts.outputs.e2e-test-artifacts-gcs-artifact-dir }}
      # Runner selection forwarded from setup.
      runner-env: ${{ needs.setup.outputs.runner-env }}
      runner-group: ${{ needs.setup.outputs.runner-group }}

  # Final gate: fails when any job it depends on neither succeeded nor was
  # skipped, and reports such failures on `main` to Discord.
  summary:
    # Even if you have an explicit if condition, you still need to override
    # GitHub's default behavior of not running if any dependencies failed.
    if: always()
    # NOTE(review): `ubuntu-20.04` is an old hosted-runner image label; confirm
    # it is still offered, otherwise this job will never be picked up.
    runs-on: ubuntu-20.04
    needs:
      - setup
      - build_all
      - build_benchmark_tools
      - build_e2e_test_artifacts
      - compilation_benchmarks
      - execution_benchmarks
    steps:
      - name: Getting failed jobs
        id: failed_jobs
        env:
          # Hand the JSON to the script through the environment instead of
          # pasting the expression into the script text. The JSON includes job
          # outputs, and a single quote inside any of them would otherwise end
          # the shell string early and break (or inject into) the script.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          # Log the raw results for debugging.
          printf '%s\n' "${NEEDS_JSON}"
          # Keep only the jobs whose result is neither "success" nor "skipped"
          # and join their names with commas.
          FAILED_JOBS="$(printf '%s\n' "${NEEDS_JSON}" \
            | jq --raw-output \
            'map_values(select(.result!="success" and .result!="skipped")) | keys | join(",")' \
          )"
          # Expose the list to later steps before possibly failing this one.
          echo "failed-jobs=${FAILED_JOBS}" >> "${GITHUB_OUTPUT}"
          if [[ "${FAILED_JOBS}" != "" ]]; then
            echo "The following jobs failed: ${FAILED_JOBS}"
            exit 1
          fi
      - name: Posting to Discord
        uses: sarisia/actions-status-discord@61114b793b460ee85fe38ad3fccc78c7ead38d55 # v1.11.1
        # Only notify when a previous step failed and the run is on main.
        if: failure() && github.ref_name == 'main'
        with:
          webhook: ${{ secrets.DISCORD_WEBHOOK }}
          description: "The following jobs failed: ${{ steps.failed_jobs.outputs.failed-jobs }}"