# .github/workflows/build.yml

# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: "Python application"

# Triggers: pushes to `master`, and pull requests that change at least one
# file outside of `docs/`.
on:
  push:
    branches: [master]
  pull_request:
    paths-ignore: ["docs/**"]

# Grant the workflow token read-only access to the repository contents.
permissions:
  contents: read

# Runs are grouped per workflow and per pull request number (falling back to
# the git ref when the event has no pull request). Starting a new run in a
# group cancels the run that is still in progress in that group.
# https://stackoverflow.com/a/72408109/6388696
# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-concurrency-to-cancel-any-in-progress-job-or-run
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

jobs:
  # Static checks: installs pre-commit and runs every configured hook against
  # all files in the repository. The other test jobs depend on this one.
  linting:
    name: Run linting/pre-commit checks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # v5 runs on the Node 20 runtime (v4 used the deprecated Node 16 one);
      # the `python-version` input is unchanged.
      - uses: actions/setup-python@v5
        with:
          # Quoted so that YAML does not read the version as the float 3.1.
          python-version: '3.10'
      - run: pip install pre-commit
      # Print the installed version so it shows up in the job log.
      - run: pre-commit --version
      - run: pre-commit install
      - run: pre-commit run --all-files

  # Test suite on GitHub-hosted runners, started once `linting` has succeeded.
  # Both pytest steps set JAX_PLATFORMS=cpu and append to the same coverage
  # data, which is then published as a single `coverage.xml` artifact.
  unit_tests:
    needs:
      - linting
    strategy:
      max-parallel: 4
      matrix:
        platform:
          - ubuntu-latest
        python-version:
          - "3.10"
    runs-on: ${{ matrix.platform }}
    steps:
      - uses: actions/checkout@v4

      - name: Setup Rye with caching of venvs
        id: setup-rye
        uses: eifinger/setup-rye@v4
        with:
          enable-cache: true
          # One cache per Python version of the matrix.
          cache-prefix: ${{ matrix.python-version }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pin python-version ${{ matrix.python-version }}
        run: rye pin ${{ matrix.python-version }}

      - name: Install dependencies
        run: rye sync --no-lock

      # NOTE(review): `--shorter-than` and `--skip-if-files-missing` are not
      # built-in pytest flags; presumably they come from a project plugin.
      - name: Test with pytest (very fast)
        env:
          JAX_PLATFORMS: cpu
        run: >-
          rye run pytest -v --shorter-than=1.0
          --cov=project --cov-report=xml --cov-append
          --skip-if-files-missing

      # Second pass without the `--shorter-than` filter.
      - name: Test with pytest (fast)
        env:
          JAX_PLATFORMS: cpu
        run: >-
          rye run pytest -v
          --cov=project --cov-report=xml --cov-append
          --skip-if-files-missing

      # The artifact name carries the matrix values so every matrix entry
      # uploads under a distinct name.
      - name: Store coverage report as an artifact
        uses: actions/upload-artifact@v4
        with:
          name: coverage-reports-unit-tests-${{ matrix.platform }}-${{ matrix.python-version }}
          path: ./coverage.xml

  # Same test command as the unit tests, but executed on a self-hosted runner
  # after `unit_tests` has succeeded. Matrix entries run one at a time.
  local_integration_tests:
    needs:
      - unit_tests
    strategy:
      max-parallel: 1
      matrix:
        python-version:
          - "3.10"
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v4

      - name: Setup Rye with caching of venvs
        id: setup-rye
        uses: eifinger/setup-rye@v4
        with:
          enable-cache: true
          # One cache per Python version of the matrix.
          cache-prefix: ${{ matrix.python-version }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pin python-version ${{ matrix.python-version }}
        run: rye pin ${{ matrix.python-version }}

      - name: Install dependencies
        run: rye sync --no-lock

      - name: Test with pytest
        run: >-
          rye run pytest -v
          --cov=project --cov-report=xml --cov-append
          --skip-if-files-missing

      # TODO: the slow-test step below is disabled because it takes too long
      # and fails consistently; debug it before adding it back to the CI.
      # NOTE(review): the disabled command still calls `pdm`, whereas the
      # active steps of this job use `rye`.
      # - name: Test with pytest (only slow tests)
      #   run: pdm run pytest -v -m slow --slow --cov=project --cov-report=xml --cov-append

      - name: Store coverage report as an artifact
        uses: actions/upload-artifact@v4
        with:
          name: coverage-reports-integration-tests-${{ matrix.python-version }}
          path: ./coverage.xml

  # Copies the runner job script to each cluster of the matrix and submits it
  # with `sbatch` over SSH. The id of the submitted SLURM job is exposed as
  # the `job_id` output of this job.
  launch-slurm-actions-runner:
    needs: [local_integration_tests]
    runs-on: self-hosted
    strategy:
      max-parallel: 5
      matrix:
        cluster: ['mila'] #, 'narval', 'beluga']
    outputs:
      # Must reference the key that the `sbatch` step writes to $GITHUB_OUTPUT
      # (`job_id`); the step does not set any `stdout` output.
      # NOTE(review): this is a single job-level output shared by all matrix
      # entries; revisit if more clusters are enabled in the matrix.
      job_id: ${{ steps.sbatch.outputs.job_id }}
    steps:
      - uses: actions/checkout@v4
      - name: Copy job script to the cluster
        run: "scp .github/actions-runner-job.sh ${{ matrix.cluster }}:actions-runner-job.sh"

      - name: Launch Slurm Actions Runner
        id: sbatch
        # TODO: for DRAC clusters, the account needs to be set somehow (and obviously not be hard-coded here).
        # NOTE: Could also use the --wait flag to wait for the job to finish (and have this run at the same time as the other step).
        #
        # The remote command is single-quoted so that $SCRATCH and $HOME are
        # expanded on the cluster, not on the runner. `sbatch --parsable`
        # prints the job id, which is captured and published as `job_id`.
        run: |
          job_id="$(ssh ${{ matrix.cluster }} 'cd $SCRATCH && sbatch --parsable $HOME/actions-runner-job.sh')"
          if [ -z "$job_id" ]; then
            echo "sbatch did not return a job id on the ${{ matrix.cluster }} cluster." >&2
            exit 1
          fi
          echo "Submitted job $job_id on the ${{ matrix.cluster }} cluster!"
          echo "job_id=$job_id" >> "$GITHUB_OUTPUT"

  # This job runs on a self-hosted GitHub Actions runner that lives inside a
  # SLURM job on a compute node of the cluster. It is picked up by the runner
  # whose label equals the cluster name (`runs-on: ${{ matrix.cluster }}`).
  slurm_integration_tests:
    name: Run integration tests on the ${{ matrix.cluster }} cluster in job ${{ needs.launch-slurm-actions-runner.outputs.job_id}}
    needs:
      - launch-slurm-actions-runner
    strategy:
      max-parallel: 5
      matrix:
        # TODO: keep this list in sync with the matrix of the
        # `launch-slurm-actions-runner` job, ideally by deriving it from there:
        # cluster: ${{ needs.launch-slurm-actions-runner.strategy.matrix.cluster }}
        cluster:
          - mila
    runs-on: ${{ matrix.cluster }}
    steps:
      - uses: actions/checkout@v4

      # NOTE(review): the setup script is specific to `mila` even though the
      # job is parameterised by `matrix.cluster`.
      - name: Set up the repo using the setup script
        run: scripts/mila_setup.sh

      - name: Install dependencies
        run: rye sync --no-lock

      - name: Test with pytest
        run: >-
          rye run pytest -v
          --cov=project --cov-report=xml --cov-append
          --skip-if-files-missing

      # TODO: Re-enable the slow tests later.
      # NOTE(review): the disabled command still calls `pdm`, whereas the
      # active steps of this job use `rye`.
      # - name: Test with pytest (only slow tests)
      #   run: pdm run pytest -v -m slow --slow --cov=project --cov-report=xml --cov-append

      - name: Store coverage report as an artifact
        uses: actions/upload-artifact@v4
        with:
          name: coverage-reports-slurm-integration-tests-${{ matrix.cluster }}
          path: ./coverage.xml

  # Collects the coverage artifacts stored by the test jobs and sends them to
  # Codecov from a separate job, as described in:
  # https://about.codecov.io/blog/uploading-code-coverage-in-a-separate-job-on-github-actions/
  upload-coverage-codecov:
    name: Upload coverage reports to Codecov
    needs:
      - local_integration_tests
      - slurm_integration_tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          # Every artifact matching the pattern is downloaded under `path`.
          # With `merge-multiple: false`, each artifact gets its own
          # subdirectory, so the identically named coverage.xml files do not
          # overwrite each other.
          # If this does not work, the next step would be to give each
          # coverage file a unique name and use `merge-multiple: true`.
          path: coverage_reports
          pattern: "coverage-reports-*"
          merge-multiple: false

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v4
        with:
          directory: coverage_reports
          # Fail this job when the upload to Codecov errors out.
          fail_ci_if_error: true
          token: ${{ secrets.CODECOV_TOKEN }}