Skip to content

Add support for multi-GPU measurement processes #1220

Add support for multi-GPU measurement processes

Add support for multi-GPU measurement processes #1220

name: Testing::x86_64::LGPU-MPI::C++
on:
workflow_call:
inputs:
lightning-version:
type: string
required: true
description: The version of lightning to use. Valid values are either 'stable' (most recent git-tag) or 'latest' (most recent commit from master)
pennylane-version:
type: string
required: true
description: The version of PennyLane to use. Valid values are either 'stable' (most recent git-tag) or 'latest' (most recent commit from master)
push:
branches:
- main
pull_request:
types:
- opened
- reopened
- synchronize
- ready_for_review
paths:
- .github/workflows/tests_lgpumpi_cpp.yml
- pennylane_lightning/core/src/**
- '!pennylane_lightning/core/src/simulators/lightning_kokkos/**'
- '!pennylane_lightning/core/src/simulators/lightning_qubit/**'
- '!pennylane_lightning/core/src/simulators/lightning_tensor/**'
env:
COVERAGE_FLAGS: "--cov=pennylane_lightning --cov-report=term-missing --cov-report=xml:./coverage.xml --no-flaky-report -p no:warnings --tb=native"
GCC_VERSION: 11
OMP_NUM_THREADS: "2"
CI_CUDA_ARCH: 86
concurrency:
group: tests_lgpumpi_cpp-${{ github.ref }}-${{ github.event_name }}-${{ inputs.lightning-version }}-${{ inputs.pennylane-version }}
cancel-in-progress: true
jobs:
cpp_tests:
if: ${{ github.event.pull_request.draft == false && (contains(github.event.pull_request.labels.*.name, 'ci:use-multi-gpu-runner') || contains(fromJSON('["schedule", "workflow_dispatch"]'), github.event_name)) }}
runs-on:
- self-hosted
- linux
- x64
- ubuntu-22.04
- multi-gpu
strategy:
max-parallel: 1
matrix:
mpilib: ["mpich", "openmpi"]
cuda_version_maj: ["12"]
cuda_version_min: ["2"]
timeout-minutes: 30
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-tags: true
- name: Switch to stable build of Lightning-GPU
if: inputs.lightning-version == 'stable'
run: |
git fetch tags --force
git checkout latest_release
- uses: actions/setup-python@v5
id: setup_python
name: Install Python
with:
python-version: '3.10'
# Since the self-hosted runner can be re-used. It is best to set up all package
# installations in a virtual environment that gets cleaned at the end of each workflow run
- name: Setup Python virtual environment
id: setup_venv
env:
VENV_NAME: ${{ github.workspace }}/venv_${{ steps.setup_python.outputs.python-version }}_${{ github.sha }}
run: |
# Clear any pre-existing venvs
rm -rf venv_*
# Create new venv for this workflow_run
python --version
python -m venv ${{ env.VENV_NAME }}
# Add the venv to PATH for subsequent steps
echo ${{ env.VENV_NAME }}/bin >> $GITHUB_PATH
# Adding venv name as an output for subsequent steps to reference if needed
echo "venv_name=${{ env.VENV_NAME }}" >> $GITHUB_OUTPUT
echo "Python_ROOT_DIR=${{ env.VENV_NAME }}" >> $GITHUB_ENV
echo "Python3_ROOT_DIR=${{ env.VENV_NAME }}" >> $GITHUB_ENV
- name: Display Python-Path
id: python_path
run: |
py_path=$(which python)
echo "Python Interpreter Path => $py_path"
echo "python=$py_path" >> $GITHUB_OUTPUT
pip_path=$(which python)
echo "PIP Path => $pip_path"
echo "pip=$pip_path" >> $GITHUB_OUTPUT
- name: Install required packages
run: |
python -m pip install -r requirements-dev.txt
python -m pip install cmake custatevec-cu12 scipy
- name: Validate GPU version and installed compiler and modules
run: |
source /etc/profile.d/modules.sh && module use /opt/modules && module load cuda/${{ matrix.cuda_version_maj }}
which -a nvcc
nvcc --version
ls -R /opt/modules
- name: Validate Multi-GPU packages
run: |
source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
echo 'Checking for ${{ matrix.mpilib }}'
which -a mpirun
mpirun --version
which -a mpicxx
mpicxx --version
module unload ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
- name: Build and run unit tests
run: |
source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum')")
cmake . -BBuild \
-DPL_BACKEND=lightning_gpu \
-DENABLE_PYTHON=OFF \
-DENABLE_MPI=ON \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_COVERAGE=ON \
-DBUILD_TESTS=ON \
-DCMAKE_CXX_COMPILER=mpicxx \
-DCMAKE_CUDA_COMPILER=$(which nvcc) \
-DCMAKE_CUDA_ARCHITECTURES="86" \
-DCUQUANTUM_SDK=${CUQUANTUM_SDK} \
-DPython_EXECUTABLE:FILE="${{ steps.python_path.outputs.python }}" \
-G Ninja
cmake --build ./Build
cd ./Build
mkdir -p ./tests/results
for file in *runner ; do ./$file --order lex --reporter junit --out ./tests/results/report_$file.xml; done;
for file in *runner_mpi ; do mpirun -np 2 ./$file --order lex --reporter junit --out ./tests/results/report_$file.xml; done;
lcov --directory . -b ../pennylane_lightning/src --capture --output-file coverage.info
lcov --remove coverage.info '/usr/*' --output-file coverage.info
mv coverage.info coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}.info
- name: Upload test results
uses: actions/upload-artifact@v4
if: always()
with:
name: ubuntu-tests-reports-${{ matrix.mpilib }}
path: ./Build/tests/results/
retention-days: 1
if-no-files-found: error
include-hidden-files: true
- name: Upload code coverage results
uses: actions/upload-artifact@v4
with:
name: ubuntu-codecov-results-cpp-${{ matrix.mpilib }}
path: ./Build/coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}.info
retention-days: 1
if-no-files-found: error
include-hidden-files: true
- name: Cleanup
if: always()
run: |
rm -rf ${{ steps.setup_venv.outputs.venv_name }}
rm -rf * .git .gitignore .github
pip cache purge
upload-to-codecov-linux-cpp:
needs: ["cpp_tests"]
name: Upload coverage data to codecov
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Download coverage reports
uses: actions/download-artifact@v4
with:
pattern: ubuntu-codecov-*
merge-multiple: true
- name: Upload to Codecov
uses: codecov/codecov-action@v4
with:
fail_ci_if_error: true
verbose: true
token: ${{ secrets.CODECOV_TOKEN }}
- name: Cleanup
if: always()
run: |
rm -rf ${{ steps.setup_venv.outputs.venv_name }}
rm -rf * .git .gitignore .github
pip cache purge