Add support for multi-GPU measurement processes #1220
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display name of this workflow in the GitHub Actions UI.
name: 'Testing::x86_64::LGPU-MPI::C++'

on:
  # Reusable-workflow entry point. Both inputs are mandatory strings.
  # NOTE(review): the descriptions below mention 'master' while the push
  # trigger targets 'main' - confirm which branch name is current.
  workflow_call:
    inputs:
      lightning-version:
        description: "The version of lightning to use. Valid values are either 'stable' (most recent git-tag) or 'latest' (most recent commit from master)"
        required: true
        type: string
      pennylane-version:
        description: "The version of PennyLane to use. Valid values are either 'stable' (most recent git-tag) or 'latest' (most recent commit from master)"
        required: true
        type: string

  # Direct pushes to the listed branch.
  push:
    branches: [main]

  # Pull-request activity, limited to changes in this workflow file or in the
  # core sources; the three negated patterns exclude the lightning_kokkos,
  # lightning_qubit and lightning_tensor simulator directories.
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    paths:
      - '.github/workflows/tests_lgpumpi_cpp.yml'
      - 'pennylane_lightning/core/src/**'
      - '!pennylane_lightning/core/src/simulators/lightning_kokkos/**'
      - '!pennylane_lightning/core/src/simulators/lightning_qubit/**'
      - '!pennylane_lightning/core/src/simulators/lightning_tensor/**'
# Workflow-level environment variables, exported to every step of every job.
# All values are written as quoted strings so that no YAML parser re-types
# them (environment variables are strings at runtime in any case).
env:
  # Flags for a pytest/coverage invocation.
  # NOTE(review): no step in this workflow appears to reference it - confirm
  # whether it is still needed.
  COVERAGE_FLAGS: "--cov=pennylane_lightning --cov-report=term-missing --cov-report=xml:./coverage.xml --no-flaky-report -p no:warnings --tb=native"
  # NOTE(review): presumably the GCC major version expected on the runner;
  # no step in this workflow appears to read it - confirm.
  GCC_VERSION: "11"
  # Thread count picked up by OpenMP-enabled binaries.
  OMP_NUM_THREADS: "2"
  # CUDA architecture value for the CI build (CMAKE_CUDA_ARCHITECTURES).
  CI_CUDA_ARCH: "86"
# Only one run per (ref, event, lightning-version, pennylane-version) tuple is
# kept alive: a newer run in the same group cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: tests_lgpumpi_cpp-${{ github.ref }}-${{ github.event_name }}-${{ inputs.lightning-version }}-${{ inputs.pennylane-version }}
jobs:
  # Builds the Lightning-GPU C++ test-suite with MPI enabled, runs the serial
  # and the 2-rank MPI test runners, and publishes JUnit reports plus an lcov
  # coverage file as artifacts. Runs once per MPI implementation in the matrix.
  cpp_tests:
    # Skip draft PRs. Otherwise run only when the PR carries the
    # 'ci:use-multi-gpu-runner' label, or when the triggering event is
    # 'schedule' / 'workflow_dispatch'.
    if: ${{ github.event.pull_request.draft == false && (contains(github.event.pull_request.labels.*.name, 'ci:use-multi-gpu-runner') || contains(fromJSON('["schedule", "workflow_dispatch"]'), github.event_name)) }}
    runs-on:
      - self-hosted
      - linux
      - x64
      - ubuntu-22.04
      - multi-gpu

    strategy:
      # Matrix entries run one at a time.
      max-parallel: 1
      matrix:
        mpilib: ["mpich", "openmpi"]
        cuda_version_maj: ["12"]
        cuda_version_min: ["2"]
    timeout-minutes: 30

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-tags: true

      - name: Switch to stable build of Lightning-GPU
        if: inputs.lightning-version == 'stable'
        run: |
          # '--tags' must be passed as an option; a bare 'tags' argument is
          # interpreted by git as the name of a remote to fetch from.
          git fetch --tags --force
          git checkout latest_release

      - uses: actions/setup-python@v5
        id: setup_python
        name: Install Python
        with:
          python-version: '3.10'

      # Since the self-hosted runner can be re-used, it is best to set up all
      # package installations in a virtual environment that gets cleaned at
      # the end of each workflow run.
      - name: Setup Python virtual environment
        id: setup_venv
        env:
          VENV_NAME: ${{ github.workspace }}/venv_${{ steps.setup_python.outputs.python-version }}_${{ github.sha }}
        run: |
          # Clear any pre-existing venvs
          rm -rf venv_*

          # Create new venv for this workflow_run
          python --version
          python -m venv ${{ env.VENV_NAME }}

          # Add the venv to PATH for subsequent steps
          echo ${{ env.VENV_NAME }}/bin >> $GITHUB_PATH

          # Adding venv name as an output for subsequent steps to reference if needed
          echo "venv_name=${{ env.VENV_NAME }}" >> $GITHUB_OUTPUT

          echo "Python_ROOT_DIR=${{ env.VENV_NAME }}" >> $GITHUB_ENV
          echo "Python3_ROOT_DIR=${{ env.VENV_NAME }}" >> $GITHUB_ENV

      # Publishes the resolved interpreter and pip locations as step outputs
      # ('python' and 'pip'); the 'python' output is passed to CMake below.
      - name: Display Python-Path
        id: python_path
        run: |
          py_path=$(which python)
          echo "Python Interpreter Path => $py_path"
          echo "python=$py_path" >> $GITHUB_OUTPUT

          # Resolve pip itself so the 'pip' output does not duplicate 'python'.
          pip_path=$(which pip)
          echo "PIP Path => $pip_path"
          echo "pip=$pip_path" >> $GITHUB_OUTPUT

      - name: Install required packages
        run: |
          python -m pip install -r requirements-dev.txt
          python -m pip install cmake custatevec-cu12 scipy

      - name: Validate GPU version and installed compiler and modules
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules && module load cuda/${{ matrix.cuda_version_maj }}
          which -a nvcc
          nvcc --version
          ls -R /opt/modules

      # Loads the MPI module for this matrix entry, prints the launcher and
      # compiler-wrapper versions, then unloads the module again.
      - name: Validate Multi-GPU packages
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
          echo 'Checking for ${{ matrix.mpilib }}'
          which -a mpirun
          mpirun --version
          which -a mpicxx
          mpicxx --version
          module unload ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}

      - name: Build and run unit tests
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
          # Point the build at the cuquantum directory inside site-packages.
          export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum')")
          # The CUDA architecture comes from the workflow-level CI_CUDA_ARCH
          # variable instead of a second hard-coded copy of the same value.
          cmake . -BBuild \
            -DPL_BACKEND=lightning_gpu \
            -DENABLE_PYTHON=OFF \
            -DENABLE_MPI=ON \
            -DCMAKE_BUILD_TYPE=Debug \
            -DENABLE_COVERAGE=ON \
            -DBUILD_TESTS=ON \
            -DCMAKE_CXX_COMPILER=mpicxx \
            -DCMAKE_CUDA_COMPILER=$(which nvcc) \
            -DCMAKE_CUDA_ARCHITECTURES="${CI_CUDA_ARCH}" \
            -DCUQUANTUM_SDK=${CUQUANTUM_SDK} \
            -DPython_EXECUTABLE:FILE="${{ steps.python_path.outputs.python }}" \
            -G Ninja
          cmake --build ./Build
          cd ./Build
          mkdir -p ./tests/results
          # Serial runners first, then the MPI runners on two ranks.
          for file in *runner ; do ./$file --order lex --reporter junit --out ./tests/results/report_$file.xml; done;
          for file in *runner_mpi ; do mpirun -np 2 ./$file --order lex --reporter junit --out ./tests/results/report_$file.xml; done;
          # Base directory is the source tree named in this workflow's
          # trigger paths (pennylane_lightning/core/src).
          lcov --directory . -b ../pennylane_lightning/core/src --capture --output-file coverage.info
          lcov --remove coverage.info '/usr/*' --output-file coverage.info
          mv coverage.info coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}.info

      # Uploaded even when an earlier step failed.
      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: ubuntu-tests-reports-${{ matrix.mpilib }}
          path: ./Build/tests/results/
          retention-days: 1
          if-no-files-found: error
          include-hidden-files: true

      - name: Upload code coverage results
        uses: actions/upload-artifact@v4
        with:
          name: ubuntu-codecov-results-cpp-${{ matrix.mpilib }}
          path: ./Build/coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}.info
          retention-days: 1
          if-no-files-found: error
          include-hidden-files: true

      # Always runs: removes the venv, the checked-out workspace and the pip
      # cache.
      - name: Cleanup
        if: always()
        run: |
          rm -rf ${{ steps.setup_venv.outputs.venv_name }}
          rm -rf * .git .gitignore .github
          pip cache purge

  # Collects the coverage artifacts produced by cpp_tests and sends them to
  # Codecov.
  upload-to-codecov-linux-cpp:
    needs: ["cpp_tests"]
    name: Upload coverage data to codecov
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Download coverage reports
        uses: actions/download-artifact@v4
        with:
          pattern: ubuntu-codecov-*
          merge-multiple: true

      - name: Upload to Codecov
        uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: true
          verbose: true
          token: ${{ secrets.CODECOV_TOKEN }}

      # This job defines no 'setup_venv' step, so there is no venv to remove;
      # only the workspace and the pip cache are cleaned.
      - name: Cleanup
        if: always()
        run: |
          rm -rf * .git .gitignore .github
          pip cache purge