From 8c6dcc939a43684a7762496205b120927aabfc10 Mon Sep 17 00:00:00 2001 From: tomlqc Date: Fri, 9 Aug 2024 15:09:56 -0400 Subject: [PATCH 1/3] Ray support for iqp_kernel --- nersc/README.md | 84 +++++++++++++++++++ nersc/performance_indicators/README_ray.md | 29 +++++++ .../generate_linearly_separable.py | 2 +- .../performance_indicators/perf_ind_kernel.py | 5 ++ nersc/run_hyperparam_search.sh | 27 ++++++ nersc/run_performance_indicator.sh | 7 ++ nersc/start-head.sh | 9 ++ nersc/start-worker.sh | 8 ++ nersc/submit_job_multinode.slr | 71 ++++++++++++++++ nersc/submit_job_shared.slr | 51 +++++++++++ nersc/submit_job_single.slr | 51 +++++++++++ nersc/submit_job_venv.slr | 48 +++++++++++ nersc/wrap_podman.sh | 51 ++++++----- src/qml_benchmarks/models/iqp_kernel.py | 26 +++++- 14 files changed, 446 insertions(+), 23 deletions(-) create mode 100644 nersc/README.md create mode 100644 nersc/performance_indicators/README_ray.md create mode 100644 nersc/run_hyperparam_search.sh create mode 100644 nersc/run_performance_indicator.sh create mode 100644 nersc/start-head.sh create mode 100644 nersc/start-worker.sh create mode 100644 nersc/submit_job_multinode.slr create mode 100644 nersc/submit_job_shared.slr create mode 100644 nersc/submit_job_single.slr create mode 100644 nersc/submit_job_venv.slr diff --git a/nersc/README.md b/nersc/README.md new file mode 100644 index 00000000..1883948a --- /dev/null +++ b/nersc/README.md @@ -0,0 +1,84 @@ +# Run QML Benchmarks on Perlmutter + +## Setup Podman + +All the following commands to be executed on Perlmutter. + +Build podman image from dockerfile: +``` +podman-hpc build -f Dockerfile.ubu22-PennyLane -t tgermain/ubu22-pennylane > podman_build.out +``` + +**TODO:** Add command to install Ray in dockerfile + +... 
or install it in a new image: +``` +podman-hpc run -it --name ray tgermain/ubu22-pennylane +# in the container +pip install ray +exit +# +podman-hpc commit ray tgermain/ubu22-pennylane-ray +``` + +Locally install `qml_benchmarks` with dependencies for development: +``` +mkdir qml-benchmarks-devel/nersc/local + +IMG=tgermain/ubu22-pennylane-ray +CFSH=/global/cfs/cdirs/m4693 # CFS home +REPO_DIR=$CFSH/qml-benchmarks-devel # qml-benchmark repo +LOCAL_DIR=$REPO_DIR/nersc/local # to store local python files +WORK_DIR=$REPO_DIR/nersc/ray/workdir # to store output files + +podman-hpc run -it \ + --volume $LOCAL_DIR:/root \ + --volume $REPO_DIR:/qml-benchmarks \ + --volume $WORK_DIR:/work_dir \ + -e HDF5_USE_FILE_LOCKING='FALSE' \ + --workdir /work_dir \ + $IMG bash + +# in the container +cd /qml-benchmarks +pip3 install --user . # install in /root/.local +``` + +**Note:** `pip3 install --user .` installs into `/root/.local`, which is the host directory `$LOCAL_DIR` mounted into the container. + +To make the image available on CPU/GPU nodes, migrate your image onto the `$SCRATCH` filesystem with: +``` +podman-hpc migrate tgermain/ubu22-pennylane-ray[:version] +``` +or make it available to everyone in the project: +``` +IMG=tgermain/ubu22-pennylane-ray +POD_PUB=$CFS/m4693/podman/ +podman-hpc --squash-dir $POD_PUB migrate $IMG +chmod -R a+rx $POD_PUB # to allow anyone to use this image +``` + +**TODO:** Check and update instructions about migrate for project + +## Test Podman + +``` bash + +IMG=tgermain/ubu22-pennylane-ray +CMD="python3 -u performance_indicators/perf_ind_variational.py --numFeatures 4 --inputPath performance_indicators/linearly_separable/" + +# Run container interactively with wrapper +./wrap_podman.sh $IMG "$CMD" +``` + +## Run jobs + +Run performance indicator (1 model: 100 training steps and prediction) +``` bash +sbatch submit_job_shared.slr +``` + +Run hyperparameter search +``` bash +sbatch submit_job_single.slr +``` diff --git a/nersc/performance_indicators/README_ray.md 
b/nersc/performance_indicators/README_ray.md new file mode 100644 index 00000000..9d7f9b23 --- /dev/null +++ b/nersc/performance_indicators/README_ray.md @@ -0,0 +1,29 @@ + + +## Run with Python `venv` + +### `lightning-kokkos` from pypi wheels + +Start interactive job on CPU node for testing +``` bash +salloc -q interactive -C cpu -t 0:30:00 -A m4693 + +# and execute in this interactive session: + +source /global/common/software/m4693/venv/qml_LK/bin/activate +cd nersc/ + +# to restrict the number of threads: +#export OMP_NUM_THREADS=32 +python3 -u performance_indicators/perf_ind_kernel.py --numFeatures 4 --inputPath performance_indicators/linearly_separable/ +``` + +Runtimes with Ray on CPU node, dataset with 240x240 = 57,600 kernels +``` +> 57600 / 128 = 450 kernels per core +> default.qubit.jax +qubits 4 10 15 +real 1m33s 2m42 11m27 +user 0m46s 1m00 4m20 +sys 0m22s 0m28 2m21 +``` \ No newline at end of file diff --git a/nersc/performance_indicators/generate_linearly_separable.py b/nersc/performance_indicators/generate_linearly_separable.py index 477f3273..18c9d147 100644 --- a/nersc/performance_indicators/generate_linearly_separable.py +++ b/nersc/performance_indicators/generate_linearly_separable.py @@ -25,7 +25,7 @@ n_samples = 300 -for n_features in range(2, 21): +for n_features in range(2, 31): margin = 0.02 * n_features X, y = generate_linearly_separable(n_samples, n_features, margin) diff --git a/nersc/performance_indicators/perf_ind_kernel.py b/nersc/performance_indicators/perf_ind_kernel.py index e633af24..37fc6f49 100644 --- a/nersc/performance_indicators/perf_ind_kernel.py +++ b/nersc/performance_indicators/perf_ind_kernel.py @@ -8,6 +8,9 @@ import os import yaml import subprocess + +import ray + from qml_benchmarks.hyperparam_search_utils import read_data import argparse @@ -47,6 +50,8 @@ def get_parser(): perf_ind_name = 'CAT_CPU' #a name for the performance indicator used for naming files + ray.init() + ################################# n_features 
= args.numFeatures # dataset dimension diff --git a/nersc/run_hyperparam_search.sh b/nersc/run_hyperparam_search.sh new file mode 100644 index 00000000..e53e043c --- /dev/null +++ b/nersc/run_hyperparam_search.sh @@ -0,0 +1,27 @@ +#!/bin/bash -e + +export RAY_DEDUP_LOGS=0 + +REPO=/qml-benchmarks +DATA=${REPO}/nersc/performance_indicators/linearly_separable + +GENERATE_DATA=0 # set to 1 to generate the dataset before running the search +# running python paper/benchmarks/generate_linearly_separable.py will generate a folder linearly_separable/ (in the current directory). +if [[ $GENERATE_DATA == 1 ]]; then + python ${REPO}/paper/benchmarks/generate_linearly_separable.py +fi + +# You can then use any of the *.csv from this folder to start training. e.g. +#python ${QML}/scripts/run_hyperparameter_search.py\ +# --classifier-name IQPVariationalClassifier\ +# --dataset-path linearly_separable/linearly_separable_10d_train.csv + +# I reduced the grid space and the input size for a faster turn around. This was my command: +python3 -u ${REPO}/scripts/run_hyperparameter_search.py\ + --classifier-name IQPVariationalClassifier\ + --dataset-path ${DATA}/linearly_separable_4d_train.csv\ + --clean True +# --n-jobs 256 # 4 +# --n_layers 1 2\ +# --learning_rate 0.001\ +# --repeats 1\ diff --git a/nersc/run_performance_indicator.sh b/nersc/run_performance_indicator.sh new file mode 100644 index 00000000..5e00b4ca --- /dev/null +++ b/nersc/run_performance_indicator.sh @@ -0,0 +1,7 @@ +#!/bin/bash -e + +export RAY_DEDUP_LOGS=0 + +NUM_FEATURES=15 + +python3 -u performance_indicators/perf_ind_kernel.py --numFeatures $NUM_FEATURES --inputPath performance_indicators/linearly_separable/ diff --git a/nersc/start-head.sh b/nersc/start-head.sh new file mode 100644 index 00000000..11c03627 --- /dev/null +++ b/nersc/start-head.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +export LC_ALL=C.UTF-8 +export LANG=C.UTF-8 + +echo "starting ray head node" +# Launch the head node +ray start --head --node-ip-address=$1 --port=6379 # --redis-password=$2 +sleep infinity diff --git 
a/nersc/start-worker.sh b/nersc/start-worker.sh new file mode 100644 index 00000000..52a8104e --- /dev/null +++ b/nersc/start-worker.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +export LC_ALL=C.UTF-8 +export LANG=C.UTF-8 + +echo "starting ray worker node" +ray start --address $1 # --redis-password=$2 +sleep infinity diff --git a/nersc/submit_job_multinode.slr b/nersc/submit_job_multinode.slr new file mode 100644 index 00000000..c646dbda --- /dev/null +++ b/nersc/submit_job_multinode.slr @@ -0,0 +1,71 @@ +#!/bin/bash +#SBATCH -A m4693 +#SBATCH -C cpu +#SBATCH -t 0:10:00 +#SBATCH --job-name=qml_multinode +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node +#SBATCH --cpus-per-task=128 +#SBATCH --mail-type=ALL +#SBATCH --mail-user=thomas.germain@xanadu.ai +#SBATCH --output=out/%j.%x.out +#SBATCH --error=out/%j.%x.err + +#------------------------------------------------------------------- +# SBATCH -q shared +# SBATCH --gpus-per-task=4 # 4 GPUs per node + +set -u ; # exit if you try to use an uninitialized variable + +echo "S:starting" +echo "S:PWD=$PWD" + +################################################################ +# Define image and command +################################################################ + +IMG=tgermain/ubu22-pennylane-ray +CMD="./run_hyperparam_search.sh" + +echo "S:IMG=$IMG" +echo "S:CMD=$CMD" + +################################################################ +# Initialize Ray cluster environment +################################################################ + +head_node=$(hostname) +head_node_ip=$(hostname --ip-address) +# if we detect a space character in the head node IP, we'll +# convert it to an ipv4 address. This step is optional. 
+if [[ "$head_node_ip" == *" "* ]]; then +IFS=' ' read -ra ADDR <<<"$head_node_ip" +if [[ ${#ADDR[0]} -gt 16 ]]; then + head_node_ip=${ADDR[1]} +else + head_node_ip=${ADDR[0]} +fi +fi +port=6379 + +echo "STARTING HEAD at $head_node" +echo "Head node IP: $head_node_ip" +srun --nodes=1 --ntasks=1 -w $head_node ./wrap_podman.sh $IMG "./start-head.sh $head_node_ip" & +sleep 10 + +worker_num=$(($SLURM_JOB_NUM_NODES - 1)) #number of nodes other than the head node +srun -n $worker_num --nodes=$worker_num --ntasks-per-node=1 --exclude $head_node ./wrap_podman.sh $IMG "./start-worker.sh $head_node_ip:$port" & +sleep 5 + +################################################################ +# Run Python scripts in container +################################################################ + +echo "S:ready to run" + +./wrap_podman.sh $IMG "$CMD" "$head_node_ip:$port" + +sleep 1 +echo S:done + +# EOF diff --git a/nersc/submit_job_shared.slr b/nersc/submit_job_shared.slr new file mode 100644 index 00000000..17ce938d --- /dev/null +++ b/nersc/submit_job_shared.slr @@ -0,0 +1,51 @@ +#!/bin/bash +#SBATCH -A m4693 +#SBATCH -C cpu +#SBATCH -t 0:10:00 +#SBATCH --job-name=qml_shared +#SBATCH -q shared +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node +#SBATCH --cpus-per-task=8 +#SBATCH --mail-type=ALL +#SBATCH --mail-user=thomas.germain@xanadu.ai +#SBATCH --output=out/%j.%x.out +#SBATCH --error=out/%j.%x.err + +#------------------------------------------------------------------- +# SBATCH --gpus-per-task=4 # 4 GPUs per node + +set -u ; # exit if you try to use an uninitialized variable + +echo "S:starting" +echo "S:PWD=$PWD" + +################################################################ +# Define image and command +################################################################ + +IMG=tgermain/ubu22-pennylane-ray +CMD="./run_performance_indicator.sh" + +echo "S:IMG=$IMG" +echo "S:CMD=$CMD" + 
+################################################################ +# Initialize Ray cluster environment +################################################################ + +# ... + +################################################################ +# Run Python scripts in container +################################################################ + +N=1 + +echo "S:ready to run" +srun -n $N ./wrap_podman.sh $IMG "$CMD" + +sleep 1 +echo S:done + +# EOF diff --git a/nersc/submit_job_single.slr b/nersc/submit_job_single.slr new file mode 100644 index 00000000..0cbb45e3 --- /dev/null +++ b/nersc/submit_job_single.slr @@ -0,0 +1,51 @@ +#!/bin/bash +#SBATCH -A m4693 +#SBATCH -C cpu +#SBATCH -t 0:10:00 +#SBATCH --job-name=qml_single +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node +#SBATCH --cpus-per-task=256 +#SBATCH --mail-type=ALL +#SBATCH --mail-user=thomas.germain@xanadu.ai +#SBATCH --output=out/%j.%x.out +#SBATCH --error=out/%j.%x.err + +#------------------------------------------------------------------- +# SBATCH -q shared +# SBATCH --gpus-per-task=4 # 4 GPUs per node + +set -u ; # exit if you try to use an uninitialized variable + +echo "S:starting" +echo "S:PWD=$PWD" + +################################################################ +# Define image and command +################################################################ + +IMG=tgermain/ubu22-pennylane-ray +CMD="./run_hyperparam_search.sh" + +echo "S:IMG=$IMG" +echo "S:CMD=$CMD" + +################################################################ +# Initialize Ray cluster environment +################################################################ + +# ... 
+ +################################################################ +# Run Python scripts in container +################################################################ + +N=1 + +echo "S:ready to run" +srun -n $N ./wrap_podman.sh $IMG "$CMD" + +sleep 1 +echo S:done + +# EOF diff --git a/nersc/submit_job_venv.slr b/nersc/submit_job_venv.slr new file mode 100644 index 00000000..3cc02fd9 --- /dev/null +++ b/nersc/submit_job_venv.slr @@ -0,0 +1,48 @@ +#!/bin/bash +#SBATCH -A m4693 +#SBATCH -C cpu +#SBATCH -t 00:30:00 +#SBATCH --job-name=qml_single +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node +#SBATCH --cpus-per-task=256 +#SBATCH --mail-type=ALL +#SBATCH --mail-user=thomas.germain@xanadu.ai +#SBATCH --output=out/%j.%x.out +#SBATCH --error=out/%j.%x.err + +#------------------------------------------------------------------- +# SBATCH -q shared +# SBATCH --gpus-per-task=4 # 4 GPUs per node + +set -u ; # exit if you try to use an uninitialized variable + +echo "S:starting" + +################################################################ +# Define venv and command +################################################################ + +module load python +source /global/common/software/m4693/venv/qml_LK/bin/activate + +cd /global/cfs/cdirs/m4693/qml-benchmarks-devel/nersc/ + +CMD="./run_performance_indicator.sh" + +echo "S:PWD=$PWD" +echo "S:CMD=$CMD" + +################################################################ +# Run Python scripts in container +################################################################ + +N=1 + +echo "S:ready to run" +srun -n $N $CMD + +sleep 1 +echo S:done + +# EOF diff --git a/nersc/wrap_podman.sh b/nersc/wrap_podman.sh index ee8ec5d0..c352bb30 100755 --- a/nersc/wrap_podman.sh +++ b/nersc/wrap_podman.sh @@ -2,37 +2,46 @@ echo W:myRank is $SLURM_PROCID IMG=$1 CMD=$2 -outPath=$3 -CFSH=$4 -BASE_DIR=$5 -WORK_DIR=$6 +RAY_ADDRESS=$3 -if [ $SLURM_PROCID -eq 0 ] ; then +if [[ $SLURM_PROCID -eq 0 ]]; then echo 
W:IMG=$IMG echo W:CMD=$CMD - #echo Q:fire $ fi -echo W:BASE_DIR=$BASE_DIR -echo 'W:start podman' +CFSH=/global/cfs/cdirs/m4693 # CFS home +REPO_DIR=$CFSH/qml-benchmarks-devel # qml-benchmark repo +ROOT_DIR=$REPO_DIR/nersc/root # to access local python packages +WORK_DIR=$REPO_DIR/nersc # to store output files + +# Mount /tmp to avoid following error with Ray: +# ValueError: Can't find a `node_ip_address.json` file + +PORT=6379 + +# Script will run in the workdir mounted in the container, +# this will allow us to access the output files easily. + podman-hpc run -it \ - --volume $CFSH/$BASE_DIR:/root \ - --volume $CFSH/$BASE_DIR:$BASE_DIR \ - --volume $CFSH/$BASE_DIR/nersc/performance_indicators/linearly_separable:/linearly_separable \ - --volume $CFSH/$WORK_DIR:$WORK_DIR \ + --net host \ + -p $PORT:$PORT \ + --volume /tmp:/tmp \ + --volume $ROOT_DIR:/root \ + --volume $REPO_DIR:/qml-benchmarks \ + --volume $WORK_DIR:/work_dir \ + --workdir /work_dir \ -e HDF5_USE_FILE_LOCKING='FALSE' \ - --workdir $WORK_DIR \ + --shm-size=10.24gb \ $IMG < Date: Fri, 9 Aug 2024 12:25:09 -0700 Subject: [PATCH 2/3] README_ray: Runtime with Ray on batch CPU node, 15 qubits --- nersc/performance_indicators/README_ray.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/nersc/performance_indicators/README_ray.md b/nersc/performance_indicators/README_ray.md index 9d7f9b23..db36c55f 100644 --- a/nersc/performance_indicators/README_ray.md +++ b/nersc/performance_indicators/README_ray.md @@ -18,12 +18,27 @@ cd nersc/ python3 -u performance_indicators/perf_ind_kernel.py --numFeatures 4 --inputPath performance_indicators/linearly_separable/ ``` -Runtimes with Ray on CPU node, dataset with 240x240 = 57,600 kernels +Runtimes with Ray on interactive CPU node, dataset with 240x240 = 57,600 kernels ``` > 57600 / 128 = 450 kernels per core > default.qubit.jax qubits 4 10 15 real 1m33s 2m42 11m27 user 0m46s 1m00 4m20 -sys 0m22s 0m28 2m21 +sys 0m22s 0m28 2m21 +``` + 
+Runtime with Ray on batch CPU node, 15 qubits +``` +Job ID: 28820822 +Cluster: perlmutter +User/Group: tgermain/tgermain +State: COMPLETED (exit code 0) +Nodes: 1 +Cores per node: 256 +CPU Utilized: 1-19:42:40 +CPU Efficiency: 86.94% of 2-02:16:32 core-walltime +Job Wall-clock time: 00:11:47 +Memory Utilized: 314.08 GB +Memory Efficiency: 65.90% of 476.56 GB ``` \ No newline at end of file From 3cab6a407a0ed39cfa08281ff109e46aa22bb3c9 Mon Sep 17 00:00:00 2001 From: tomlqc Date: Fri, 9 Aug 2024 15:38:05 -0700 Subject: [PATCH 3/3] Run perf_ind_kernel.py with ray --- ...parable_15d_performance_indicators_RAY.csv | 2 + ...5d_performance_indicators_RAY_packages.txt | 82 +++++++++++++++++++ nersc/performance_indicators/README_ray.md | 9 +- .../performance_indicators/perf_ind_kernel.py | 13 +-- nersc/run_hyperparam_search.sh | 0 nersc/run_performance_indicator.sh | 2 +- nersc/submit_job_single.slr | 4 +- src/qml_benchmarks/models/iqp_kernel.py | 19 +++-- 8 files changed, 115 insertions(+), 16 deletions(-) create mode 100644 nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY.csv create mode 100644 nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY_packages.txt mode change 100644 => 100755 nersc/run_hyperparam_search.sh mode change 100644 => 100755 nersc/run_performance_indicator.sh diff --git a/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY.csv b/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY.csv new file mode 100644 index 00000000..b1e5fe50 --- /dev/null +++ b/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY.csv @@ -0,0 +1,2 @@ +construct_kernel_time,training_time,predict_time,hyperparameters +127.64588618278503,127.65117883682251,23.83331537246704,"{'repeats': 10, 'use_jax': False, 'vmap': True, 'jit': 
False, 'use_ray': True}" diff --git a/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY_packages.txt b/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY_packages.txt new file mode 100644 index 00000000..272679a6 --- /dev/null +++ b/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY_packages.txt @@ -0,0 +1,82 @@ +Package Version Editable project location +-------------------------- ----------- -------------------------------------------- +absl-py 2.1.0 +aiosignal 1.3.1 +appdirs 1.4.4 +astunparse 1.6.3 +attrs 23.2.0 +autograd 1.6.2 +autoray 0.6.12 +cachetools 5.4.0 +certifi 2024.7.4 +charset-normalizer 3.3.2 +chex 0.1.86 +click 8.1.7 +contourpy 1.2.1 +cycler 0.12.1 +diastatic-malt 2.15.2 +etils 1.9.2 +filelock 3.15.4 +flax 0.8.5 +fonttools 4.53.1 +frozenlist 1.4.1 +fsspec 2024.6.1 +future 1.0.0 +gast 0.6.0 +idna 3.7 +importlib_resources 6.4.0 +jax 0.4.23 +jaxlib 0.4.23 +joblib 1.4.2 +jsonschema 4.23.0 +jsonschema-specifications 2023.12.1 +kiwisolver 1.4.5 +markdown-it-py 3.0.0 +matplotlib 3.9.1 +mdurl 0.1.2 +ml-dtypes 0.4.0 +msgpack 1.0.8 +nersc-pymon 0.2.1 +nest-asyncio 1.6.0 +networkx 3.3 +numpy 1.26.4 +opt-einsum 3.3.0 +optax 0.2.3 +orbax-checkpoint 0.5.23 +packaging 24.1 +pandas 2.2.2 +PennyLane 0.37.0 +PennyLane-Catalyst 0.7.0 +PennyLane_Lightning 0.37.0 +PennyLane_Lightning_Kokkos 0.37.0 +pillow 10.4.0 +pip 23.2.1 +protobuf 5.27.2 +Pygments 2.18.0 +pyparsing 3.1.2 +python-dateutil 2.9.0.post0 +pytz 2024.1 +PyYAML 6.0.1 +qml_benchmarks 0.1 /global/cfs/cdirs/m4693/qml-benchmarks-devel +ray 2.33.0 +referencing 0.35.1 +requests 2.32.3 +rich 13.7.1 +rpds-py 0.19.1 +rustworkx 0.15.1 +scikit-learn 1.5.1 +scipy 1.12.0 +seaborn 0.13.2 +semantic-version 2.10.0 +setuptools 65.5.0 +six 1.16.0 +tensorstore 0.1.63 +termcolor 2.4.0 +threadpoolctl 3.5.0 +toml 0.10.2 +toolz 0.12.1 +typing_extensions 4.12.2 +tzdata 2024.1 
+urllib3 2.2.2 +wheel 0.43.0 +zipp 3.19.2 diff --git a/nersc/performance_indicators/README_ray.md b/nersc/performance_indicators/README_ray.md index db36c55f..96c44678 100644 --- a/nersc/performance_indicators/README_ray.md +++ b/nersc/performance_indicators/README_ray.md @@ -4,6 +4,8 @@ ### `lightning-kokkos` from pypi wheels +NOTE: `venv/qml_LK` is described in `single_circuits/README.md` + Start interactive job on CPU node for testing ``` bash salloc -q interactive -C cpu -t 0:30:00 -A m4693 @@ -13,8 +15,6 @@ salloc -q interactive -C cpu -t 0:30:00 -A m4693 source /global/common/software/m4693/venv/qml_LK/bin/activate cd nersc/ -# to restrict the number of threads: -#export OMP_NUM_THREADS=32 python3 -u performance_indicators/perf_ind_kernel.py --numFeatures 4 --inputPath performance_indicators/linearly_separable/ ``` @@ -28,6 +28,11 @@ user 0m46s 1m00 4m20 sys 0m22s 0m28 2m21 ``` +Start batch job on CPU node +``` bash +sbatch submit_job_single.slr +``` + Runtime with Ray on batch CPU node, 15 qubits ``` Job ID: 28820822 diff --git a/nersc/performance_indicators/perf_ind_kernel.py b/nersc/performance_indicators/perf_ind_kernel.py index 37fc6f49..6dfb0dba 100644 --- a/nersc/performance_indicators/perf_ind_kernel.py +++ b/nersc/performance_indicators/perf_ind_kernel.py @@ -40,17 +40,20 @@ def get_parser(): # You only need to change this to make a different performance indicator #define the model - from qml_benchmarks.models.projected_quantum_kernel import ProjectedQuantumKernel as Model + from qml_benchmarks.models.iqp_kernel import IQPKernelClassifier as Model #implementation attributes of model use_jax = False vmap = True - jit = True - model_settings = {'use_jax': use_jax, 'vmap': vmap, 'jit': jit} + jit = False + use_ray = True + model_settings = {'use_jax': use_jax, 'vmap': vmap, 'jit': jit, + 'use_ray': use_ray} - perf_ind_name = 'CAT_CPU' #a name for the performance indicator used for naming files + perf_ind_name = 'RAY' #a name for the performance 
indicator used for naming files - ray.init() + if use_ray: + ray.init() ################################# diff --git a/nersc/run_hyperparam_search.sh b/nersc/run_hyperparam_search.sh old mode 100644 new mode 100755 diff --git a/nersc/run_performance_indicator.sh b/nersc/run_performance_indicator.sh old mode 100644 new mode 100755 index 5e00b4ca..c2abeb44 --- a/nersc/run_performance_indicator.sh +++ b/nersc/run_performance_indicator.sh @@ -2,6 +2,6 @@ export RAY_DEDUP_LOGS=0 -NUM_FEATURES=15 +NUM_FEATURES=20 python3 -u performance_indicators/perf_ind_kernel.py --numFeatures $NUM_FEATURES --inputPath performance_indicators/linearly_separable/ diff --git a/nersc/submit_job_single.slr b/nersc/submit_job_single.slr index 0cbb45e3..4df2728c 100644 --- a/nersc/submit_job_single.slr +++ b/nersc/submit_job_single.slr @@ -1,7 +1,7 @@ #!/bin/bash #SBATCH -A m4693 #SBATCH -C cpu -#SBATCH -t 0:10:00 +#SBATCH -t 0:30:00 #SBATCH --job-name=qml_single #SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node @@ -25,7 +25,7 @@ echo "S:PWD=$PWD" ################################################################ IMG=tgermain/ubu22-pennylane-ray -CMD="./run_hyperparam_search.sh" +CMD="./run_performance_indicator.sh" echo "S:IMG=$IMG" echo "S:CMD=$CMD" diff --git a/src/qml_benchmarks/models/iqp_kernel.py b/src/qml_benchmarks/models/iqp_kernel.py index f37132ef..e7ad82c4 100644 --- a/src/qml_benchmarks/models/iqp_kernel.py +++ b/src/qml_benchmarks/models/iqp_kernel.py @@ -40,6 +40,7 @@ def __init__( use_jax=False, vmap=True, jit=True, + use_ray=False, random_state=42, scaling=1.0, max_vmap=250, @@ -92,6 +93,7 @@ def __init__( self.use_jax = use_jax self.vmap = vmap self.jit = jit + self.use_ray = use_ray # data-dependant attributes # which will be initialised by calling "fit" @@ -103,8 +105,10 @@ def __init__( def generate_key(self): return self.rng.integers(1000000) - def construct_circuit(self): - dev = qml.device(self.dev_type, wires=self.n_qubits_) + def 
construct_circuit(self, dev=None): + + if dev is None: + dev = qml.device(self.dev_type, wires=self.n_qubits_) def wrapped_circuit(x): @qml.qnode(dev, **self.qnode_kwargs) @@ -139,7 +143,8 @@ def circuit(x): else: circuit = qjit(circuit) - self.circuit = circuit + if dev is None: + self.circuit = circuit return circuit @@ -155,17 +160,19 @@ def precompute_kernel(self, X1, X2): dim1 = len(X1) dim2 = len(X2) - circuit = self.construct_circuit() - if self.use_ray: # concatenate all pairs of vectors Z = np.array([np.concatenate((X1[i], X2[j])) for i in range(dim1) for j in range(dim2)]) @ray.remote def run_circuit(x): + # dev is not pickable so it must be created within ray job. + dev = qml.device(self.dev_type, wires=self.n_qubits_) + circuit = self.construct_circuit(dev) + # TODO: run batch of circuits return circuit(x) - print("precompute_kernel() start") + print("precompute_kernel() start (+)") print(Z.shape) kernel_values = []