diff --git a/nersc/README.md b/nersc/README.md
new file mode 100644
index 0000000..1883948
--- /dev/null
+++ b/nersc/README.md
@@ -0,0 +1,84 @@
+# Run QML Benchmarks on Perlmutter
+
+## Setup Podman
+
+All of the following commands are to be executed on Perlmutter.
+
+Build podman image from dockerfile:
+```
+podman-hpc build -f Dockerfile.ubu22-PennyLane -t tgermain/ubu22-pennylane > podman_build.out
+```
+
+**TODO:** Add command to install Ray in dockerfile
+
+... or install in new image
+```
+podman-hpc run -it --name ray tgermain/ubu22-pennylane
+# in the container
+pip install ray
+exit
+# 
+podman-hpc commit ray tgermain/ubu22-pennylane-ray
+```
+
+Locally install `qml_benchmarks` with dependencies for development:
+```
+mkdir qml-benchmarks-devel/nersc/local
+
+IMG=tgermain/ubu22-pennylane-ray
+CFSH=/global/cfs/cdirs/m4693  # CFS home
+REPO_DIR=$CFSH/qml-benchmarks-devel  # qml-benchmark repo
+LOCAL_DIR=$REPO_DIR/nersc/local  # to store local python files
+WORK_DIR=$REPO_DIR/nersc/ray/workdir  # to store output files
+
+podman-hpc run -it \
+    --volume $LOCAL_DIR:/root \
+    --volume $REPO_DIR:/qml-benchmarks \
+    --volume $WORK_DIR:/work_dir \
+    -e HDF5_USE_FILE_LOCKING='FALSE' \
+    --workdir /work_dir \
+    $IMG bash
+
+# in the container
+cd /qml-benchmarks
+pip3 install --user .  # install in /root/.local
+```
+
+**Note:** `pip3 install --user .` will install in `/root/.local`, mounted to container.
+
+To make image available on CPU/GPU nodes, migrate your image onto the `$SCRATCH` filesystem with:
+```
+podman-hpc migrate tgermain/ubu22-pennylane-ray[:version]
+```
+or make available for everyone in project:
+```
+IMG=tgermain/ubu22-pennylane-ray
+POD_PUB=$CFS/m4693/podman/
+podman-hpc --squash-dir $POD_PUB migrate $IMG
+chmod -R a+rx $POD_PUB   # to allow anyone to use this image
+```
+
+**TODO:** Check and update instructions about migrate for project
+
+## Test Podman
+
+``` bash
+
+IMG=tgermain/ubu22-pennylane-ray
+CMD="python3 -u performance_indicators/perf_ind_variational.py --numFeatures 4 --inputPath performance_indicators/linearly_separable/"
+
+# Run container interactively with wrapper
+./wrap_podman.sh $IMG "$CMD"
+```
+
+## Run jobs
+
+Run performance indicator (1 model: 100 training steps and prediction)
+``` bash
+sbatch submit_job_shared.slr
+```
+
+Run hyperparameter search
+``` bash
+sbatch submit_job_multinode.slr
+```
diff --git a/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY.csv b/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY.csv
new file mode 100644
index 0000000..b1e5fe5
--- /dev/null
+++ b/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY.csv
@@ -0,0 +1,2 @@
+construct_kernel_time,training_time,predict_time,hyperparameters
+127.64588618278503,127.65117883682251,23.83331537246704,"{'repeats': 10, 'use_jax': False, 'vmap': True, 'jit': False, 'use_ray': True}"
diff --git a/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY_packages.txt b/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY_packages.txt
new file mode 100644
index 0000000..272679a
--- /dev/null
+++ b/nersc/performance_indicators/RAY/IQPKernelClassifier_linearly_separable_15d_performance_indicators_RAY_packages.txt
@@ -0,0 +1,82 @@
+Package                    Version     Editable project location
+-------------------------- ----------- --------------------------------------------
+absl-py                    2.1.0
+aiosignal                  1.3.1
+appdirs                    1.4.4
+astunparse                 1.6.3
+attrs                      23.2.0
+autograd                   1.6.2
+autoray                    0.6.12
+cachetools                 5.4.0
+certifi                    2024.7.4
+charset-normalizer         3.3.2
+chex                       0.1.86
+click                      8.1.7
+contourpy                  1.2.1
+cycler                     0.12.1
+diastatic-malt             2.15.2
+etils                      1.9.2
+filelock                   3.15.4
+flax                       0.8.5
+fonttools                  4.53.1
+frozenlist                 1.4.1
+fsspec                     2024.6.1
+future                     1.0.0
+gast                       0.6.0
+idna                       3.7
+importlib_resources        6.4.0
+jax                        0.4.23
+jaxlib                     0.4.23
+joblib                     1.4.2
+jsonschema                 4.23.0
+jsonschema-specifications  2023.12.1
+kiwisolver                 1.4.5
+markdown-it-py             3.0.0
+matplotlib                 3.9.1
+mdurl                      0.1.2
+ml-dtypes                  0.4.0
+msgpack                    1.0.8
+nersc-pymon                0.2.1
+nest-asyncio               1.6.0
+networkx                   3.3
+numpy                      1.26.4
+opt-einsum                 3.3.0
+optax                      0.2.3
+orbax-checkpoint           0.5.23
+packaging                  24.1
+pandas                     2.2.2
+PennyLane                  0.37.0
+PennyLane-Catalyst         0.7.0
+PennyLane_Lightning        0.37.0
+PennyLane_Lightning_Kokkos 0.37.0
+pillow                     10.4.0
+pip                        23.2.1
+protobuf                   5.27.2
+Pygments                   2.18.0
+pyparsing                  3.1.2
+python-dateutil            2.9.0.post0
+pytz                       2024.1
+PyYAML                     6.0.1
+qml_benchmarks             0.1         /global/cfs/cdirs/m4693/qml-benchmarks-devel
+ray                        2.33.0
+referencing                0.35.1
+requests                   2.32.3
+rich                       13.7.1
+rpds-py                    0.19.1
+rustworkx                  0.15.1
+scikit-learn               1.5.1
+scipy                      1.12.0
+seaborn                    0.13.2
+semantic-version           2.10.0
+setuptools                 65.5.0
+six                        1.16.0
+tensorstore                0.1.63
+termcolor                  2.4.0
+threadpoolctl              3.5.0
+toml                       0.10.2
+toolz                      0.12.1
+typing_extensions          4.12.2
+tzdata                     2024.1
+urllib3                    2.2.2
+wheel                      0.43.0
+zipp                       3.19.2
diff --git a/nersc/performance_indicators/README_ray.md b/nersc/performance_indicators/README_ray.md
new file mode 100644
index 0000000..96c4467
--- /dev/null
+++ b/nersc/performance_indicators/README_ray.md
@@ -0,0 +1,49 @@
+
+
+## Run with Python `venv`
+
+### `lightning-kokkos` from pypi wheels
+
+NOTE: `venv/qml_LK` is described in `single_circuits/README.md`
+
+Start interactive job on CPU node for testing
+``` bash
+salloc -q interactive -C cpu -t 0:30:00 -A m4693
+
+# and execute in this interactive session:
+
+source /global/common/software/m4693/venv/qml_LK/bin/activate
+cd nersc/
+
+python3 -u performance_indicators/perf_ind_kernel.py --numFeatures 4 --inputPath performance_indicators/linearly_separable/
+```
+
+Runtimes with Ray on interactive CPU node, dataset with 240x240 = 57,600 kernels
+```
+> 57600 / 128 = 450 kernels per core
+> default.qubit.jax
+qubits     4     10     15
+real    1m33s  2m42  11m27
+user    0m46s  1m00   4m20
+sys     0m22s  0m28   2m21
+```
+
+Start batch job on CPU node
+``` bash
+sbatch submit_job_single.slr
+```
+
+Runtime with Ray on batch CPU node, 15 qubits
+```
+Job ID: 28820822
+Cluster: perlmutter
+User/Group: tgermain/tgermain
+State: COMPLETED (exit code 0)
+Nodes: 1
+Cores per node: 256
+CPU Utilized: 1-19:42:40
+CPU Efficiency: 86.94% of 2-02:16:32 core-walltime
+Job Wall-clock time: 00:11:47
+Memory Utilized: 314.08 GB
+Memory Efficiency: 65.90% of 476.56 GB
+```
\ No newline at end of file
diff --git a/nersc/performance_indicators/generate_linearly_separable.py b/nersc/performance_indicators/generate_linearly_separable.py
index 477f327..18c9d14 100644
--- a/nersc/performance_indicators/generate_linearly_separable.py
+++ b/nersc/performance_indicators/generate_linearly_separable.py
@@ -25,7 +25,7 @@
 
 n_samples = 300
 
-for n_features in range(2, 21):
+for n_features in range(2, 31):
     margin = 0.02 * n_features
 
     X, y = generate_linearly_separable(n_samples, n_features, margin)
diff --git a/nersc/performance_indicators/perf_ind_kernel.py b/nersc/performance_indicators/perf_ind_kernel.py
index e633af2..6dfb0db 100644
--- a/nersc/performance_indicators/perf_ind_kernel.py
+++ b/nersc/performance_indicators/perf_ind_kernel.py
@@ -8,6 +8,9 @@
 import os
 import yaml
 import subprocess
+
+import ray
+
 from qml_benchmarks.hyperparam_search_utils import read_data
 
 import argparse
@@ -37,15 +40,20 @@ def get_parser():
     # You only need to change this to make a different performance indicator
 
     #define the model
-    from qml_benchmarks.models.projected_quantum_kernel import ProjectedQuantumKernel as Model
+    from qml_benchmarks.models.iqp_kernel import IQPKernelClassifier as Model
 
     #implementation attributes of model
     use_jax = False
     vmap = True
-    jit = True
-    model_settings = {'use_jax': use_jax, 'vmap': vmap, 'jit': jit}
+    jit = False
+    use_ray = True
+    model_settings = {'use_jax': use_jax, 'vmap': vmap, 'jit': jit,
+                      'use_ray': use_ray}
+
+    perf_ind_name = 'RAY'  #a name for the performance indicator used for naming files
 
-    perf_ind_name = 'CAT_CPU'  #a name for the performance indicator used for naming files
+    if use_ray:
+        ray.init()
 
     #################################
 
diff --git a/nersc/run_hyperparam_search.sh b/nersc/run_hyperparam_search.sh
new file mode 100755
index 0000000..e53e043
--- /dev/null
+++ b/nersc/run_hyperparam_search.sh
@@ -0,0 +1,27 @@
+#!/bin/bash -e
+
+export RAY_DEDUP_LOGS=0  # show every Ray worker log line instead of de-duplicating them
+
+REPO=/qml-benchmarks
+DATA=${REPO}/nersc/performance_indicators/linearly_separable
+
+GENERATE_DATA=0
+# running python paper/benchmarks/generate_linearly_separable.py will generate a folder linearly_separable/ (in the current directory).
+if [[ "$GENERATE_DATA" == 1 ]]; then
+    python ${REPO}/paper/benchmarks/generate_linearly_separable.py
+fi
+
+# You can then use any of the *.csv from this folder to start training. e.g.
+#python ${REPO}/scripts/run_hyperparameter_search.py\
+# --classifier-name IQPVariationalClassifier\
+# --dataset-path linearly_separable/linearly_separable_10d_train.csv
+
+# I reduced the grid space and the input size for a faster turn around. This was my command:
+python3 -u ${REPO}/scripts/run_hyperparameter_search.py\
+ --classifier-name IQPVariationalClassifier\
+ --dataset-path ${DATA}/linearly_separable_4d_train.csv\
+ --clean True
+# --n-jobs 256  # 4
+# --n_layers 1 2\
+# --learning_rate 0.001\
+# --repeats 1\
diff --git a/nersc/run_performance_indicator.sh b/nersc/run_performance_indicator.sh
new file mode 100755
index 0000000..c2abeb4
--- /dev/null
+++ b/nersc/run_performance_indicator.sh
@@ -0,0 +1,7 @@
+#!/bin/bash -e
+
+export RAY_DEDUP_LOGS=0
+
+NUM_FEATURES=20
+
+python3 -u performance_indicators/perf_ind_kernel.py --numFeatures $NUM_FEATURES --inputPath performance_indicators/linearly_separable/
diff --git a/nersc/start-head.sh b/nersc/start-head.sh
new file mode 100644
index 0000000..11c0362
--- /dev/null
+++ b/nersc/start-head.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+export LC_ALL=C.UTF-8
+export LANG=C.UTF-8
+
+echo "starting ray head node"
+# Launch the head node
+ray start --head --node-ip-address=$1 --port=6379  # --redis-password=$2 
+sleep infinity
diff --git a/nersc/start-worker.sh b/nersc/start-worker.sh
new file mode 100644
index 0000000..52a8104
--- /dev/null
+++ b/nersc/start-worker.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+export LC_ALL=C.UTF-8
+export LANG=C.UTF-8
+
+echo "starting ray worker node"
+ray start --address $1  # --redis-password=$2
+sleep infinity
diff --git a/nersc/submit_job_multinode.slr b/nersc/submit_job_multinode.slr
new file mode 100644
index 0000000..c646dbd
--- /dev/null
+++ b/nersc/submit_job_multinode.slr
@@ -0,0 +1,71 @@
+#!/bin/bash
+#SBATCH -A m4693
+#SBATCH -C cpu
+#SBATCH -t 0:10:00
+#SBATCH --job-name=qml_multinode
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node
+#SBATCH --cpus-per-task=128
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=thomas.germain@xanadu.ai
+#SBATCH --output=out/%j.%x.out
+#SBATCH --error=out/%j.%x.err
+
+#-------------------------------------------------------------------
+# SBATCH -q shared 
+# SBATCH --gpus-per-task=4 # 4 GPUs per node
+
+set -u ;  # exit if you try to use an uninitialized variable
+
+echo "S:starting"
+echo "S:PWD=$PWD"
+
+################################################################
+# Define image and command
+################################################################
+
+IMG=tgermain/ubu22-pennylane-ray
+CMD="./run_hyperparam_search.sh"
+
+echo "S:IMG=$IMG"
+echo "S:CMD=$CMD"
+
+################################################################
+# Initialize Ray cluster environment
+################################################################
+
+head_node=$(hostname)
+head_node_ip=$(hostname --ip-address)
+# if we detect a space character in the head node IP, we'll
+# convert it to an ipv4 address. This step is optional.
+if [[ "$head_node_ip" == *" "* ]]; then
+IFS=' ' read -ra ADDR <<<"$head_node_ip"
+if [[ ${#ADDR[0]} -gt 16 ]]; then
+  head_node_ip=${ADDR[1]}
+else
+  head_node_ip=${ADDR[0]}
+fi
+fi
+port=6379
+
+echo "STARTING HEAD at $head_node"
+echo "Head node IP: $head_node_ip"
+srun --nodes=1 --ntasks=1 -w $head_node ./wrap_podman.sh $IMG "bash ./start-head.sh $head_node_ip" &
+sleep 10
+# NOTE: the start-*.sh helpers are run through bash because they are not committed as executable
+worker_num=$(($SLURM_JOB_NUM_NODES - 1)) #number of nodes other than the head node
+srun -n $worker_num --nodes=$worker_num --ntasks-per-node=1 --exclude $head_node ./wrap_podman.sh $IMG "bash ./start-worker.sh $head_node_ip:$port" &
+sleep 5
+
+################################################################
+# Run Python scripts in container
+################################################################
+
+echo "S:ready to run"
+
+./wrap_podman.sh $IMG "$CMD" "$head_node_ip:$port"
+
+sleep 1
+echo S:done
+
+# EOF
diff --git a/nersc/submit_job_shared.slr b/nersc/submit_job_shared.slr
new file mode 100644
index 0000000..17ce938
--- /dev/null
+++ b/nersc/submit_job_shared.slr
@@ -0,0 +1,51 @@
+#!/bin/bash
+#SBATCH -A m4693
+#SBATCH -C cpu
+#SBATCH -t 0:10:00
+#SBATCH --job-name=qml_shared
+#SBATCH -q shared 
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node
+#SBATCH --cpus-per-task=8
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=thomas.germain@xanadu.ai
+#SBATCH --output=out/%j.%x.out
+#SBATCH --error=out/%j.%x.err
+
+#-------------------------------------------------------------------
+# SBATCH --gpus-per-task=4 # 4 GPUs per node
+
+set -u ;  # exit  if you try to use an uninitialized variable
+
+echo "S:starting"
+echo "S:PWD=$PWD"
+
+################################################################
+# Define image and command
+################################################################
+
+IMG=tgermain/ubu22-pennylane-ray
+CMD="./run_performance_indicator.sh"
+
+echo "S:IMG=$IMG"
+echo "S:CMD=$CMD"
+
+################################################################
+# Initialize Ray cluster environment
+################################################################
+
+# ...
+
+################################################################
+# Run Python scripts in container
+################################################################
+
+N=1
+
+echo "S:ready to run"
+srun -n $N ./wrap_podman.sh $IMG "$CMD"
+
+sleep 1
+echo S:done
+
+# EOF
diff --git a/nersc/submit_job_single.slr b/nersc/submit_job_single.slr
new file mode 100644
index 0000000..4df2728
--- /dev/null
+++ b/nersc/submit_job_single.slr
@@ -0,0 +1,51 @@
+#!/bin/bash
+#SBATCH -A m4693
+#SBATCH -C cpu
+#SBATCH -t 0:30:00
+#SBATCH --job-name=qml_single
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node
+#SBATCH --cpus-per-task=256
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=thomas.germain@xanadu.ai
+#SBATCH --output=out/%j.%x.out
+#SBATCH --error=out/%j.%x.err
+
+#-------------------------------------------------------------------
+# SBATCH -q shared 
+# SBATCH --gpus-per-task=4 # 4 GPUs per node
+
+set -u ;  # exit  if you try to use an uninitialized variable
+
+echo "S:starting"
+echo "S:PWD=$PWD"
+
+################################################################
+# Define image and command
+################################################################
+
+IMG=tgermain/ubu22-pennylane-ray
+CMD="./run_performance_indicator.sh"
+
+echo "S:IMG=$IMG"
+echo "S:CMD=$CMD"
+
+################################################################
+# Initialize Ray cluster environment
+################################################################
+
+# ...
+
+################################################################
+# Run Python scripts in container
+################################################################
+
+N=1
+
+echo "S:ready to run"
+srun -n $N ./wrap_podman.sh $IMG "$CMD"
+
+sleep 1
+echo S:done
+
+# EOF
diff --git a/nersc/submit_job_venv.slr b/nersc/submit_job_venv.slr
new file mode 100644
index 0000000..3cc02fd
--- /dev/null
+++ b/nersc/submit_job_venv.slr
@@ -0,0 +1,48 @@
+#!/bin/bash
+#SBATCH -A m4693
+#SBATCH -C cpu
+#SBATCH -t 00:30:00
+#SBATCH --job-name=qml_single
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1 # 1 ray worker runtime per node
+#SBATCH --cpus-per-task=256
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=thomas.germain@xanadu.ai
+#SBATCH --output=out/%j.%x.out
+#SBATCH --error=out/%j.%x.err
+
+#-------------------------------------------------------------------
+# SBATCH -q shared 
+# SBATCH --gpus-per-task=4 # 4 GPUs per node
+
+set -u ;  # exit  if you try to use an uninitialized variable
+
+echo "S:starting"
+
+################################################################
+# Define venv and command
+################################################################
+
+module load python
+source /global/common/software/m4693/venv/qml_LK/bin/activate
+
+cd /global/cfs/cdirs/m4693/qml-benchmarks-devel/nersc/
+
+CMD="./run_performance_indicator.sh"
+
+echo "S:PWD=$PWD"
+echo "S:CMD=$CMD"
+
+################################################################
+# Run Python scripts in venv
+################################################################
+
+N=1
+
+echo "S:ready to run"
+srun -n $N $CMD
+
+sleep 1
+echo S:done
+
+# EOF
diff --git a/nersc/wrap_podman.sh b/nersc/wrap_podman.sh
index ee8ec5d..c352bb3 100755
--- a/nersc/wrap_podman.sh
+++ b/nersc/wrap_podman.sh
@@ -2,37 +2,46 @@
 echo W:myRank is $SLURM_PROCID
 IMG=$1
 CMD=$2
-outPath=$3
-CFSH=$4
-BASE_DIR=$5
-WORK_DIR=$6
+RAY_ADDRESS=$3
 
-if [ $SLURM_PROCID -eq 0 ] ; then 
+if [[ $SLURM_PROCID -eq 0 ]]; then 
    echo W:IMG=$IMG 
    echo W:CMD=$CMD
-   #echo Q:fire $
 fi
 
-echo W:BASE_DIR=$BASE_DIR
-echo 'W:start podman'
+CFSH=/global/cfs/cdirs/m4693  # CFS home
+REPO_DIR=$CFSH/qml-benchmarks-devel  # qml-benchmark repo
+ROOT_DIR=$REPO_DIR/nersc/root  # to access local python packages
+WORK_DIR=$REPO_DIR/nersc  # to store output files
+
+# Mount /tmp to avoid following error with Ray:
+#     ValueError: Can't find a `node_ip_address.json` file
+
+PORT=6379
+
+# Script will run in the workdir mounted in the container,
+# this will allow us to access the output files easily.
+
 podman-hpc run -it \
-    --volume $CFSH/$BASE_DIR:/root \
-    --volume $CFSH/$BASE_DIR:$BASE_DIR \
-    --volume $CFSH/$BASE_DIR/nersc/performance_indicators/linearly_separable:/linearly_separable \
-    --volume $CFSH/$WORK_DIR:$WORK_DIR \
+    --net host \
+    -p $PORT:$PORT \
+    --volume /tmp:/tmp \
+    --volume $ROOT_DIR:/root \
+    --volume $REPO_DIR:/qml-benchmarks \
+    --volume $WORK_DIR:/work_dir \
+    --workdir /work_dir \
     -e HDF5_USE_FILE_LOCKING='FALSE' \
-    --workdir $WORK_DIR \
+    --shm-size=10.24gb \
     $IMG <<EOF 
-echo P:pwd; pwd
-echo P:all
-ls -l ../..
+echo P:starting
+echo P:PWD=\$PWD
+ls -l
+export RAY_DEDUP_LOGS=0
+export RAY_ADDRESS=$RAY_ADDRESS
+echo P:RAY_ADDRESS=\${RAY_ADDRESS}
 $CMD
 echo P:done
 exit
 EOF
   
-echo 'W:done podman'
-
-# spare
-# 	   --volume $HOME:/home \
-    # --volume  $CFSH/qml-benchmarks:/root 
+echo 'W:done'
diff --git a/src/qml_benchmarks/models/iqp_kernel.py b/src/qml_benchmarks/models/iqp_kernel.py
index e381bb7..e7ad82c 100644
--- a/src/qml_benchmarks/models/iqp_kernel.py
+++ b/src/qml_benchmarks/models/iqp_kernel.py
@@ -19,6 +19,9 @@
 import numpy as np
 import jax
 import jax.numpy as jnp
+
+import ray
+
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.svm import SVC
 from sklearn.preprocessing import MinMaxScaler
@@ -37,6 +40,7 @@ def __init__(
         use_jax=False,
         vmap=True,
         jit=True,
+        use_ray=False,
         random_state=42,
         scaling=1.0,
         max_vmap=250,
@@ -89,6 +93,7 @@ def __init__(
         self.use_jax = use_jax
         self.vmap = vmap
         self.jit = jit
+        self.use_ray = use_ray
 
         # data-dependant attributes
         # which will be initialised by calling "fit"
@@ -100,8 +105,10 @@ def __init__(
     def generate_key(self):
         return self.rng.integers(1000000)
 
-    def construct_circuit(self):
-        dev = qml.device(self.dev_type, wires=self.n_qubits_)
+    def construct_circuit(self, dev=None):
+        cache_circuit = dev is None  # remember the caller's choice before `dev` is rebound below
+        if dev is None:
+            dev = qml.device(self.dev_type, wires=self.n_qubits_)
 
         def wrapped_circuit(x):
             @qml.qnode(dev, **self.qnode_kwargs)
@@ -136,7 +143,8 @@ def circuit(x):
             else:
                 circuit = qjit(circuit)
 
-        self.circuit = circuit
+        if cache_circuit:  # only store on self when the device was created here
+            self.circuit = circuit
 
         return circuit
 
@@ -152,9 +160,32 @@ def precompute_kernel(self, X1, X2):
         dim1 = len(X1)
         dim2 = len(X2)
 
-        circuit = self.construct_circuit()
+        if self.use_ray:
+            # concatenate all pairs of vectors
+            Z = np.array([np.concatenate((X1[i], X2[j])) for i in range(dim1) for j in range(dim2)])
+
+            @ray.remote
+            def run_circuit(x):
+                # dev is not picklable, so it must be created inside the Ray task.
+                dev = qml.device(self.dev_type, wires=self.n_qubits_)
+                circuit = self.construct_circuit(dev)
+                # TODO: run batch of circuits
+                return circuit(x)
+
+            print("precompute_kernel() start (+)")
+            print(Z.shape)
+            
+            kernel_values = []
+            for z in Z:
+                kernel_values.append(run_circuit.remote(z))
+
+            # reshape the values into the kernel matrix
+            kernel_matrix = np.reshape(ray.get(kernel_values), (dim1, dim2))
+            print(kernel_matrix.shape)
+
+            print("precompute_kernel() done")
 
-        if self.use_jax:
+        elif self.use_jax:
             # concatenate all pairs of vectors
             Z = np.array([np.concatenate((X1[i], X2[j])) for i in range(dim1) for j in range(dim2)])
             # if batched circuit is used