diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 484cf2ae2..53e2c419e 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -21,6 +21,9 @@ * Lightning-Kokkos migrated to the new device API. [(#810)](https://github.com/PennyLaneAI/pennylane-lightning/pull/810) +* Lightning-GPU migrated to the new device API. + [(#853)](https://github.com/PennyLaneAI/pennylane-lightning/pull/853) + ### Breaking changes * Deprecate PI gates implementation. diff --git a/.github/workflows/wheel_noarch.yml b/.github/workflows/wheel_noarch.yml index 11460cac1..0414fcd7b 100644 --- a/.github/workflows/wheel_noarch.yml +++ b/.github/workflows/wheel_noarch.yml @@ -50,7 +50,6 @@ jobs: if: ${{ matrix.pl_backend == 'lightning_qubit'}} uses: actions/checkout@v4 - - uses: actions/setup-python@v5 if: ${{ matrix.pl_backend == 'lightning_qubit'}} with: diff --git a/MANIFEST.in b/MANIFEST.in index 4c1a79b51..23ba93b56 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,7 +3,7 @@ include cmake/* include requirements.txt include .github/CHANGELOG.md include pennylane_lightning/lightning_qubit/lightning_qubit.toml -include pennylane_lightning/lightning_qpu/lightning_gpu.toml +include pennylane_lightning/lightning_gpu/lightning_gpu.toml include pennylane_lightning/lightning_kokkos/lightning_kokkos.toml include pennylane_lightning/core/_version.py graft pennylane_lightning/core/src/ diff --git a/Makefile b/Makefile index f43c9e903..5973200c5 100644 --- a/Makefile +++ b/Makefile @@ -35,9 +35,11 @@ help: @echo " test-cpp [verbose=1] to run the C++ test suite (requires CMake)" @echo " use with 'verbose=1' for building with verbose flag" @echo " test-cpp [target=?] to run a specific C++ test target (requires CMake)." + @echo " test-cpp-mpi [backend=?] to run the C++ test suite with MPI (requires CMake and MPI)" + @echo " Default: lightning_gpu" @echo " test-python [device=?] to run the Python test suite" @echo " Default: lightning.qubit" - @echo " wheel [backend=?] 
to configure and build Python wheels + @echo " wheel [backend=?] to configure and build Python wheels" @echo " Default: lightning_qubit" @echo " coverage [device=?] to generate a coverage report for python interface" @echo " Default: lightning.qubit" @@ -98,7 +100,7 @@ coverage-cpp: lcov --directory . -b ../pennylane_lightning/core/src/ --capture --output-file coverage.info; \ genhtml coverage.info --output-directory out -.PHONY: test-python test-builtin test-suite test-cpp +.PHONY: test-python test-builtin test-suite test-cpp test-cpp-mpi test-python: test-builtin test-suite test-builtin: @@ -124,6 +126,27 @@ else cmake --build ./BuildTests $(VERBOSE) --target test endif +test-cpp-mpi: + rm -rf ./BuildTests + cmake -BBuildTests -G Ninja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=ON \ + -DENABLE_WARNINGS=ON \ + -DPL_BACKEND=lightning_gpu \ + -DENABLE_MPI=ON \ + $(OPTIONS) +ifdef target + cmake --build ./BuildTests $(VERBOSE) --target $(target) + mpirun -np 2 ./BuildTests/$(target) +else + cmake --build ./BuildTests $(VERBOSE) + for file in ./BuildTests/*_test_runner_mpi; do \ + echo "Running $$file"; \ + mpirun -np 2 $$file ; \ + done +endif + + .PHONY: format format-cpp format-python format: format-cpp format-python diff --git a/doc/lightning_gpu/device.rst b/doc/lightning_gpu/device.rst index a5162c757..405ea9764 100644 --- a/doc/lightning_gpu/device.rst +++ b/doc/lightning_gpu/device.rst @@ -11,9 +11,9 @@ A ``lightning.gpu`` device can be loaded using: import pennylane as qml dev = qml.device("lightning.gpu", wires=2) -If the NVIDIA cuQuantum libraries are available, the above device will allow all operations to be performed on a CUDA capable GPU of generation SM 7.0 (Volta) and greater. If the libraries are not correctly installed, or available on path, the device will fall-back to ``lightning.qubit`` and perform all simulation on the CPU. 
+If the NVIDIA cuQuantum libraries are available, the above device will allow all operations to be performed on a CUDA capable GPU of generation SM 7.0 (Volta) and greater. If the libraries are not correctly installed, or not available on path, the device will raise an error. -The ``lightning.gpu`` device also directly supports quantum circuit gradients using the adjoint differentiation method. This can be enabled at the PennyLane QNode level with: +The ``lightning.gpu`` device supports quantum circuit gradients using the adjoint differentiation method by default. This can be enabled at the PennyLane QNode level with: .. code-block:: python @@ -281,3 +281,6 @@ To enable the memory-optimized adjoint method with MPI support, ``batch_obs`` sh dev = qml.device('lightning.gpu', wires= n_wires, mpi=True, batch_obs=True) For the adjoint method, each MPI process will provide the overall simulation results. + +.. note:: + The observable ``Projector`` is not supported by the multi-GPU backend. 
diff --git a/mpitests/conftest.py b/mpitests/conftest.py index a2084f2a5..552cf9f33 100644 --- a/mpitests/conftest.py +++ b/mpitests/conftest.py @@ -98,6 +98,13 @@ def get_device(): # Device specification if device_name == "lightning.gpu": from pennylane_lightning.lightning_gpu import LightningGPU as LightningDevice + from pennylane_lightning.lightning_gpu._measurements import ( + LightningGPUMeasurements as LightningMeasurements, + ) + from pennylane_lightning.lightning_gpu._state_vector import ( + LightningGPUStateVector as LightningStateVector, + ) + else: raise qml.DeviceError(f"The MPI tests do not apply to the {device_name} device.") diff --git a/mpitests/test_adjoint_jacobian.py b/mpitests/test_adjoint_jacobian.py index 6f3b5c7f5..9d56dfdb1 100644 --- a/mpitests/test_adjoint_jacobian.py +++ b/mpitests/test_adjoint_jacobian.py @@ -26,17 +26,15 @@ from pennylane import QNode from pennylane import numpy as np from pennylane import qnode +from pennylane.devices import ExecutionConfig +from pennylane.tape import QuantumScript from scipy.stats import unitary_group +from pennylane_lightning.lightning_gpu_ops import LightningException + if not ld._CPP_BINARY_AVAILABLE: pytest.skip("No binary module found. Skipping.", allow_module_level=True) -I, X, Y, Z = ( - np.eye(2), - qml.PauliX.compute_matrix(), - qml.PauliY.compute_matrix(), - qml.PauliZ.compute_matrix(), -) # Tuple passed to distributed device ctor # np.complex for data type and True or False @@ -59,265 +57,255 @@ def fixture_dev(request): ) -def Rx(theta): - r"""One-qubit rotation about the x axis. - - Args: - theta (float): rotation angle - Returns: - array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_x \theta/2}` - """ - return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * X - - -def Ry(theta): - r"""One-qubit rotation about the y axis. 
- - Args: - theta (float): rotation angle - Returns: - array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_y \theta/2}` - """ - return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * Y - - -def Rz(theta): - r"""One-qubit rotation about the z axis. - - Args: - theta (float): rotation angle - Returns: - array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_z \theta/2}` - """ - return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * Z - - class TestAdjointJacobian: # pylint: disable=too-many-public-methods """Tests for the adjoint_jacobian method""" - def test_not_expval(self, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_not_expval(self, dev, batch_obs): """Test if a QuantumFunctionError is raised for a tape with measurements that are not expectation values""" - with qml.tape.QuantumTape() as tape: - qml.RX(0.1, wires=0) - qml.var(qml.PauliZ(0)) + qs = QuantumScript([qml.RX(1.23, 0)], [qml.var(qml.PauliZ(0))], trainable_params=[0]) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) with pytest.raises( qml.QuantumFunctionError, match="Adjoint differentiation method does not" ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) - with qml.tape.QuantumTape() as tape: - qml.RX(0.1, wires=0) - qml.state() + qs = QuantumScript([qml.RX(1.23, 0)], [qml.state()], trainable_params=[0]) - if device_name == "lightning.gpu": - message = "Adjoint differentiation does not support State measurements." - else: - message = "Adjoint differentiation method does not support measurement StateMP." 
with pytest.raises( qml.QuantumFunctionError, - match=message, + match="Adjoint differentiation method does not support measurement StateMP.", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) - def test_finite_shots_warns(self): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_finite_shots_warns(self, dev, batch_obs): """Tests warning raised when finite shots specified""" - dev = qml.device(device_name, wires=8, mpi=True, shots=1) - - with qml.tape.QuantumTape() as tape: - qml.expval(qml.PauliZ(0)) + qs = QuantumScript( + [qml.RX(1.23, 0)], [qml.expval(qml.Z(0))], shots=10, trainable_params=[0] + ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - with pytest.warns( - UserWarning, + with pytest.raises( + qml.QuantumFunctionError, match="Requested adjoint differentiation to be computed with finite shots.", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) def test_empty_measurements(self, dev): """Tests if an empty array is returned when the measurements of the tape is empty.""" - with qml.tape.QuantumTape() as tape: + def circuit(): qml.RX(0.4, wires=[0]) + return qml.expval(qml.PauliZ(0)) + + result = QNode(circuit, dev, diff_method="adjoint") + + jac = qml.grad(result)() - jac = dev.adjoint_jacobian(tape) assert len(jac) == 0 - def test_unsupported_op(self, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_unsupported_op(self, batch_obs, dev): """Test if a QuantumFunctionError is raised for an unsupported operation, i.e., multi-parameter operations that are not qml.Rot""" - with qml.tape.QuantumTape() as tape: - qml.CRot(0.1, 0.2, 0.3, wires=[0, 1]) - qml.expval(qml.PauliZ(0)) + qs = QuantumScript( + [qml.CRot(0.1, 0.2, 0.3, wires=[0, 1])], + [qml.expval(qml.PauliZ(0))], + trainable_params=[0], + ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) with pytest.raises( - qml.QuantumFunctionError, 
- match="The CRot operation is not supported using the", + LightningException, + match="The operation is not supported using the adjoint differentiation method", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) - def test_proj_unsupported(self, dev): + @pytest.mark.skip("WIP: Need a deep review if LGPU accept Projector") + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_proj_unsupported(self, batch_obs, dev): """Test if a QuantumFunctionError is raised for a Projector observable""" - with qml.tape.QuantumTape() as tape: - qml.CRX(0.1, wires=[0, 1]) - qml.expval(qml.Projector([0, 1], wires=[0, 1])) + + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) + + qs = QuantumScript( + [qml.CRX(0.1, wires=[0, 1])], + [qml.expval(qml.Projector([0, 1], wires=[0, 1]))], + trainable_params=[0], + ) with pytest.raises( qml.QuantumFunctionError, match="differentiation method does not support the Projector", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) - with qml.tape.QuantumTape() as tape: - qml.CRX(0.1, wires=[0, 1]) - qml.expval(qml.Projector([0], wires=[0]) @ qml.PauliZ(0)) + qs = QuantumScript( + [qml.CRX(0.1, wires=[0, 1])], + [qml.expval(qml.Projector([0], wires=[0]) @ qml.PauliZ(0))], + trainable_params=[0], + ) with pytest.raises( qml.QuantumFunctionError, match="differentiation method does not support the Projector", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) + + @staticmethod + def tol_for_allclose(c_dtype): + """Compute the tolerance for allclose""" + return 1e-3 if c_dtype == np.complex64 else 1e-7 @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) @pytest.mark.parametrize("G", [qml.RX, qml.RY, qml.RZ]) @pytest.mark.parametrize("stateprep", [qml.QubitStateVector, qml.StatePrep]) - def test_pauli_rotation_gradient(self, stateprep, G, theta, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def 
test_pauli_rotation_gradient( + self, stateprep, G, theta, batch_obs, dev + ): # pylint: disable=too-many-arguments """Tests that the automatic gradients of Pauli rotations are correct.""" random_state = np.array( [0.43593284 - 0.02945156j, 0.40812291 + 0.80158023j], requires_grad=False ) - tape = qml.tape.QuantumScript( - [stateprep(random_state, 0), G(theta, 0)], [qml.expval(qml.PauliZ(0))] + qs = QuantumScript( + [stateprep(random_state, 0), G(theta, 0)], + [qml.expval(qml.PauliZ(0))], + trainable_params=[1], ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - tape.trainable_params = {1} - - calculated_val = dev.adjoint_jacobian(tape) + calculated_val = dev.compute_derivatives(qs, config) - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = self.tol_for_allclose(dev.c_dtype) # compare to finite differences - tapes, fn = qml.gradients.param_shift(tape) + tapes, fn = qml.gradients.param_shift(qs) numeric_val = fn(qml.execute(tapes, dev, None)) assert np.allclose(calculated_val, numeric_val, atol=tol, rtol=0) @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) @pytest.mark.parametrize("stateprep", [qml.QubitStateVector, qml.StatePrep]) - def test_Rot_gradient(self, stateprep, theta, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_Rot_gradient(self, stateprep, theta, batch_obs, dev): """Tests that the device gradient of an arbitrary Euler-angle-parameterized gate is correct.""" params = np.array([theta, theta**3, np.sqrt(2) * theta]) - with qml.tape.QuantumTape() as tape: - stateprep(np.array([1.0, -1.0], requires_grad=False) / np.sqrt(2), wires=0) - qml.Rot(*params, wires=[0]) - qml.expval(qml.PauliZ(0)) + qs = QuantumScript( + [ + stateprep(np.array([1.0, -1.0], requires_grad=False) / np.sqrt(2), wires=0), + qml.Rot(*params, wires=[0]), + ], + [qml.expval(qml.PauliZ(0))], + trainable_params=[1, 2, 3], + ) - tape.trainable_params = {1, 2, 3} + config = 
ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - calculated_val = dev.adjoint_jacobian(tape) + calculated_val = dev.compute_derivatives(qs, config) - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = self.tol_for_allclose(dev.c_dtype) # compare to finite differences - tapes, fn = qml.gradients.param_shift(tape) + tapes, fn = qml.gradients.param_shift(qs) numeric_val = fn(qml.execute(tapes, dev, None)) assert np.allclose(calculated_val, numeric_val, atol=tol, rtol=0) - @pytest.mark.parametrize("par", [1, -2, 1.623, -0.051, 0]) # integers, floats, zero - def test_ry_gradient(self, par, tol, dev): - """Test that the gradient of the RY gate matches the exact analytic formula.""" - with qml.tape.QuantumTape() as tape: - qml.RY(par, wires=[0]) - qml.expval(qml.PauliX(0)) - - tape.trainable_params = {0} + @pytest.mark.parametrize("param", [1, -2, 1.623, -0.051, 0]) # integers, floats, zero + @pytest.mark.parametrize( + "rotation, meas, expected_func", + [ + (qml.RY, qml.PauliX, lambda x: np.cos(x)), # pylint: disable=unnecessary-lambda + (qml.RX, qml.PauliZ, lambda x: -np.sin(x)), # pylint: disable=unnecessary-lambda + ], + ) + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_r_gradient( + self, tol, param, rotation, meas, expected_func, batch_obs, dev + ): # pylint: disable=too-many-arguments + """Test for the gradient of the rotation gate matches the known formula.""" - # gradients - exact = np.cos(par) - grad_A = dev.adjoint_jacobian(tape) + qs = QuantumScript( + [rotation(param, wires=0)], + [qml.expval(meas(0))], + trainable_params=[0], + ) - # different methods must agree - assert np.allclose(grad_A, exact, atol=tol, rtol=0) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - def test_rx_gradient(self, tol, dev): - """Test that the gradient of the RX gate matches the known formula.""" - a = 0.7418 + # circuit jacobians + dev_jacobian = dev.compute_derivatives(qs, 
config) + expected_jacobian = expected_func(param) + assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - with qml.tape.QuantumTape() as tape: - qml.RX(a, wires=0) - qml.expval(qml.PauliZ(0)) + @staticmethod + def process_and_execute_multiple_rx(dev, params, meas, batch_obs): + """Compute the circuit with multiple RX gates""" + qs = QuantumScript( + [qml.RX(params[0], wires=0), qml.RX(params[1], wires=1), qml.RX(params[2], wires=2)], + meas, + trainable_params=[0, 1, 2], + ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) # circuit jacobians - dev_jacobian = dev.adjoint_jacobian(tape) - expected_jacobian = -np.sin(a) - assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) + dev_jacobian = dev.compute_derivatives(qs, config) - def test_multiple_rx_gradient_pauliz(self, tol, dev): + return dev_jacobian + + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_multiple_rx_gradient_pauliz(self, tol, batch_obs, dev): """Tests that the gradient of multiple RX gates in a circuit yields the correct result.""" params = np.array([np.pi, np.pi / 2, np.pi / 3]) - with qml.tape.QuantumTape() as tape: - qml.RX(params[0], wires=0) - qml.RX(params[1], wires=1) - qml.RX(params[2], wires=2) - - for idx in range(3): - qml.expval(qml.PauliZ(idx)) + meas = [qml.expval(qml.PauliZ(idx)) for idx in range(3)] # circuit jacobians - dev_jacobian = dev.adjoint_jacobian(tape) + dev_jacobian = self.process_and_execute_multiple_rx(dev, params, meas, batch_obs) expected_jacobian = -np.diag(np.sin(params)) assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - def test_multiple_rx_gradient_hermitian(self, tol, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_multiple_rx_gradient_hermitian(self, tol, batch_obs, dev): """Tests that the gradient of multiple RX gates in a circuit yields the correct result with Hermitian observable """ - params = np.array([np.pi, np.pi / 
2, np.pi / 3]) - with qml.tape.QuantumTape() as tape: - qml.RX(params[0], wires=0) - qml.RX(params[1], wires=1) - qml.RX(params[2], wires=2) + params = np.array([np.pi, np.pi / 2, np.pi / 3]) - for idx in range(3): - qml.expval(qml.Hermitian([[1, 0], [0, -1]], wires=[idx])) + meas = [qml.expval(qml.Hermitian([[1, 0], [0, -1]], wires=[idx])) for idx in range(3)] - tape.trainable_params = {0, 1, 2} # circuit jacobians - dev_jacobian = dev.adjoint_jacobian(tape) + dev_jacobian = self.process_and_execute_multiple_rx(dev, params, meas, batch_obs) expected_jacobian = -np.diag(np.sin(params)) assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__] # pylint: disable=no-member - ops = {qml.RX, qml.RY, qml.RZ, qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot} - - def test_multiple_rx_gradient_expval_hermitian(self, tol, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_multiple_rx_gradient_expval_hermitian(self, tol, batch_obs, dev): """Tests that the gradient of multiple RX gates in a circuit yields the correct result with Hermitian observable """ params = np.array([np.pi / 3, np.pi / 4, np.pi / 5]) - with qml.tape.QuantumTape() as tape: - qml.RX(params[0], wires=0) - qml.RX(params[1], wires=1) - qml.RX(params[2], wires=2) - + meas = [ qml.expval( qml.Hermitian( [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], wires=[0, 2], ) ) + ] - tape.trainable_params = {0, 1, 2} - dev_jacobian = dev.adjoint_jacobian(tape) + dev_jacobian = self.process_and_execute_multiple_rx(dev, params, meas, batch_obs) expected_jacobian = np.array( [ -np.sin(params[0]) * np.cos(params[2]), @@ -328,37 +316,31 @@ def test_multiple_rx_gradient_expval_hermitian(self, tol, dev): assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__] # pylint: disable=no-member - ops = {qml.RX, qml.RY, qml.RZ, 
qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot} - - def test_multiple_rx_gradient_expval_hamiltonian(self, tol, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_multiple_rx_gradient_expval_hamiltonian(self, tol, batch_obs, dev): """Tests that the gradient of multiple RX gates in a circuit yields the correct result with Hermitian observable """ params = np.array([np.pi / 3, np.pi / 4, np.pi / 5]) - ham = qml.Hamiltonian( - [1.0, 0.3, 0.3, 0.4], - [ - qml.PauliX(0) @ qml.PauliX(1), - qml.PauliZ(0), - qml.PauliZ(1), - qml.Hermitian( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], - wires=[0, 2], - ), - ], - ) - - with qml.tape.QuantumTape() as tape: - qml.RX(params[0], wires=0) - qml.RX(params[1], wires=1) - qml.RX(params[2], wires=2) - - qml.expval(ham) + meas = [ + qml.expval( + qml.Hamiltonian( + [1.0, 0.3, 0.3, 0.4], + [ + qml.PauliX(0) @ qml.PauliX(1), + qml.PauliZ(0), + qml.PauliZ(1), + qml.Hermitian( + [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], + wires=[0, 2], + ), + ], + ) + ) + ] - tape.trainable_params = {0, 1, 2} - dev_jacobian = dev.adjoint_jacobian(tape) + dev_jacobian = self.process_and_execute_multiple_rx(dev, params, meas, batch_obs) expected_jacobian = ( 0.3 * np.array([-np.sin(params[0]), 0, 0]) + 0.3 * np.array([0, -np.sin(params[1]), 0]) @@ -374,51 +356,21 @@ def test_multiple_rx_gradient_expval_hamiltonian(self, tol, dev): assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__] # pylint: disable=no-member - ops = {qml.RX, qml.RY, qml.RZ, qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot} - - @pytest.mark.parametrize("obs", [qml.PauliX, qml.PauliY]) @pytest.mark.parametrize( - "op", + "meas", [ - qml.RX(0.4, wires=0), - qml.RY(0.6, wires=0), - qml.RZ(0.8, wires=0), - qml.CRX(1.0, wires=[0, 1]), - qml.CRY(2.0, wires=[0, 1]), - qml.CRZ(3.0, wires=[0, 1]), - qml.Rot(0.2, -0.1, 0.2, wires=0), + 
[qml.expval(qml.PauliX(wires=0)), qml.expval(qml.PauliZ(wires=1))], + [qml.expval(qml.PauliY(wires=0)), qml.expval(qml.PauliZ(wires=1))], + [ + qml.expval( + qml.Hermitian( + [[0, 0, 1, 1], [0, 1, 2, 1], [1, 2, 1, 0], [1, 1, 0, 0]], + wires=[0, 1], + ) + ) + ], ], ) - def test_gradients_pauliz(self, op, obs, dev): - """Tests that the gradients of circuits match between the finite difference and device - methods.""" - # op.num_wires and op.num_params must be initialized a priori - with qml.tape.QuantumTape() as tape: - qml.Hadamard(wires=0) - qml.RX(0.543, wires=0) - qml.CNOT(wires=[0, 1]) - - op # pylint: disable=pointless-statement - - qml.Rot(1.3, -2.3, 0.5, wires=[0]) - qml.RZ(-0.5, wires=0) - qml.adjoint(qml.RY(0.5, wires=1), lazy=False) - qml.CNOT(wires=[0, 1]) - - qml.expval(obs(wires=0)) - qml.expval(qml.PauliZ(wires=1)) - - tape.trainable_params = set(range(1, 1 + op.num_params)) - - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - - # pylint: disable=unnecessary-direct-lambda-call - grad_F = (lambda t, fn: fn(qml.execute(t, dev, None)))(*qml.gradients.param_shift(tape)) - grad_D = dev.adjoint_jacobian(tape) - - assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - @pytest.mark.parametrize( "op", [ @@ -431,119 +383,72 @@ def test_gradients_pauliz(self, op, obs, dev): qml.Rot(0.2, -0.1, 0.2, wires=0), ], ) - def test_gradients_hermitian(self, op, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_gradients_pauliz_hermitian(self, op, meas, batch_obs, dev): """Tests that the gradients of circuits match between the finite difference and device methods.""" # op.num_wires and op.num_params must be initialized a priori - with qml.tape.QuantumTape() as tape: - qml.Hadamard(wires=0) - qml.RX(0.543, wires=0) - qml.CNOT(wires=[0, 1]) - - op.queue() - - qml.Rot(1.3, -2.3, 0.5, wires=[0]) - qml.RZ(-0.5, wires=0) - qml.adjoint(qml.RY(0.5, wires=1), lazy=False) - qml.CNOT(wires=[0, 1]) - - qml.expval( - qml.Hermitian( - [[0, 0, 1, 1], [0, 1, 2, 
1], [1, 2, 1, 0], [1, 1, 0, 0]], - wires=[0, 1], - ) - ) - - tape.trainable_params = set(range(1, 1 + op.num_params)) - - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - - # pylint: disable=unnecessary-direct-lambda-call - grad_F = (lambda t, fn: fn(qml.execute(t, dev, None)))(*qml.gradients.param_shift(tape)) - grad_D = dev.adjoint_jacobian(tape) - - assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - - def test_gradient_gate_with_multiple_parameters_pauliz(self, dev): - """Tests that gates with multiple free parameters yield correct gradients.""" - x, y, z = [0.5, 0.3, -0.7] - - tape = qml.tape.QuantumScript( + qs = QuantumScript( [ - qml.RX(0.4, wires=[0]), - qml.Rot(x, y, z, wires=[0]), - qml.RY(-0.2, wires=[0]), + qml.Hadamard(wires=0), + qml.RX(0.543, wires=0), + qml.CNOT(wires=[0, 1]), + op, + qml.Rot(1.3, -2.3, 0.5, wires=[0]), + qml.RZ(-0.5, wires=0), + qml.adjoint(qml.RY(0.5, wires=1), lazy=False), + qml.CNOT(wires=[0, 1]), ], - [qml.expval(qml.PauliZ(0))], + meas, + trainable_params=list(range(1, 1 + op.num_params)), ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - tape.trainable_params = {1, 2, 3} + tol = self.tol_for_allclose(dev.c_dtype) - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - - grad_D = dev.adjoint_jacobian(tape) - tapes, fn = qml.gradients.param_shift(tape) + tapes, fn = qml.gradients.param_shift(qs) grad_F = fn(qml.execute(tapes, dev, None)) - # gradient has the correct shape and every element is nonzero - assert len(grad_D) == 3 - assert all(isinstance(v, np.ndarray) for v in grad_D) - assert np.count_nonzero(grad_D) == 3 - # the different methods agree + # circuit jacobians + grad_D = dev.compute_derivatives(qs, config) assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - def test_gradient_gate_with_multiple_parameters_hermitian(self, dev): - """Tests that gates with multiple free parameters yield correct gradients.""" - x, y, z = [0.5, 0.3, -0.7] - - tape = 
qml.tape.QuantumScript( + @pytest.mark.parametrize( + "meas", + [ + [qml.expval(qml.PauliZ(0))], + [qml.expval(qml.Hermitian([[0, 1], [1, 1]], wires=0))], [ - qml.RX(0.4, wires=[0]), - qml.Rot(x, y, z, wires=[0]), - qml.RY(-0.2, wires=[0]), + qml.expval( + qml.Hamiltonian( + [1.0, 0.3, 0.3], + [qml.PauliX(0) @ qml.PauliX(1), qml.PauliZ(0), qml.PauliZ(1)], + ) + ) ], - [qml.expval(qml.Hermitian([[0, 1], [1, 1]], wires=0))], - ) - - tape.trainable_params = {1, 2, 3} - - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - - grad_D = dev.adjoint_jacobian(tape) - tapes, fn = qml.gradients.param_shift(tape) - grad_F = fn(qml.execute(tapes, dev, None)) - - # gradient has the correct shape and every element is nonzero - assert len(grad_D) == 3 - assert all(isinstance(v, np.ndarray) for v in grad_D) - assert np.count_nonzero(grad_D) == 3 - # the different methods agree - assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - - def test_gradient_gate_with_multiple_parameters_hamiltonian(self, dev): + ], + ) + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_gradient_gate_with_multiple_parameters(self, meas, batch_obs, dev): """Tests that gates with multiple free parameters yield correct gradients.""" x, y, z = [0.5, 0.3, -0.7] - ham = qml.Hamiltonian( - [1.0, 0.3, 0.3], - [qml.PauliX(0) @ qml.PauliX(1), qml.PauliZ(0), qml.PauliZ(1)], - ) - - tape = qml.tape.QuantumScript( + qs = QuantumScript( [ qml.RX(0.4, wires=[0]), qml.Rot(x, y, z, wires=[0]), qml.RY(-0.2, wires=[0]), ], - [qml.expval(ham)], + meas, + trainable_params=[1, 2, 3], ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - tape.trainable_params = {1, 2, 3} - - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = self.tol_for_allclose(dev.c_dtype) - grad_D = dev.adjoint_jacobian(tape) - tapes, fn = qml.gradients.param_shift(tape) + # circuit jacobians + grad_D = dev.compute_derivatives(qs, config) + tapes, fn = qml.gradients.param_shift(qs) grad_F = 
fn(qml.execute(tapes, dev, None)) # gradient has the correct shape and every element is nonzero @@ -553,101 +458,45 @@ def test_gradient_gate_with_multiple_parameters_hamiltonian(self, dev): # the different methods agree assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - def test_use_device_state(self, tol, dev): - """Tests that when using the device state, the correct answer is still returned.""" - - x, y, z = [0.5, 0.3, -0.7] - - with qml.tape.QuantumTape() as tape: - qml.RX(0.4, wires=[0]) - qml.Rot(x, y, z, wires=[0]) - qml.RY(-0.2, wires=[0]) - qml.expval(qml.PauliZ(0)) - - tape.trainable_params = {1, 2, 3} - - dM1 = dev.adjoint_jacobian(tape) - - qml.execute([tape], dev, None) - dM2 = dev.adjoint_jacobian(tape, use_device_state=True) - - assert np.allclose(dM1, dM2, atol=tol, rtol=0) - - def test_provide_starting_state(self, tol, dev): - """Tests provides correct answer when provided starting state.""" - comm = MPI.COMM_WORLD - - x, y, z = [0.5, 0.3, -0.7] - - with qml.tape.QuantumTape() as tape: - qml.RX(0.4, wires=[0]) - qml.Rot(x, y, z, wires=[0]) - qml.RY(-0.2, wires=[0]) - qml.expval(qml.PauliZ(0)) - - tape.trainable_params = {1, 2, 3} - - dM1 = dev.adjoint_jacobian(tape) - if device_name == "lightning.gpu": - local_state_vector = dev.state - complex_type = np.complex128 if dev.R_DTYPE == np.float64 else np.complex64 - state_vector = np.zeros(1 << 8).astype(complex_type) - comm.Allgather(local_state_vector, state_vector) - qml.execute([tape], dev, None) - dM2 = dev.adjoint_jacobian(tape, starting_state=state_vector) - assert np.allclose(dM1, dM2, atol=tol, rtol=0) - - def test_provide_wrong_starting_state(self, dev): - """Tests raise an exception when provided starting state mismatches.""" - x, y, z = [0.5, 0.3, -0.7] - - with qml.tape.QuantumTape() as tape: - qml.RX(0.4, wires=[0]) - qml.Rot(x, y, z, wires=[0]) - qml.RY(-0.2, wires=[0]) - qml.expval(qml.PauliZ(0)) - - tape.trainable_params = {1, 2, 3} +class TestAdjointJacobianQNode: + """Test QNode 
integration with the adjoint_jacobian method""" - with pytest.raises( - qml.QuantumFunctionError, - match="The number of qubits of starting_state must be the same as", - ): - dev.adjoint_jacobian(tape, starting_state=np.ones(7)) + # def analytic_rotation(self): + I = np.eye(2) + X = qml.PauliX.compute_matrix() + Y = qml.PauliY.compute_matrix() + Z = qml.PauliZ.compute_matrix() - @pytest.mark.skipif( - device_name == "lightning.gpu", - reason="Adjoint differentiation does not support State measurements.", - ) - def test_state_return_type(self, dev): - """Tests raise an exception when the return type is State""" - with qml.tape.QuantumTape() as tape: - qml.RX(0.4, wires=[0]) - qml.state() + def Rx(self, theta): + r"""One-qubit rotation about the x axis. - tape.trainable_params = {0} + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_x \theta/2}` + """ + return math.cos(theta / 2) * self.I + 1j * math.sin(-theta / 2) * self.X - with pytest.raises( - qml.QuantumFunctionError, - match="Adjoint differentiation method does not support measurement StateMP.", - ): - dev.adjoint_jacobian(tape) + def Ry(self, theta): + r"""One-qubit rotation about the y axis. + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_y \theta/2}` + """ + return math.cos(theta / 2) * self.I + 1j * math.sin(-theta / 2) * self.Y -class TestAdjointJacobianQNode: - """Test QNode integration with the adjoint_jacobian method""" + def Rz(self, theta): + r"""One-qubit rotation about the z axis. 
- @pytest.fixture(params=fixture_params) - def dev(self, request): - """Returns a PennyLane device.""" - return qml.device( - device_name, - wires=8, - mpi=True, - c_dtype=request.param[0], - batch_obs=request.param[1], - ) + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_z \theta/2}` + """ + return math.cos(theta / 2) * self.I + 1j * math.sin(-theta / 2) * self.Z def test_finite_shots_error(self): """Tests that an error is raised when computing the adjoint diff on a device with finite shots""" @@ -665,6 +514,11 @@ def circ(x): qml.grad(circ)(0.1) + @staticmethod + def tol_for_allclose(c_dtype): + """Compute the tolerance for allclose""" + return 1e-3 if c_dtype == np.complex64 else 1e-7 + def test_qnode(self, mocker, dev): """Test that specifying diff_method allows the adjoint method to be selected""" args = np.array([0.54, 0.1, 0.5], requires_grad=True) @@ -684,15 +538,15 @@ def circuit(x, y, z): return qml.expval(qml.PauliX(0) @ qml.PauliZ(1)) qnode1 = QNode(circuit, dev, diff_method="adjoint") - spy = mocker.spy(dev.target_device, "adjoint_jacobian") + spy = mocker.spy(dev, "LightningAdjointJacobian") grad_fn = qml.grad(qnode1) grad_A = grad_fn(*args) spy.assert_called() - h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) + tol = self.tol_for_allclose(dev.c_dtype) qnode2 = QNode(circuit, dev, diff_method="finite-diff", h=h) grad_fn = qml.grad(qnode2) @@ -726,7 +580,7 @@ def cost(p1, p2): zero_state = np.array([1.0, 0.0]) cost(reused_p, other_p) - spy = mocker.spy(dev.target_device, "adjoint_jacobian") + spy = mocker.spy(dev, "LightningAdjointJacobian") # analytic gradient grad_fn = qml.grad(cost) @@ -737,18 +591,34 @@ def cost(p1, p2): # manual gradient grad_true0 = ( expZ( - Rx(reused_p) @ Rz(other_p) @ Ry(reused_p + np.pi / 2) @ Rx(extra_param) @ zero_state + self.Rx(reused_p) + @ self.Rz(other_p) + @ 
self.Ry(reused_p + np.pi / 2) + @ self.Rx(extra_param) + @ zero_state ) - expZ( - Rx(reused_p) @ Rz(other_p) @ Ry(reused_p - np.pi / 2) @ Rx(extra_param) @ zero_state + self.Rx(reused_p) + @ self.Rz(other_p) + @ self.Ry(reused_p - np.pi / 2) + @ self.Rx(extra_param) + @ zero_state ) ) / 2 grad_true1 = ( expZ( - Rx(reused_p + np.pi / 2) @ Rz(other_p) @ Ry(reused_p) @ Rx(extra_param) @ zero_state + self.Rx(reused_p + np.pi / 2) + @ self.Rz(other_p) + @ self.Ry(reused_p) + @ self.Rx(extra_param) + @ zero_state ) - expZ( - Rx(reused_p - np.pi / 2) @ Rz(other_p) @ Ry(reused_p) @ Rx(extra_param) @ zero_state + self.Rx(reused_p - np.pi / 2) + @ self.Rz(other_p) + @ self.Ry(reused_p) + @ self.Rx(extra_param) + @ zero_state ) ) / 2 expected = grad_true0 + grad_true1 # product rule @@ -765,10 +635,10 @@ def circuit(params): qml.Rot(params[1], params[0], 2 * params[0], wires=[0]) return qml.expval(qml.PauliX(0)) - spy_analytic = mocker.spy(dev.target_device, "adjoint_jacobian") + spy_analytic = mocker.spy(dev, "LightningAdjointJacobian") - h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) + tol = self.tol_for_allclose(dev.c_dtype) cost = QNode(circuit, dev, diff_method="finite-diff", h=h) @@ -798,7 +668,7 @@ def f(params1, params2): qml.RY(tf.cos(params2), wires=[0]) return qml.expval(qml.PauliZ(0)) - if dev.R_DTYPE == np.float32: + if dev.r_dtype == np.float32: tf_r_dtype = tf.float32 else: tf_r_dtype = tf.float64 @@ -806,8 +676,8 @@ def f(params1, params2): params1 = tf.Variable(0.3, dtype=tf_r_dtype) params2 = tf.Variable(0.4, dtype=tf_r_dtype) - h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) + tol = self.tol_for_allclose(dev.c_dtype) qnode1 = QNode(f, dev, interface="tf", diff_method="adjoint") qnode2 = QNode(f, dev, interface="tf", diff_method="finite-diff", h=h) @@ -839,7 +709,7 @@ def 
f(params1, params2): params1 = torch.tensor(0.3, requires_grad=True) params2 = torch.tensor(0.4, requires_grad=True) - h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) qnode1 = QNode(f, dev, interface="torch", diff_method="adjoint") qnode2 = QNode(f, dev, interface="torch", diff_method="finite-diff", h=h) @@ -861,7 +731,7 @@ def test_interface_jax(self, dev): jax interface""" jax = pytest.importorskip("jax") - if dev.R_DTYPE == np.float64: + if dev.c_dtype == np.complex128: from jax import config # pylint: disable=import-outside-toplevel config.update("jax_enable_x64", True) @@ -872,11 +742,13 @@ def f(params1, params2): qml.RY(jax.numpy.cos(params2), wires=[0]) return qml.expval(qml.PauliZ(0)) - params1 = jax.numpy.array(0.3, dev.R_DTYPE) - params2 = jax.numpy.array(0.4, dev.R_DTYPE) + r_dtype = np.float32 if dev.c_dtype == np.complex64 else np.float64 + + params1 = jax.numpy.array(0.3, r_dtype) + params2 = jax.numpy.array(0.4, r_dtype) - h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) + tol = self.tol_for_allclose(dev.c_dtype) qnode_adjoint = QNode(f, dev, interface="jax", diff_method="adjoint") qnode_fd = QNode(f, dev, interface="jax", diff_method="finite-diff", h=h) @@ -1379,8 +1251,8 @@ def test_qubit_unitary(dev, n_targets): """Tests that ``qml.QubitUnitary`` can be included in circuits differentiated with the adjoint method.""" n_wires = len(dev.wires) dev_def = qml.device("default.qubit", wires=n_wires) - h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - c_dtype = np.complex64 if dev.R_DTYPE == np.float32 else np.complex128 + h = 1e-3 if dev.c_dtype == np.complex64 else 1e-7 + c_dtype = dev.c_dtype np.random.seed(1337) par = 2 * np.pi * np.random.rand(n_wires) @@ -1427,8 +1299,8 @@ def test_diff_qubit_unitary(dev, n_targets): """Tests that ``qml.QubitUnitary`` can be differentiated with the adjoint method.""" n_wires = 
len(dev.wires) dev_def = qml.device("default.qubit", wires=n_wires) - h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - c_dtype = np.complex64 if dev.R_DTYPE == np.float32 else np.complex128 + h = 1e-3 if dev.c_dtype == np.complex64 else 1e-7 + c_dtype = dev.c_dtype np.random.seed(1337) par = 2 * np.pi * np.random.rand(n_wires) diff --git a/mpitests/test_apply.py b/mpitests/test_apply.py index 17d91cd2d..5987626f1 100644 --- a/mpitests/test_apply.py +++ b/mpitests/test_apply.py @@ -34,14 +34,17 @@ ) -def create_random_init_state(numWires, R_DTYPE, seed_value=48): +def create_random_init_state(numWires, c_dtype, seed_value=48): """Returns a random initial state of a certain type.""" np.random.seed(seed_value) - num_elements = 1 << numWires - init_state = np.random.rand(num_elements).astype(R_DTYPE) + 1j * np.random.rand( + + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 + + num_elements = 2**numWires + init_state = np.random.rand(num_elements).astype(r_dtype) + 1j * np.random.rand( num_elements - ).astype(R_DTYPE) - scale_sum = np.sqrt(np.sum(np.abs(init_state) ** 2)).astype(R_DTYPE) + ).astype(r_dtype) + scale_sum = np.sqrt(np.sum(np.abs(init_state) ** 2)).astype(r_dtype) init_state = init_state / scale_sum return init_state @@ -54,16 +57,13 @@ def apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: - c_dtype = np.complex64 - else: - c_dtype = np.complex128 + c_dtype = dev_mpi.c_dtype - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - 
state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) comm.Scatter(state_vector, local_state_vector, root=0) @@ -84,45 +84,6 @@ def circuit(*params): assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) -def apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires): - """Wrapper applying a parametric gate with the apply method.""" - num_wires = numQubits - comm = MPI.COMM_WORLD - commSize = comm.Get_size() - num_global_wires = commSize.bit_length() - 1 - num_local_wires = num_wires - num_global_wires - - if dev_mpi.R_DTYPE == np.float32: - c_dtype = np.complex64 - else: - c_dtype = np.complex128 - - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) - - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) - comm.Bcast(state_vector, root=0) - - comm.Scatter(state_vector, local_state_vector, root=0) - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) - - @qml.qnode(dev_cpu) - def circuit(*params): - qml.StatePrep(state_vector, wires=range(num_wires)) - operation(*params, wires=Wires) - return qml.state() - - expected_output_cpu = np.array(circuit(*par)).astype(c_dtype) - comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) - - dev_mpi.syncH2D(local_state_vector) - dev_mpi.apply([operation(*par, wires=Wires)]) - dev_mpi.syncD2H(local_state_vector) - - assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) - - def apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires): """Wrapper applying a non-parametric gate with QNode function.""" num_wires = numQubits @@ -131,16 +92,13 @@ def apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires): 
num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: - c_dtype = np.complex64 - else: - c_dtype = np.complex128 + c_dtype = dev_mpi.c_dtype - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) comm.Scatter(state_vector, local_state_vector, root=0) @@ -161,45 +119,6 @@ def circuit(): assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) -def apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires): - """Wrapper applying a non-parametric gate with the apply method.""" - num_wires = numQubits - comm = MPI.COMM_WORLD - commSize = comm.Get_size() - num_global_wires = commSize.bit_length() - 1 - num_local_wires = num_wires - num_global_wires - - if dev_mpi.R_DTYPE == np.float32: - c_dtype = np.complex64 - else: - c_dtype = np.complex128 - - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) - - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) - comm.Bcast(state_vector, root=0) - - comm.Scatter(state_vector, local_state_vector, root=0) - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) - - @qml.qnode(dev_cpu) - def circuit(): - qml.StatePrep(state_vector, wires=range(num_wires)) - operation(wires=Wires) - return qml.state() - - 
expected_output_cpu = np.array(circuit()).astype(c_dtype) - comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) - - dev_mpi.syncH2D(local_state_vector) - dev_mpi.apply([operation(wires=Wires)]) - dev_mpi.syncD2H(local_state_vector) - - assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) - - class TestApply: # pylint: disable=missing-function-docstring,too-many-arguments """Tests whether the device can apply supported quantum gates.""" @@ -220,13 +139,11 @@ def dev_mpi(self, request): @pytest.mark.parametrize("Wires", [0, 1, numQubits - 2, numQubits - 1]) def test_apply_operation_single_wire_nonparam(self, tol, operation, Wires, dev_mpi): apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) - apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) @pytest.mark.parametrize("operation", [qml.CNOT, qml.SWAP, qml.CY, qml.CZ]) @pytest.mark.parametrize("Wires", [[0, 1], [numQubits - 2, numQubits - 1], [0, numQubits - 1]]) def test_apply_operation_two_wire_nonparam(self, tol, operation, Wires, dev_mpi): apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) - apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) @pytest.mark.parametrize( @@ -240,7 +157,6 @@ def test_apply_operation_two_wire_nonparam(self, tol, operation, Wires, dev_mpi) ) def test_apply_operation_three_wire_nonparam(self, tol, operation, Wires, dev_mpi): apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) - apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) @pytest.mark.parametrize( @@ -254,7 +170,6 @@ def test_apply_operation_three_wire_nonparam(self, tol, operation, Wires, dev_mp ) def test_apply_operation_three_wire_qnode_nonparam(self, tol, operation, Wires, dev_mpi): apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) - 
apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) @pytest.mark.parametrize("operation", [qml.PhaseShift, qml.RX, qml.RY, qml.RZ]) @pytest.mark.parametrize("par", [[0.1], [0.2], [0.3]]) @@ -263,7 +178,6 @@ def test_apply_operation_1gatequbit_1param_gate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) @pytest.mark.parametrize("operation", [qml.Rot]) @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) @@ -272,7 +186,6 @@ def test_apply_operation_1gatequbit_3param_gate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) @pytest.mark.parametrize("operation", [qml.CRot]) @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) @@ -281,7 +194,6 @@ def test_apply_operation_1gatequbit_3param_cgate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) @pytest.mark.parametrize( "operation", @@ -304,7 +216,6 @@ def test_apply_operation_2gatequbit_1param_gate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) @pytest.mark.parametrize( "operation", @@ -323,7 +234,6 @@ def test_apply_operation_4gatequbit_1param_gate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) # BasisState test @pytest.mark.parametrize("operation", [qml.BasisState]) @@ -337,17 +247,17 @@ def test_state_prep(self, tol, 
operation, index, dev_mpi): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: + if dev_mpi.c_dtype == np.complex64: c_dtype = np.complex64 else: c_dtype = np.complex128 - state_vector = np.zeros(1 << num_wires).astype(c_dtype) - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + state_vector = np.zeros(2**num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) @@ -399,17 +309,17 @@ def test_qubit_state_prep(self, tol, par, Wires, dev_mpi): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: + if dev_mpi.c_dtype == np.complex64: c_dtype = np.complex64 else: c_dtype = np.complex128 - state_vector = np.zeros(1 << num_wires).astype(c_dtype) - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + state_vector = np.zeros(2**num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - state_vector = create_random_init_state(num_wires,
dev_mpi.c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) @@ -435,17 +345,17 @@ def test_dev_reset(self, tol, dev_mpi): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: + if dev_mpi.c_dtype == np.complex64: c_dtype = np.complex64 else: c_dtype = np.complex128 - state_vector = np.zeros(1 << num_wires).astype(c_dtype) - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + state_vector = np.zeros(2**num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) @@ -462,10 +372,10 @@ def circuit(): expected_output_cpu = cpu_qnode().astype(c_dtype) comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) - dev_mpi.reset() + dev_mpi._statevector.reset_state() gpumpi_qnode = qml.QNode(circuit, dev_mpi) - dev_mpi.reset() + dev_mpi._statevector.reset_state() local_state_vector = gpumpi_qnode() assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) @@ -474,8 +384,8 @@ def circuit(): class TestSparseHamExpval: # pylint: disable=too-few-public-methods,missing-function-docstring """Tests sparse hamiltonian expectation values.""" - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_sparse_hamiltonian_expectation(self, C_DTYPE): +
@pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_sparse_hamiltonian_expectation(self, c_dtype): comm = MPI.COMM_WORLD commSize = comm.Get_size() num_global_wires = commSize.bit_length() - 1 @@ -496,32 +406,38 @@ def test_sparse_hamiltonian_expectation(self, C_DTYPE): 0.3 + 0.3j, 0.3 + 0.5j, ], - dtype=C_DTYPE, + dtype=c_dtype, ) - local_state_vector = np.zeros(1 << num_local_wires).astype(C_DTYPE) + state_vector /= np.linalg.norm(state_vector) + + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) - dev_gpu = qml.device("lightning.gpu", wires=3, mpi=False, c_dtype=C_DTYPE) - dev_mpi = qml.device("lightning.gpu", wires=3, mpi=True, c_dtype=C_DTYPE) + H_sparse = qml.SparseHamiltonian(Hmat, wires=range(3)) - dev_mpi.syncH2D(local_state_vector) - dev_gpu.syncH2D(state_vector) + def circuit(): + qml.StatePrep(state_vector, wires=range(3)) + return qml.expval(H_sparse) - H_sparse = qml.SparseHamiltonian(Hmat, wires=range(3)) + dev_gpu = qml.device("lightning.gpu", wires=3, mpi=False, c_dtype=c_dtype) + gpu_qnode = qml.QNode(circuit, dev_gpu) + expected_output_gpu = gpu_qnode() + comm.Bcast(np.array(expected_output_gpu), root=0) - comm.Barrier() + dev_mpi = qml.device("lightning.gpu", wires=3, mpi=True, c_dtype=c_dtype) + mpi_qnode = qml.QNode(circuit, dev_mpi) + expected_output_mpi = mpi_qnode() - res = dev_mpi.expval(H_sparse) - expected = dev_gpu.expval(H_sparse) + comm.Barrier() - assert np.allclose(res, expected) + assert np.allclose(expected_output_mpi, expected_output_gpu) class TestExpval: """Tests that expectation values are properly calculated or that the proper errors are raised.""" - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) @pytest.mark.parametrize( "operation", [ @@ -533,7 +449,7 @@ class TestExpval: ], ) @pytest.mark.parametrize("wires", [0, 1, 2, numQubits - 3, 
numQubits - 2, numQubits - 1]) - def test_expval_single_wire_no_parameters(self, tol, operation, wires, C_DTYPE): + def test_expval_single_wire_no_parameters(self, tol, operation, wires, c_dtype): """Tests that expectation values are properly calculated for single-wire observables without parameters.""" num_wires = numQubits comm = MPI.COMM_WORLD @@ -541,14 +457,14 @@ def test_expval_single_wire_no_parameters(self, tol, operation, wires, C_DTYPE): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - dev_mpi = qml.device("lightning.gpu", wires=numQubits, mpi=True, c_dtype=C_DTYPE) + dev_mpi = qml.device("lightning.gpu", wires=numQubits, mpi=True, c_dtype=c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) - local_state_vector = np.zeros(1 << num_local_wires).astype(C_DTYPE) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) def circuit(): qml.StatePrep(state_vector, wires=range(num_wires)) @@ -563,7 +479,7 @@ def circuit(): assert np.allclose(expected_output_mpi, expected_output_cpu, atol=tol, rtol=0) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) @pytest.mark.parametrize( "obs", [ @@ -575,12 +491,12 @@ def circuit(): qml.PauliZ(numQubits - 2) @ qml.PauliZ(numQubits - 1), ], ) - def test_expval_multiple_obs(self, obs, tol, C_DTYPE): + def test_expval_multiple_obs(self, obs, tol, c_dtype): """Test expval with Hamiltonian""" num_wires = numQubits - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) - dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, 
c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype) def circuit(): qml.RX(0.4, wires=[0]) @@ -592,7 +508,7 @@ def circuit(): assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) @pytest.mark.parametrize( "obs, coeffs", [ @@ -620,14 +536,14 @@ def circuit(): ), ], ) - def test_expval_hamiltonian(self, obs, coeffs, tol, C_DTYPE): + def test_expval_hamiltonian(self, obs, coeffs, tol, c_dtype): """Test expval with Hamiltonian""" num_wires = numQubits ham = qml.Hamiltonian(coeffs, obs) - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) - dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype) def circuit(): qml.RX(0.4, wires=[0]) @@ -665,14 +581,14 @@ def circuit(): class TestGenerateSample: """Tests that samples are properly calculated.""" - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_sample_dimensions(self, C_DTYPE): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_sample_dimensions(self, c_dtype): """Tests if the samples returned by sample have the correct dimensions """ num_wires = numQubits - dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype) ops = [qml.RX(1.5708, wires=[0]), qml.RX(1.5708, wires=[1])] @@ -697,14 +613,14 @@ def test_sample_dimensions(self, C_DTYPE): assert np.array_equal(s3.shape, (shots,)) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_sample_values(self, tol, C_DTYPE): + 
@pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_sample_values(self, tol, c_dtype): """Tests if the samples returned by sample have the correct values """ num_wires = numQubits - dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype) shots = qml.measurements.Shots(1000) ops = [qml.RX(1.5708, wires=[0])] @@ -716,17 +632,17 @@ def test_sample_values(self, tol, C_DTYPE): # they square to 1 assert np.allclose(s1**2, 1, atol=tol, rtol=0) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_sample_values_qnode(self, tol, C_DTYPE): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_sample_values_qnode(self, tol, c_dtype): """Tests if the samples returned by sample have the correct values """ num_wires = numQubits dev_mpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) - dev_mpi.reset() + dev_mpi._statevector.reset_state() @qml.qnode(dev_mpi) def circuit(): @@ -737,15 +653,15 @@ def circuit(): # they square to 1 assert np.allclose(circuit() ** 2, 1, atol=tol, rtol=0) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_multi_samples_return_correlated_results(self, C_DTYPE): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_multi_samples_return_correlated_results(self, c_dtype): """Tests if the samples returned by the sample function have the correct dimensions """ num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) @qml.qnode(dev_gpumpi) @@ -758,13 +674,13 @@ def circuit(): assert np.array_equal(outcomes[0], outcomes[1]) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, 
np.complex64]) - def test_paulix_pauliy(self, C_DTYPE, tol=TOL_STOCHASTIC): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_paulix_pauliy(self, c_dtype, tol=TOL_STOCHASTIC): """Test that a tensor product involving PauliX and PauliY works correctly""" num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) theta = 0.432 @@ -800,13 +716,13 @@ def circuit(): ) / 16 assert np.allclose(var, expected, atol=tol) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_pauliz_hadamard(self, C_DTYPE, tol=TOL_STOCHASTIC): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_pauliz_hadamard(self, c_dtype, tol=TOL_STOCHASTIC): """Test that a tensor product involving PauliZ and PauliY and hadamard works correctly""" num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) theta = 0.432 @@ -846,13 +762,13 @@ def circuit(): class TestTensorVar: """Test tensor variance measurements.""" - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_paulix_pauliy(self, C_DTYPE, tol=TOL_STOCHASTIC): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_paulix_pauliy(self, c_dtype, tol=TOL_STOCHASTIC): """Test that a tensor product involving PauliX and PauliY works correctly""" num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) theta = 0.432 @@ -880,12 +796,12 @@ def circuit(): ) / 16 assert np.allclose(res, expected, atol=tol) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_pauliz_hadamard(self, C_DTYPE, tol=TOL_STOCHASTIC): + 
@pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_pauliz_hadamard(self, c_dtype, tol=TOL_STOCHASTIC): """Test that a tensor product involving PauliZ and PauliY and hadamard works correctly""" num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) theta = 0.432 diff --git a/mpitests/test_device.py b/mpitests/test_device.py index 03a188011..dd783dbee 100644 --- a/mpitests/test_device.py +++ b/mpitests/test_device.py @@ -38,13 +38,13 @@ def test_create_device(): def test_unsupported_mpi_buf_size(): - with pytest.raises(TypeError, match="Unsupported mpi_buf_size value"): + with pytest.raises(ValueError, match="Unsupported mpi_buf_size value"): dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=-1) - with pytest.raises(TypeError, match="Unsupported mpi_buf_size value"): + with pytest.raises(ValueError, match="Unsupported mpi_buf_size value"): dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=3) - with pytest.warns( - RuntimeWarning, - match="The MPI buffer size is larger than the local state vector size", + with pytest.raises( + RuntimeError, + match="The MPI buffer size is larger than the local state vector size.", ): dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=2**4) with pytest.raises( diff --git a/mpitests/test_expval.py b/mpitests/test_expval.py index d020471c0..3ca73cd82 100644 --- a/mpitests/test_expval.py +++ b/mpitests/test_expval.py @@ -22,114 +22,260 @@ from conftest import PHI, THETA, VARPHI, device_name from mpi4py import MPI +numQubits = 8 -@pytest.mark.parametrize("theta, phi", list(zip(THETA, PHI))) -class TestExpval: - """Test expectation values""" - def test_identity_expectation(self, theta, phi, tol): - """Test that identity expectation value (i.e. 
the trace) is 1""" - dev = qml.device(device_name, mpi=True, wires=3) +def create_random_init_state(numWires, c_dtype, seed_value=48): + """Returns a random initial state of a certain type.""" + np.random.seed(seed_value) - O1 = qml.Identity(wires=[0]) - O2 = qml.Identity(wires=[1]) + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 - dev.apply( - [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], - ) + num_elements = 2**numWires + init_state = np.random.rand(num_elements).astype(r_dtype) + 1j * np.random.rand( + num_elements + ).astype(r_dtype) - res = np.array([dev.expval(O1), dev.expval(O2)]) - assert np.allclose(res, np.array([1, 1]), tol) + init_state = init_state / np.linalg.norm(init_state) + return init_state - def test_pauliz_expectation(self, theta, phi, tol): - """Test that PauliZ expectation value is correct""" - dev = qml.device(device_name, mpi=True, wires=3) - O1 = qml.PauliZ(wires=[0]) - O2 = qml.PauliZ(wires=[1]) +def apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires): + """Wrapper applying a parametric gate with QNode function.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires - dev.apply( - [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], - ) + c_dtype = dev_mpi.c_dtype - res = np.array([dev.expval(O1), dev.expval(O2)]) - assert np.allclose(res, np.array([np.cos(theta), np.cos(theta) * np.cos(phi)]), tol) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - def test_paulix_expectation(self, theta, phi, tol): - """Test that PauliX expectation value is 
correct""" - dev = qml.device(device_name, mpi=True, wires=3) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) + comm.Bcast(state_vector, root=0) - O1 = qml.PauliX(wires=[0]) - O2 = qml.PauliX(wires=[1]) + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) - dev.apply( - [qml.RY(theta, wires=[0]), qml.RY(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], - ) + def circuit(*params): + qml.StatePrep(state_vector, wires=range(num_wires)) + operation(*params, wires=Wires) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = cpu_qnode(*par).astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + local_state_vector = mpi_qnode(*par) + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +def apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires): + """Wrapper applying a non-parametric gate with QNode function.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + c_dtype = dev_mpi.c_dtype + + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) + + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) + comm.Bcast(state_vector, root=0) - res = np.array([dev.expval(O1), dev.expval(O2)], dtype=dev.C_DTYPE) - assert np.allclose( - res, - np.array([np.sin(theta) * np.sin(phi), np.sin(phi)], dtype=dev.C_DTYPE), - tol * 10, + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + def circuit(): 
+ qml.StatePrep(state_vector, wires=range(num_wires)) + operation(wires=Wires) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = cpu_qnode().astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + local_state_vector = mpi_qnode() + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +@pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) +@pytest.mark.parametrize("batch_obs", [True, False]) +class TestExpval: + """Tests that expectation values are properly calculated or that the proper errors are raised.""" + + @pytest.mark.parametrize( + "operation", + [ + qml.PauliX, + qml.PauliY, + qml.PauliZ, + qml.Hadamard, + qml.Identity, + ], + ) + @pytest.mark.parametrize("wires", [0, 1, 2, numQubits - 2, numQubits - 1]) + def test_expval_single_wire_no_parameters(self, tol, operation, wires, c_dtype, batch_obs): + """Tests that expectation values are properly calculated for single-wire observables without parameters.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + + dev_mpi = qml.device( + "lightning.gpu", wires=numQubits, mpi=True, c_dtype=c_dtype, batch_obs=batch_obs ) - def test_pauliy_expectation(self, theta, phi, tol): - """Test that PauliY expectation value is correct""" - dev = qml.device(device_name, mpi=True, wires=3) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) - O1 = qml.PauliY(wires=[0]) - O2 = qml.PauliY(wires=[1]) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) + comm.Bcast(state_vector, root=0) - dev.apply( - [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + def circuit(): + qml.StatePrep(state_vector, wires=range(num_wires)) + return qml.expval(operation(wires)) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = cpu_qnode() + 
comm.Bcast(np.array(expected_output_cpu), root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + expected_output_mpi = mpi_qnode() + + assert np.allclose(expected_output_mpi, expected_output_cpu, atol=tol, rtol=0) + + @pytest.mark.parametrize( + "obs", + [ + qml.PauliX(0) @ qml.PauliZ(1), + qml.PauliX(0) @ qml.PauliZ(numQubits - 1), + qml.PauliX(numQubits - 2) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + qml.PauliZ(0) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(numQubits - 2) @ qml.PauliZ(numQubits - 1), + ], + ) + def test_expval_multiple_obs(self, obs, tol, c_dtype, batch_obs): + """Test expval with tensor-product observables""" + num_wires = numQubits + + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype, batch_obs=batch_obs ) - res = np.array([dev.expval(O1), dev.expval(O2)]) - assert np.allclose(res, np.array([0, -np.cos(theta) * np.sin(phi)]), tol) + def circuit(): + qml.RX(0.4, wires=[0]) + qml.RY(-0.2, wires=[num_wires - 1]) + return qml.expval(obs) - def test_hadamard_expectation(self, theta, phi, tol): - """Test that Hadamard expectation value is correct""" - dev = qml.device(device_name, mpi=True, wires=3) + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) + + @pytest.mark.parametrize( + "obs, coeffs", + [ + ([qml.PauliX(0) @ qml.PauliZ(1)], [0.314]), + ([qml.PauliX(0) @ qml.PauliZ(numQubits - 1)], [0.314]), + ([qml.PauliZ(0) @ qml.PauliZ(1)], [0.314]), + ([qml.PauliZ(0) @ qml.PauliZ(numQubits - 1)], [0.314]), + ( + [qml.PauliX(0) @ qml.PauliZ(1), qml.PauliZ(0) @ qml.PauliZ(1)], + [0.314, 0.2], + ), + ( + [ + qml.PauliX(0) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + ], + [0.314, 0.2], + ), + ( + [ + qml.PauliX(numQubits - 2) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + ], + [0.314, 0.2], + ), + ], + ) + def 
test_expval_hamiltonian(self, obs, coeffs, tol, c_dtype, batch_obs): + """Test expval with Hamiltonian""" + num_wires = numQubits + + ham = qml.Hamiltonian(coeffs, obs) + + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype, batch_obs=batch_obs + ) + + def circuit(): + qml.RX(0.4, wires=[0]) + qml.RY(-0.2, wires=[numQubits - 1]) + return qml.expval(ham) - O1 = qml.Hadamard(wires=[0]) - O2 = qml.Hadamard(wires=[1]) + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) - dev.apply( - [qml.RY(theta, wires=[0]), qml.RY(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) + + def test_expval_non_pauli_word_hamiltionian(self, tol, c_dtype, batch_obs): + """Tests expectation values of non-Pauli word Hamiltonians.""" + dev_mpi = qml.device( + "lightning.gpu", wires=3, mpi=True, c_dtype=c_dtype, batch_obs=batch_obs ) + dev_cpu = qml.device("lightning.qubit", wires=3) + + theta = 0.432 + phi = 0.123 + varphi = -0.543 + + def circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.expval(0.5 * qml.Hadamard(2)) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) - res = np.array([dev.expval(O1), dev.expval(O2)]) - expected = np.array( - [ - np.sin(theta) * np.sin(phi) + np.cos(theta), - np.cos(theta) * np.cos(phi) + np.sin(phi), - ] - ) / np.sqrt(2) - assert np.allclose(res, expected, tol) - - @pytest.mark.parametrize("n_wires", range(1, 8)) - def test_hermitian_expectation(self, n_wires, theta, phi, tol): + @pytest.mark.parametrize("theta, phi", list(zip(THETA, PHI))) + @pytest.mark.parametrize("n_wires", range(1, 
numQubits)) + def test_hermitian_expectation(self, n_wires, theta, phi, tol, c_dtype, batch_obs): """Test that Hadamard expectation value is correct""" - n_qubits = 7 + n_qubits = numQubits - 1 dev_def = qml.device("default.qubit", wires=n_qubits) - dev = qml.device(device_name, mpi=True, wires=n_qubits) + dev = qml.device( + device_name, mpi=True, wires=n_qubits, c_dtype=c_dtype, batch_obs=batch_obs + ) comm = MPI.COMM_WORLD m = 2**n_wires U = np.random.rand(m, m) + 1j * np.random.rand(m, m) U = U + np.conj(U.T) - U = U.astype(dev.C_DTYPE) + U = U.astype(dev.c_dtype) comm.Bcast(U, root=0) obs = qml.Hermitian(U, wires=range(n_wires)) init_state = np.random.rand(2**n_qubits) + 1j * np.random.rand(2**n_qubits) - init_state /= np.sqrt(np.dot(np.conj(init_state), init_state)) - init_state = init_state.astype(dev.C_DTYPE) + init_state = init_state / np.linalg.norm(init_state) + init_state = init_state.astype(dev.c_dtype) comm.Bcast(init_state, root=0) def circuit(): @@ -250,69 +396,39 @@ def circuit(x, y): class TestTensorExpval: """Test tensor expectation values""" - def test_paulix_pauliy(self, theta, phi, varphi, tol): + @pytest.mark.parametrize( + "obs,expected", + [ + (qml.PauliX(0) @ qml.PauliY(2), "PXPY"), + (qml.PauliZ(0) @ qml.Identity(1) @ qml.PauliZ(2), "PZIPZ"), + (qml.PauliZ(0) @ qml.Hadamard(1) @ qml.PauliY(2), "PZHPY"), + ], + ) + def test_tensor(self, theta, phi, varphi, obs, expected, tol): """Test that a tensor product involving PauliX and PauliY works correctly""" dev = qml.device(device_name, mpi=True, wires=3) - obs = qml.PauliX(0) @ qml.PauliY(2) - - dev.apply( - [ - qml.RX(theta, wires=[0]), - qml.RX(phi, wires=[1]), - qml.RX(varphi, wires=[2]), - qml.CNOT(wires=[0, 1]), - qml.CNOT(wires=[1, 2]), - ], - rotations=obs.diagonalizing_gates(), - ) - res = dev.expval(obs) - - expected = np.sin(theta) * np.sin(phi) * np.sin(varphi) - - assert np.allclose(res, expected, atol=tol) - - def test_pauliz_identity(self, theta, phi, varphi, tol): - """Test that 
a tensor product involving PauliZ and Identity works - correctly""" - dev = qml.device(device_name, mpi=True, wires=3) - obs = qml.PauliZ(0) @ qml.Identity(1) @ qml.PauliZ(2) - - dev.apply( - [ - qml.RX(theta, wires=[0]), - qml.RX(phi, wires=[1]), - qml.RX(varphi, wires=[2]), - qml.CNOT(wires=[0, 1]), - qml.CNOT(wires=[1, 2]), - ], - rotations=obs.diagonalizing_gates(), - ) - - res = dev.expval(obs) - - expected = np.cos(varphi) * np.cos(phi) - assert np.allclose(res, expected, tol) - - def test_pauliz_hadamard_pauliy(self, theta, phi, varphi, tol): - """Test that a tensor product involving PauliZ and PauliY and Hadamard - works correctly""" - dev = qml.device(device_name, mpi=True, wires=3) - obs = qml.PauliZ(0) @ qml.Hadamard(1) @ qml.PauliY(2) - - dev.apply( - [ - qml.RX(theta, wires=[0]), - qml.RX(phi, wires=[1]), - qml.RX(varphi, wires=[2]), - qml.CNOT(wires=[0, 1]), - qml.CNOT(wires=[1, 2]), - ], - rotations=obs.diagonalizing_gates(), - ) + def circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.expval(obs) - res = dev.expval(obs) - expected = -(np.cos(varphi) * np.sin(phi) + np.sin(varphi) * np.cos(theta)) / np.sqrt(2) + mpi_qnode = qml.QNode(circuit, dev) + res = mpi_qnode() + + if expected == "PXPY": + expected_val = np.sin(theta) * np.sin(phi) * np.sin(varphi) + elif expected == "PZIPZ": + expected_val = np.cos(varphi) * np.cos(phi) + elif expected == "PZHPY": + expected_val = -( + np.cos(varphi) * np.sin(phi) + np.sin(varphi) * np.cos(theta) + ) / np.sqrt(2) + else: + expected_val = 0 - assert np.allclose(res, expected, tol) + assert np.allclose(res, expected_val, atol=tol) diff --git a/mpitests/test_probs.py b/mpitests/test_probs.py index b2f57f733..ed9ab9b9c 100644 --- a/mpitests/test_probs.py +++ b/mpitests/test_probs.py @@ -23,27 +23,31 @@ numQubits = 8 -def create_random_init_state(numWires, R_DTYPE, seed_value=48): +def 
create_random_init_state(numWires, c_dtype, seed_value=48): + """Returns a random initial state of a certain type.""" np.random.seed(seed_value) - num_elements = 1 << numWires - init_state = np.random.rand(num_elements).astype(R_DTYPE) + 1j * np.random.rand( + + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 + + num_elements = 2**numWires + init_state = np.random.rand(num_elements).astype(r_dtype) + 1j * np.random.rand( num_elements - ).astype(R_DTYPE) - scale_sum = np.sqrt(np.sum(np.abs(init_state) ** 2)).astype(R_DTYPE) - init_state = init_state / scale_sum + ).astype(r_dtype) + + init_state = init_state / np.linalg.norm(init_state) return init_state -def apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE): +def apply_probs_nonparam(tol, operation, GateWires, Wires, c_dtype): num_wires = numQubits comm = MPI.COMM_WORLD rank = comm.Get_rank() commSize = comm.Get_size() - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) - dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) def circuit(): @@ -58,15 +62,16 @@ def circuit(): local_probs = mpi_qnode() recv_counts = comm.gather(len(local_probs), root=0) - comm.Barrier() + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 + if rank == 0: - probs_mpi = np.zeros(1 << len(Wires)).astype(dev_mpi.R_DTYPE) - displacements = [i for i in range(commSize)] + probs_mpi = np.zeros(2 ** len(Wires)).astype(r_dtype) else: probs_mpi = None probs_cpu = None + comm.Barrier() comm.Gatherv(local_probs, [probs_mpi, recv_counts], root=0) @@ -75,16 +80,16 @@ def circuit(): comm.Barrier() -def apply_probs_param(tol, operation, 
par, GateWires, Wires, C_DTYPE): +def apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype): num_wires = numQubits comm = MPI.COMM_WORLD rank = comm.Get_rank() commSize = comm.Get_size() - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) - dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) def circuit(): @@ -102,8 +107,10 @@ def circuit(): comm.Barrier() + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 + if rank == 0: - probs_mpi = np.zeros(1 << len(Wires)).astype(dev_mpi.R_DTYPE) + probs_mpi = np.zeros(2 ** len(Wires)).astype(r_dtype) else: probs_mpi = None probs_cpu = None @@ -116,6 +123,19 @@ def circuit(): comm.Barrier() +@pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], +) +@pytest.mark.parametrize("c_dtype", [np.complex128]) class TestProbs: """Tests for the probability method.""" @@ -123,41 +143,15 @@ class TestProbs: "operation", [qml.PauliX, qml.PauliY, qml.PauliZ, qml.Hadamard, qml.S, qml.T] ) @pytest.mark.parametrize("GateWires", [[0], [numQubits - 1]]) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_single_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): - apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE) + def test_prob_single_wire_nonparam(self, tol, operation, GateWires, Wires, c_dtype): + apply_probs_nonparam(tol, operation, 
GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.CNOT, qml.SWAP, qml.CY, qml.CZ]) @pytest.mark.parametrize( "GateWires", [[0, 1], [numQubits - 2, numQubits - 1], [0, numQubits - 1]] ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_two_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): - apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE) + def test_prob_two_wire_nonparam(self, tol, operation, GateWires, Wires, c_dtype): + apply_probs_nonparam(tol, operation, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) @pytest.mark.parametrize( @@ -169,80 +163,28 @@ def test_prob_two_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE) [0, numQubits - 2, numQubits - 1], ], ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_three_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): - apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE) + def test_prob_three_wire_nonparam(self, tol, operation, GateWires, Wires, c_dtype): + apply_probs_nonparam(tol, operation, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.PhaseShift, qml.RX, qml.RY, qml.RZ]) @pytest.mark.parametrize("par", [[0.1], [0.2], [0.3]]) @pytest.mark.parametrize("GateWires", [0, numQubits - 1]) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_single_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, 
operation, par, GateWires, Wires, C_DTYPE) + def test_prob_single_wire_param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.Rot]) @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) @pytest.mark.parametrize("GateWires", [0, numQubits - 1]) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_single_wire_3param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_single_wire_3param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.CRot]) @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) @pytest.mark.parametrize( "GateWires", [[0, numQubits - 1], [0, 1], [numQubits - 2, numQubits - 1]] ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_two_wire_3param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_two_wire_3param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) @pytest.mark.parametrize( "operation", @@ -263,21 +205,8 @@ def test_prob_two_wire_3param(self, tol, operation, par, GateWires, Wires, C_DTY @pytest.mark.parametrize( "GateWires", [[0, numQubits - 1], [0, 1], [numQubits - 2, numQubits - 1]] ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - 
[numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_two_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_two_wire_param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) @pytest.mark.parametrize( "operation", @@ -292,18 +221,5 @@ def test_prob_two_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYP [numQubits - 4, numQubits - 3, numQubits - 2, numQubits - 1], ], ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_four_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_four_wire_param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) diff --git a/pennylane_lightning/core/_adjoint_jacobian_base.py b/pennylane_lightning/core/_adjoint_jacobian_base.py index 50046d5f9..a779c0cc4 100644 --- a/pennylane_lightning/core/_adjoint_jacobian_base.py +++ b/pennylane_lightning/core/_adjoint_jacobian_base.py @@ -111,6 +111,7 @@ def _process_jacobian_tape(self, tape: QuantumTape, split_obs: bool = False): self._qubit_state.device_name, use_csingle, use_mpi, split_obs ).serialize_ops(tape) + # pylint: disable=not-callable ops_serialized = self._create_ops_list_lightning(*ops_serialized) # We need to filter out indices in trainable_params which do not diff --git a/pennylane_lightning/core/_measurements_base.py b/pennylane_lightning/core/_measurements_base.py index 06ae87889..badbe7ec4 100644 --- a/pennylane_lightning/core/_measurements_base.py +++ 
b/pennylane_lightning/core/_measurements_base.py @@ -56,6 +56,7 @@ def __init__( ) -> None: self._qubit_state = qubit_state + self._use_mpi = False # Dummy for the C++ bindings self._measurement_lightning = None @@ -94,7 +95,6 @@ def state_diagonalizing_gates(self, measurementprocess: StateMeasurement) -> Ten self._qubit_state.apply_operations([qml.adjoint(g) for g in reversed(diagonalizing_gates)]) return result - # pylint: disable=protected-access def expval(self, measurementprocess: MeasurementProcess): """Expectation value of the supplied observable contained in the MeasurementProcess. @@ -121,8 +121,9 @@ def expval(self, measurementprocess: MeasurementProcess): or (measurementprocess.obs.arithmetic_depth > 0) or isinstance(measurementprocess.obs.name, List) ): + # pylint: disable=protected-access ob_serialized = QuantumScriptSerializer( - self._qubit_state.device_name, self.dtype == np.complex64 + self._qubit_state.device_name, self.dtype == np.complex64, self._use_mpi )._ob(measurementprocess.obs) return self._measurement_lightning.expval(ob_serialized) @@ -130,24 +131,37 @@ def expval(self, measurementprocess: MeasurementProcess): measurementprocess.obs.name, measurementprocess.obs.wires ) + def _probs_retval_conversion(self, probs_results: Any) -> np.ndarray: + """Convert the data structure from the C++ backend to a structure common to all Lightning devices. + Args: + probs_results (Any): Result provided by the C++ backend. + Returns: + np.ndarray with probabilities of the supplied observable or wires. + """ + return probs_results + def probs(self, measurementprocess: MeasurementProcess): """Probabilities of the supplied observable or wires contained in the MeasurementProcess. Args: - measurementprocess (StateMeasurement): measurement to apply to the state + measurementprocess (StateMeasurement): measurement to apply to the state. Returns: - Probabilities of the supplied observable or wires + Probabilities of the supplied observable or wires. 
""" diagonalizing_gates = measurementprocess.diagonalizing_gates() + if diagonalizing_gates: self._qubit_state.apply_operations(diagonalizing_gates) + results = self._measurement_lightning.probs(measurementprocess.wires.tolist()) + if diagonalizing_gates: self._qubit_state.apply_operations( [qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)] ) - return results + + return self._probs_retval_conversion(results) def var(self, measurementprocess: MeasurementProcess): """Variance of the supplied observable contained in the MeasurementProcess. @@ -175,8 +189,9 @@ def var(self, measurementprocess: MeasurementProcess): or (measurementprocess.obs.arithmetic_depth > 0) or isinstance(measurementprocess.obs.name, List) ): + # pylint: disable=protected-access ob_serialized = QuantumScriptSerializer( - self._qubit_state.device_name, self.dtype == np.complex64 + self._qubit_state.device_name, self.dtype == np.complex64, self._use_mpi )._ob(measurementprocess.obs) return self._measurement_lightning.var(ob_serialized) @@ -187,6 +202,7 @@ def var(self, measurementprocess: MeasurementProcess): def get_measurement_function( self, measurementprocess: MeasurementProcess ) -> Callable[[MeasurementProcess, TensorLike], TensorLike]: + # pylint: disable=too-many-return-statements """Get the appropriate method for performing a measurement. 
Args: @@ -197,16 +213,24 @@ def get_measurement_function( """ if isinstance(measurementprocess, StateMeasurement): if isinstance(measurementprocess, ExpectationMP): - if isinstance(measurementprocess.obs, (qml.Identity, qml.Projector)): - return self.state_diagonalizing_gates + if self._use_mpi: + if isinstance(measurementprocess.obs, (qml.Projector)): + return self.state_diagonalizing_gates + else: + if isinstance(measurementprocess.obs, (qml.Identity, qml.Projector)): + return self.state_diagonalizing_gates return self.expval if isinstance(measurementprocess, ProbabilityMP): return self.probs if isinstance(measurementprocess, VarianceMP): - if isinstance(measurementprocess.obs, (qml.Identity, qml.Projector)): - return self.state_diagonalizing_gates + if self._use_mpi: + if isinstance(measurementprocess.obs, (qml.Projector)): + return self.state_diagonalizing_gates + else: + if isinstance(measurementprocess.obs, (qml.Identity, qml.Projector)): + return self.state_diagonalizing_gates return self.var if measurementprocess.obs is None or measurementprocess.obs.has_diagonalizing_gates: return self.state_diagonalizing_gates diff --git a/pennylane_lightning/core/_state_vector_base.py b/pennylane_lightning/core/_state_vector_base.py index 3e08a5ab4..8815e13a0 100644 --- a/pennylane_lightning/core/_state_vector_base.py +++ b/pennylane_lightning/core/_state_vector_base.py @@ -16,7 +16,7 @@ """ from abc import ABC, abstractmethod -from typing import Union +from typing import Optional, Union import numpy as np from pennylane import BasisState, StatePrep @@ -101,7 +101,7 @@ def reset_state(self): self._qubit_state.resetStateVector() @abstractmethod - def _apply_state_vector(self, state, device_wires: Wires): + def _apply_state_vector(self, state, device_wires: Wires, sync: Optional[bool] = None): """Initialize the internal state vector in a specified state. 
Args: state (array[complex]): normalized input state of length ``2**len(wires)`` @@ -117,6 +117,7 @@ def _apply_basis_state(self, state, wires): consisting of 0s and 1s. wires (Wires): wires that the provided computational state should be initialized on + use_async(Optional[bool]): immediately sync with host-sv after applying operation. Note: This function does not support broadcasted inputs yet. """ diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index cb8540718..9b3cc2f16 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.39.0-dev35" +__version__ = "0.39.0-dev36" diff --git a/pennylane_lightning/core/lightning_newAPI_base.py b/pennylane_lightning/core/lightning_newAPI_base.py index dcee73fd5..12cdf98b4 100644 --- a/pennylane_lightning/core/lightning_newAPI_base.py +++ b/pennylane_lightning/core/lightning_newAPI_base.py @@ -90,6 +90,49 @@ def c_dtype(self): def _set_lightning_classes(self): """Load the LightningStateVector, LightningMeasurements, LightningAdjointJacobian as class attribute""" + @abstractmethod + def _setup_execution_config(self, config): + """ + Update the execution config with choices for how the device should be used and the device options. + """ + + @abstractmethod + def preprocess(self, execution_config: ExecutionConfig = DefaultExecutionConfig): + """This function defines the device transform program to be applied and an updated device configuration. + + Args: + execution_config (Union[ExecutionConfig, Sequence[ExecutionConfig]]): A data structure describing the + parameters needed to fully describe the execution. 
+ + Returns: + TransformProgram, ExecutionConfig: A transform program that when called returns :class:`~.QuantumTape`'s that the + device can natively execute as well as a postprocessing function to be called after execution, and a configuration + with unset specifications filled in. + + This device: + + * Supports any qubit operations that provide a matrix + * Currently does not support finite shots + * Currently does not intrinsically support parameter broadcasting + + """ + + @abstractmethod + def execute( + self, + circuits: QuantumTape_or_Batch, + execution_config: ExecutionConfig = DefaultExecutionConfig, + ) -> Result_or_ResultBatch: + """Execute a circuit or a batch of circuits and turn it into results. + + Args: + circuits (Union[QuantumTape, Sequence[QuantumTape]]): the quantum circuits to be executed + execution_config (ExecutionConfig): a datastructure with additional information required for execution + + Returns: + TensorLike, tuple[TensorLike], tuple[tuple[TensorLike]]: A numeric result of the computation. + """ + @abstractmethod def simulate( self, @@ -112,6 +155,25 @@ def simulate( Note that this function can return measurements for non-commuting observables simultaneously. """ + @abstractmethod + def supports_derivatives( + self, + execution_config: Optional[ExecutionConfig] = None, + circuit: Optional[qml.tape.QuantumTape] = None, + ) -> bool: + """Check whether or not derivatives are available for a given configuration and circuit. + + ``LightningGPU`` supports adjoint differentiation with analytic results. + + Args: + execution_config (ExecutionConfig): The configuration of the desired derivative calculation + circuit (QuantumTape): An optional circuit to check derivatives support for. 
+ + Returns: + Bool: Whether or not a derivative can be calculated provided the given information + + """ + def jacobian( self, circuit: QuantumTape, @@ -135,6 +197,7 @@ def jacobian( [circuit], _ = qml.map_wires(circuit, wire_map) state.reset_state() final_state = state.get_final_state(circuit) + # pylint: disable=not-callable return self.LightningAdjointJacobian(final_state, batch_obs=batch_obs).calculate_jacobian( circuit ) @@ -163,6 +226,7 @@ def simulate_and_jacobian( if wire_map is not None: [circuit], _ = qml.map_wires(circuit, wire_map) res = self.simulate(circuit, state) + # pylint: disable=not-callable jac = self.LightningAdjointJacobian(state, batch_obs=batch_obs).calculate_jacobian(circuit) return res, jac @@ -193,6 +257,7 @@ def vjp( # pylint: disable=too-many-arguments [circuit], _ = qml.map_wires(circuit, wire_map) state.reset_state() final_state = state.get_final_state(circuit) + # pylint: disable=not-callable return self.LightningAdjointJacobian(final_state, batch_obs=batch_obs).calculate_vjp( circuit, cotangents ) @@ -224,6 +289,7 @@ def simulate_and_vjp( # pylint: disable=too-many-arguments if wire_map is not None: [circuit], _ = qml.map_wires(circuit, wire_map) res = self.simulate(circuit, state) + # pylint: disable=not-callable _vjp = self.LightningAdjointJacobian(state, batch_obs=batch_obs).calculate_vjp( circuit, cotangents ) diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp index 3753f792f..a98b51df6 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp @@ -260,13 +260,11 @@ class StateVectorCudaMPI final */ void setBasisState(const std::complex &value, const std::size_t index, const bool async = false) { - std::size_t rankId = index >> BaseType::getNumQubits(); + const std::size_t rankId = index >> 
this->getNumLocalQubits(); + + const std::size_t local_index = + compute_local_index(index, this->getNumLocalQubits()); - std::size_t local_index = - static_cast( - rankId * std::pow(2.0, static_cast( - BaseType::getNumQubits()))) ^ - index; BaseType::getDataBuffer().zeroInit(); CFP_t value_cu = cuUtil::complexToCu>(value); @@ -280,6 +278,45 @@ class StateVectorCudaMPI final mpi_manager_.Barrier(); } + /** + * @brief Prepare a single computational basis state. + * + * @param state Binary number representing the index + * @param wires Wires. + * @param use_async Use an asynchronous memory copy. + */ + void setBasisState(const std::vector &state, + const std::vector &wires, + const bool use_async) { + PL_ABORT_IF_NOT(state.size() == wires.size(), + "state and wires must have equal dimensions."); + + const auto n_wires = this->getTotalNumQubits(); + + std::size_t index{0U}; + for (std::size_t k = 0; k < n_wires; k++) { + const auto bit = state[k]; + index |= bit << (n_wires - 1 - wires[k]); + } + + const std::size_t rankId = index >> this->getNumLocalQubits(); + const std::size_t local_index = + compute_local_index(index, this->getNumLocalQubits()); + + const std::complex value(1.0, 0.0); + CFP_t value_cu = cuUtil::complexToCu>(value); + + BaseType::getDataBuffer().zeroInit(); + + auto stream_id = localStream_.get(); + + if (mpi_manager_.getRank() == rankId) { + setBasisState_CUDA(BaseType::getData(), value_cu, local_index, + use_async, stream_id); + } + PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); + mpi_manager_.Barrier(); + } /** * @brief Set values for a batch of elements of the state-vector. 
This * method is implemented by the customized CUDA kernel defined in the @@ -307,11 +344,9 @@ class StateVectorCudaMPI final static_cast(index) >> BaseType::getNumQubits(); if (rankId == mpi_manager_.getRank()) { - int local_index = - static_cast( - rankId * std::pow(2.0, static_cast( - BaseType::getNumQubits()))) ^ - index; + int local_index = static_cast( + compute_local_index(static_cast(index), + this->getNumLocalQubits())); indices_local.push_back(local_index); values_local.push_back(values[i]); } diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp index 716d95c89..b33c16471 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp @@ -184,6 +184,42 @@ class StateVectorCudaManaged stream_id); } + /** + * @brief Prepare a single computational basis state. + * + * @param state Binary number representing the index + * @param wires Wires. + * @param use_async(Optional[bool]): immediately sync with host-sv after + applying operation. 
+ + */ + void setBasisState(const std::vector &state, + const std::vector &wires, + const bool use_async = false) { + PL_ABORT_IF_NOT(state.size() == wires.size(), + "state and wires must have equal dimensions."); + const auto num_qubits = BaseType::getNumQubits(); + PL_ABORT_IF_NOT( + std::find_if(wires.begin(), wires.end(), + [&num_qubits](const auto i) { + return i >= num_qubits; + }) == wires.end(), + "wires must take values lower than the number of qubits."); + const auto n_wires = wires.size(); + std::size_t index{0U}; + for (std::size_t k = 0; k < n_wires; k++) { + const auto bit = static_cast(state[k]); + index |= bit << (num_qubits - 1 - wires[k]); + } + + BaseType::getDataBuffer().zeroInit(); + const std::complex value(1.0, 0.0); + CFP_t value_cu = cuUtil::complexToCu>(value); + auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); + setBasisState_CUDA(BaseType::getData(), value_cu, index, use_async, + stream_id); + } + /** * @brief Set values for a batch of elements of the state-vector. 
This * method is implemented by the customized CUDA kernel defined in the diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp index 5bd92b552..3874d3451 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp @@ -83,12 +83,12 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) { })) .def( "setBasisState", - [](StateVectorT &sv, const std::size_t index, - const bool use_async) { - const std::complex value(1, 0); - sv.setBasisState(value, index, use_async); - }, - "Create Basis State on GPU.") + [](StateVectorT &sv, const std::vector &state, + const std::vector &wires, + const bool async) { sv.setBasisState(state, wires, async); }, + py::arg("state") = nullptr, py::arg("wires") = nullptr, + py::arg("async") = false, + "Set the state vector to a basis state on GPU.") .def( "setStateVector", [](StateVectorT &sv, const np_arr_sparse_ind &indices, @@ -152,7 +152,11 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) { "Get the GPU index for the statevector data.") .def("numQubits", &StateVectorT::getNumQubits) .def("dataLength", &StateVectorT::getLength) - .def("resetGPU", &StateVectorT::initSV) + .def( + "resetStateVector", + [](StateVectorT &gpu_sv, bool async) { gpu_sv.initSV(async); }, + py::arg("async") = false, + "Initialize the statevector data to the |0...0> state") .def( "apply", [](StateVectorT &sv, const std::string &str, diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp index 620fd9386..83a47ed34 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp +++ 
b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp @@ -86,6 +86,15 @@ void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) { })) // qubits, device .def( "setBasisState", + [](StateVectorT &sv, const std::vector &state, + const std::vector &wires, const bool use_async) { + sv.setBasisState(state, wires, use_async); + }, + py::arg("state") = nullptr, py::arg("wires") = nullptr, + py::arg("async") = false, + "Set the state vector to a basis state on GPU.") + .def( + "setBasisStateIndex", [](StateVectorT &sv, const std::size_t index, const bool use_async) { const std::complex value(1, 0); @@ -155,7 +164,11 @@ void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) { "Get the GPU index for the statevector data.") .def("numQubits", &StateVectorT::getNumQubits) .def("dataLength", &StateVectorT::getLength) - .def("resetGPU", &StateVectorT::initSV) + .def( + "resetStateVector", + [](StateVectorT &gpu_sv, bool async) { gpu_sv.initSV(async); }, + py::arg("async") = false, + "Initialize the statevector data to the |0...0> state") .def( "apply", [](StateVectorT &sv, const std::string &str, diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp index 460a4fa8c..fe19b5d02 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp @@ -273,7 +273,7 @@ class Measurements final PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerSample( this->_statevector.getCusvHandle(), sampler, bitStrings.data(), bitOrdering.data(), bitStringLen, rand_nums.data(), num_samples, - CUSTATEVEC_SAMPLER_OUTPUT_ASCENDING_ORDER)); + CUSTATEVEC_SAMPLER_OUTPUT_RANDNUM_ORDER)); PL_CUDA_IS_SUCCESS(cudaStreamSynchronize( this->_statevector.getDataBuffer().getDevTag().getStreamID())); diff --git 
a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp index 6dd5a0159..4b5a2dd34 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp @@ -36,6 +36,7 @@ namespace { using namespace Pennylane::LightningGPU; using namespace Pennylane::LightningGPU::MPI; +using namespace Pennylane::LightningGPU::Util; using namespace Pennylane::Util; using Pennylane::Util::isApproxEqual; @@ -52,6 +53,23 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::Constructibility", } } +TEMPLATE_TEST_CASE("cuStateVec_helper::compute_local_index", + "[Default Constructibility]", StateVectorCudaMPI<>) { + const std::size_t local_num_qubits = 4; + + SECTION("compute_local_index, index inside the current qubits set") { + const std::size_t index = 2; // 0b00010 + std::size_t local_index = compute_local_index(index, local_num_qubits); + REQUIRE(local_index == index); + } + + SECTION("compute_local_index, index outside the current qubits set") { + const std::size_t index = 16; // 0b10000 + std::size_t local_index = compute_local_index(index, local_num_qubits); + REQUIRE(local_index == 0); + } +} + TEMPLATE_PRODUCT_TEST_CASE("StateVectorCudaMPI::Constructibility", "[General Constructibility]", (StateVectorCudaMPI), (float, double)) { diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/cuStateVec_helpers.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/cuStateVec_helpers.hpp index 8bd27c2dc..ffdefe3e2 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/cuStateVec_helpers.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/cuStateVec_helpers.hpp @@ -101,4 +101,22 @@ inline SharedCusvHandle make_shared_cusv_handle() { 
PL_CUSTATEVEC_IS_SUCCESS(custatevecCreate(&h)); return {h, handleDeleter()}; } + +/** + * @brief Compute the local index from a given index in multi-gpu workflow + * + * @param index Global index of the target element. + * @param num_qubits Number of wires within the local devices. + * + * @return local_index Local index of the target element. + */ +inline std::size_t compute_local_index(const std::size_t index, + const std::size_t num_qubits) { + // TODO: bound check for the left shift operation here + constexpr std::size_t one{1U}; + const std::size_t local_index = + (index >> num_qubits) * (one << num_qubits) ^ index; + return local_index; +} + } // namespace Pennylane::LightningGPU::Util diff --git a/pennylane_lightning/lightning_gpu/_adjoint_jacobian.py b/pennylane_lightning/lightning_gpu/_adjoint_jacobian.py new file mode 100644 index 000000000..50f9acef3 --- /dev/null +++ b/pennylane_lightning/lightning_gpu/_adjoint_jacobian.py @@ -0,0 +1,248 @@ +# Copyright 2018-2024 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +r""" +Internal methods for adjoint Jacobian differentiation method. 
+""" + +from __future__ import annotations + +from warnings import warn + +try: + from pennylane_lightning.lightning_gpu_ops import DevPool + from pennylane_lightning.lightning_gpu_ops.algorithms import ( + AdjointJacobianC64, + AdjointJacobianC128, + create_ops_listC64, + create_ops_listC128, + ) + + try: + from pennylane_lightning.lightning_gpu_ops.algorithmsMPI import ( + AdjointJacobianMPIC64, + AdjointJacobianMPIC128, + create_ops_listMPIC64, + create_ops_listMPIC128, + ) + + mpi_error = None + MPI_SUPPORT = True + except ImportError as ex_mpi: + mpi_error = ex_mpi + MPI_SUPPORT = False + +except ImportError as ex: + warn(str(ex), UserWarning) + + +import numpy as np +from pennylane import BasisState, StatePrep +from pennylane.operation import Operation +from pennylane.tape import QuantumTape +from scipy.sparse import csr_matrix + +# pylint: disable=ungrouped-imports +from pennylane_lightning.core._adjoint_jacobian_base import LightningBaseAdjointJacobian +from pennylane_lightning.core._serialize import QuantumScriptSerializer + + +class LightningGPUAdjointJacobian(LightningBaseAdjointJacobian): + """Check and execute the adjoint Jacobian differentiation method. + + Args: + qubit_state(LightningGPUStateVector): State Vector to calculate the adjoint Jacobian with. + batch_obs(bool): If serialized tape is to be batched or not. + For Lightning GPU, distribute the observations across GPUs in the same node. Defaults to False. + For Lightning GPU-MPI, if `batch_obs=False` the computation requires more memory and is faster, + while `batch_obs=True` allows a larger number of qubits simulation + at the expense of high computational cost. Defaults to False. 
+ """ + + # pylint: disable=too-few-public-methods + + def __init__( + self, + qubit_state: LightningGPUStateVector, # pylint: disable=undefined-variable + batch_obs: bool = False, + ) -> None: + + super().__init__(qubit_state, batch_obs) + + self._dp = DevPool() + + self._use_mpi = qubit_state._mpi_handler.use_mpi + + if self._use_mpi: + self._mpi_handler = qubit_state._mpi_handler + + # Initialize the C++ binds + self._jacobian_lightning, self._create_ops_list_lightning = self._adjoint_jacobian_dtype() + + # Warning about performance with MPI and batch observation + if self._use_mpi and not self._batch_obs: + warn( + "Using LightningGPU with `batch_obs=False` and `use_mpi=True` has the limitation of requiring more memory. If you want to allocate larger number of qubits use the option `batch_obs=True`" + "For more information Check out the section `Parallel adjoint differentiation support` in our website https://docs.pennylane.ai/projects/lightning/en/stable/lightning_gpu/device.html for more details.", + RuntimeWarning, + ) + + def _adjoint_jacobian_dtype(self): + """Binding to Lightning GPU Adjoint Jacobian C++ class. 
+ + Returns: the AdjointJacobian class + """ + if self._use_mpi: + if not MPI_SUPPORT: + warn(str(mpi_error), UserWarning) + + jacobian_lightning = ( + AdjointJacobianMPIC64() if self.dtype == np.complex64 else AdjointJacobianMPIC128() + ) + create_ops_list_lightning = ( + create_ops_listMPIC64 if self.dtype == np.complex64 else create_ops_listMPIC128 + ) + return jacobian_lightning, create_ops_list_lightning + + # without MPI + jacobian_lightning = ( + AdjointJacobianC64() if self.dtype == np.complex64 else AdjointJacobianC128() + ) + create_ops_list_lightning = ( + create_ops_listC64 if self.dtype == np.complex64 else create_ops_listC128 + ) + return jacobian_lightning, create_ops_list_lightning + + def _process_jacobian_tape( + self, tape: QuantumTape, split_obs: bool = False, use_mpi: bool = False + ): + """Process a tape, serializing and building a dictionary proper for + the adjoint Jacobian calculation in the C++ layer. + + Args: + tape (QuantumTape): Operations and measurements that represent instructions for execution on Lightning. + split_obs (bool, optional): If splitting the observables in a list. Defaults to False. + use_mpi (bool, optional): If distributing computation with MPI. Defaults to False. + + Returns: + dictionary: dictionary providing serialized data for Jacobian calculation. + """ + use_csingle = self._qubit_state.dtype == np.complex64 + + obs_serialized, obs_indices = QuantumScriptSerializer( + self._qubit_state.device_name, use_csingle, use_mpi, split_obs + ).serialize_observables(tape) + + ops_serialized, use_sp = QuantumScriptSerializer( + self._qubit_state.device_name, use_csingle, use_mpi, split_obs + ).serialize_ops(tape) + + ops_serialized = self._create_ops_list_lightning(*ops_serialized) + + # We need to filter out indices in trainable_params which do not + # correspond to operators. 
+ trainable_params = sorted(tape.trainable_params) + if len(trainable_params) == 0: + return None + + tp_shift = [] + record_tp_rows = [] + all_params = 0 + + for op_idx, trainable_param in enumerate(trainable_params): + # get op_idx-th operator among differentiable operators + operation, _, _ = tape.get_operation(op_idx) + if isinstance(operation, Operation) and not isinstance( + operation, (BasisState, StatePrep) + ): + # We now just ignore non-op or state preps + tp_shift.append(trainable_param) + record_tp_rows.append(all_params) + all_params += 1 + + if use_sp: + # When the first element of the tape is state preparation. Still, I am not sure + # whether there must be only one state preparation... + tp_shift = [i - 1 for i in tp_shift] + + return { + "state_vector": self.state, + "obs_serialized": obs_serialized, + "ops_serialized": ops_serialized, + "tp_shift": tp_shift, + "record_tp_rows": record_tp_rows, + "all_params": all_params, + "obs_indices": obs_indices, + } + + def calculate_jacobian(self, tape: QuantumTape): + """Computes the Jacobian with the adjoint method. + + .. code-block:: python + + statevector = LightningGPUStateVector(num_wires=num_wires) + statevector = statevector.get_final_state(tape) + jacobian = LightningGPUAdjointJacobian(statevector).calculate_jacobian(tape) + + Args: + tape (QuantumTape): Operations and measurements that represent instructions for execution on Lightning. + + Returns: + The Jacobian of a tape. 
+ """ + + empty_array = self._handle_raises(tape, is_jacobian=True) + + if empty_array: + return np.array([], dtype=self.dtype) + + if self._use_mpi: + split_obs = False # with MPI batched means compute Jacobian one observables at a time, no point splitting linear combinations + else: + split_obs = self._dp.getTotalDevices() if self._batch_obs else False + + processed_data = self._process_jacobian_tape(tape, split_obs, self._use_mpi) + + if not processed_data: # training_params is empty + return np.array([], dtype=self.dtype) + + trainable_params = processed_data["tp_shift"] + + if self._batch_obs: # Batching of Measurements + jac = self._jacobian_lightning.batched( + processed_data["state_vector"], + processed_data["obs_serialized"], + processed_data["ops_serialized"], + trainable_params, + ) + else: + jac = self._jacobian_lightning( + processed_data["state_vector"], + processed_data["obs_serialized"], + processed_data["ops_serialized"], + trainable_params, + ) + + jac = np.array(jac) + has_shape0 = bool(len(jac)) + + num_obs = len(np.unique(processed_data["obs_indices"])) + rows = processed_data["obs_indices"] + cols = np.arange(len(rows), dtype=int) + data = np.ones(len(rows)) + red_mat = csr_matrix((data, (rows, cols)), shape=(num_obs, len(rows))) + jac = red_mat @ jac.reshape((len(rows), -1)) + jac = jac.reshape(-1, len(trainable_params)) if has_shape0 else jac + jac_r = np.zeros((jac.shape[0], processed_data["all_params"])) + jac_r[:, processed_data["record_tp_rows"]] = jac + return self._adjoint_jacobian_processing(jac_r) diff --git a/pennylane_lightning/lightning_gpu/_measurements.py b/pennylane_lightning/lightning_gpu/_measurements.py new file mode 100644 index 000000000..9efd2c19a --- /dev/null +++ b/pennylane_lightning/lightning_gpu/_measurements.py @@ -0,0 +1,219 @@ +# Copyright 2018-2024 Xanadu Quantum Technologies Inc. 
+ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Class implementation for state vector measurements. +""" + +from __future__ import annotations + +from warnings import warn + +try: + from pennylane_lightning.lightning_gpu_ops import MeasurementsC64, MeasurementsC128 + + try: + from pennylane_lightning.lightning_gpu_ops import MeasurementsMPIC64, MeasurementsMPIC128 + + mpi_error = None + MPI_SUPPORT = True + except ImportError as ex_mpi: + mpi_error = ex_mpi + MPI_SUPPORT = False + +except ImportError as error_import: + warn(str(error_import), UserWarning) + +from typing import Any, List + +import numpy as np +import pennylane as qml +from pennylane.measurements import CountsMP, MeasurementProcess, SampleMeasurement, Shots +from pennylane.typing import TensorLike + +# pylint: disable=ungrouped-imports +from pennylane_lightning.core._measurements_base import LightningBaseMeasurements +from pennylane_lightning.core._serialize import QuantumScriptSerializer + + +class LightningGPUMeasurements(LightningBaseMeasurements): # pylint: disable=too-few-public-methods + """Lightning GPU Measurements class + + Measures the state provided by the LightningGPUStateVector class. + + Args: + qubit_state(LightningGPUStateVector): Lightning state-vector class containing the state vector to be measured. 
+ """ + + def __init__( + self, + qubit_state: LightningGPUStateVector, # pylint: disable=undefined-variable + ) -> TensorLike: + + super().__init__(qubit_state) + + self._use_mpi = qubit_state._mpi_handler.use_mpi + + if self._use_mpi: + self._mpi_handler = qubit_state._mpi_handler + self._num_local_wires = qubit_state._mpi_handler.num_local_wires + + self._measurement_lightning = self._measurement_dtype()(qubit_state.state_vector) + + def _measurement_dtype(self): + """Binding to Lightning GPU Measurements C++ class. + + Returns: the Measurements class + """ + if self._use_mpi: + if not MPI_SUPPORT: + warn(str(mpi_error), UserWarning) + + return MeasurementsMPIC128 if self.dtype == np.complex128 else MeasurementsMPIC64 + + # without MPI + return MeasurementsC128 if self.dtype == np.complex128 else MeasurementsC64 + + def _measure_with_samples_diagonalizing_gates( + self, + mps: List[SampleMeasurement], + shots: Shots, + ) -> TensorLike: + """ + Returns the samples of the measurement process performed on the given state, + by rotating the state into the measurement basis using the diagonalizing gates + given by the measurement process. 
+ + Args: + mps (~.measurements.SampleMeasurement): The sample measurements to perform + shots (~.measurements.Shots): The number of samples to take + + Returns: + TensorLike[Any]: Sample measurement results + """ + # apply diagonalizing gates + self._apply_diagonalizing_gates(mps) + + # Specific for LGPU: + total_indices = self._qubit_state.num_wires + wires = qml.wires.Wires(range(total_indices)) + + def _process_single_shot(samples): + processed = [] + for mp in mps: + res = mp.process_samples(samples, wires) + if not isinstance(mp, CountsMP): + res = qml.math.squeeze(res) + + processed.append(res) + + return tuple(processed) + + try: + samples = self._measurement_lightning.generate_samples( + len(wires), shots.total_shots + ).astype(int, copy=False) + + except ValueError as ex: + if str(ex) != "probabilities contain NaN": + raise ex + samples = qml.math.full((shots.total_shots, len(wires)), 0) + + self._apply_diagonalizing_gates(mps, adjoint=True) + + # if there is a shot vector, use the shots.bins generator to + # split samples w.r.t. the shots + processed_samples = [] + for lower, upper in shots.bins(): + result = _process_single_shot(samples[..., lower:upper, :]) + processed_samples.append(result) + + return ( + tuple(zip(*processed_samples)) if shots.has_partitioned_shots else processed_samples[0] + ) + + def expval(self, measurementprocess: MeasurementProcess): + """Expectation value of the supplied observable contained in the MeasurementProcess. + + Args: + measurementprocess (StateMeasurement): measurement to apply to the state + + Returns: + Expectation value of the observable + """ + + if isinstance(measurementprocess.obs, qml.SparseHamiltonian): + # ensuring CSR sparse representation. 
+ + if self._use_mpi: + # Identity for CSR_SparseHamiltonian to pass to processes with rank != 0 to reduce + # host(cpu) memory requirements + obs = qml.Identity(0) + Hmat = qml.Hamiltonian([1.0], [obs]).sparse_matrix() + H_sparse = qml.SparseHamiltonian(Hmat, wires=range(1)) + CSR_SparseHamiltonian = H_sparse.sparse_matrix().tocsr() + # CSR_SparseHamiltonian for rank == 0 + if self._mpi_handler.mpi_manager.getRank() == 0: + CSR_SparseHamiltonian = measurementprocess.obs.sparse_matrix().tocsr() + else: + CSR_SparseHamiltonian = measurementprocess.obs.sparse_matrix( + wire_order=list(range(self._qubit_state.num_wires)) + ).tocsr(copy=False) + + return self._measurement_lightning.expval( + CSR_SparseHamiltonian.indptr, + CSR_SparseHamiltonian.indices, + CSR_SparseHamiltonian.data, + ) + + # use specialized functors to compute expval(Hermitian) + if isinstance(measurementprocess.obs, qml.Hermitian): + observable_wires = measurementprocess.obs.wires + if self._use_mpi and len(observable_wires) > self._num_local_wires: + raise RuntimeError( + "MPI backend does not support Hermitian with number of target wires larger than local wire number." + ) + matrix = measurementprocess.obs.matrix() + return self._measurement_lightning.expval(matrix, observable_wires) + + if ( + isinstance(measurementprocess.obs, qml.ops.Hamiltonian) + or (measurementprocess.obs.arithmetic_depth > 0) + or isinstance(measurementprocess.obs.name, List) + ): + # pylint: disable=protected-access + ob_serialized = QuantumScriptSerializer( + self._qubit_state.device_name, self.dtype == np.complex64, self._use_mpi + )._ob(measurementprocess.obs) + return self._measurement_lightning.expval(ob_serialized) + + return self._measurement_lightning.expval( + measurementprocess.obs.name, measurementprocess.obs.wires + ) + + def _probs_retval_conversion(self, probs_results: Any) -> np.ndarray: + """Convert the data structure from the C++ backend to a common structure through lightning devices. 
+ + Args: + probs_result (Any): Result provided by C++ backend. + + Returns: + np.ndarray with probabilities of the supplied observable or wires. + """ + + # Device returns as col-major orderings, so perform transpose on data for bit-index shuffle for now. + if len(probs_results) > 0: + num_local_wires = len(probs_results).bit_length() - 1 if len(probs_results) > 0 else 0 + return probs_results.reshape([2] * num_local_wires).transpose().reshape(-1) + + return probs_results diff --git a/pennylane_lightning/lightning_gpu/_mpi_handler.py b/pennylane_lightning/lightning_gpu/_mpi_handler.py new file mode 100644 index 000000000..0d569ebeb --- /dev/null +++ b/pennylane_lightning/lightning_gpu/_mpi_handler.py @@ -0,0 +1,126 @@ +# Copyright 2022-2024 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This module contains the :class:`~.LightningGPU_MPIHandler` class, a MPI handler to use LightningGPU device with multi-GPU on multi-node system. +""" + +try: + # pylint: disable=no-name-in-module + from pennylane_lightning.lightning_gpu_ops import DevPool, DevTag, MPIManager + + MPI_SUPPORT = True +except ImportError: + MPI_SUPPORT = False + +from typing import Union + +import numpy as np + + +# MPI options +class MPIHandler: # pylint: disable=too-few-public-methods + """MPI handler for PennyLane Lightning GPU device. + + MPI handler to use a GPU-backed Lightning device using NVIDIA cuQuantum SDK with parallel capabilities. 
+ + Use the MPI library is necessary to initialize different variables and methods to handle the data across nodes and perform checks for memory allocation on each device. + + Args: + mpi (bool): declare if the device will use the MPI support. + mpi_buf_size (int): size of GPU memory (in MiB) set for MPI operation and its default value is 64 MiB. + num_wires (int): the number of wires to initialize the device with. + c_dtype (np.complex64, np.complex128): Datatypes for statevector representation. + """ + + def __init__( + self, + mpi: bool, + mpi_buf_size: int, + num_wires: int, + c_dtype: Union[np.complex64, np.complex128], + ) -> None: + + self.use_mpi = mpi + self.mpi_buf_size = mpi_buf_size + + self._dp = DevPool() + + if self.use_mpi: + + if not MPI_SUPPORT: + raise ImportError( + "Pre-compiled binaries for lightning.gpu with MPI support are not available. " + "To manually compile from source, follow the instructions at " + "https://docs.pennylane.ai/projects/lightning/en/stable/dev/installation.html." + ) + + if mpi_buf_size < 0: + raise ValueError(f"Unsupported mpi_buf_size value: {mpi_buf_size}, should be >= 0") + + if mpi_buf_size > 0 and (mpi_buf_size & (mpi_buf_size - 1)): + raise ValueError( + f"Unsupported mpi_buf_size value: {mpi_buf_size}. mpi_buf_size should be power of 2." 
+ ) + + # After check if all MPI parameters are ok + self.mpi_manager, self.devtag = self._mpi_init_helper(num_wires) + + # set the number of global and local wires + commSize = self.mpi_manager.getSize() + self.num_global_wires = commSize.bit_length() - 1 + self.num_local_wires = num_wires - self.num_global_wires + + self._check_memory_size(c_dtype, mpi_buf_size) + + if not self.use_mpi: + self.num_local_wires = num_wires + self.num_global_wires = num_wires + + def _mebibytesToBytes(self, mebibytes): + return mebibytes * 1024 * 1024 + + def _check_memory_size(self, c_dtype, mpi_buf_size): + # Memory size in bytes + sv_memsize = np.dtype(c_dtype).itemsize * (1 << self.num_local_wires) + if self._mebibytesToBytes(mpi_buf_size) > sv_memsize: + raise RuntimeError("The MPI buffer size is larger than the local state vector size.") + + def _mpi_init_helper(self, num_wires): + """Set up MPI checks and initializations.""" + + # initialize MPIManager and config check in the MPIManager ctor + mpi_manager = MPIManager() + + # check if number of GPUs per node is larger than number of processes per node + numDevices = self._dp.getTotalDevices() + numProcsNode = mpi_manager.getSizeNode() + + if numDevices < numProcsNode: + raise ValueError( + "Number of devices should be larger than or equal to the number of processes on each node." + ) + + # check if the process number is larger than number of statevector elements + if mpi_manager.getSize() > (1 << (num_wires - 1)): + raise ValueError( + "Number of processes should be smaller than the number of statevector elements." 
+ ) + + # set GPU device + rank = mpi_manager.getRank() + deviceid = rank % numProcsNode + self._dp.setDeviceID(deviceid) + devtag = DevTag(deviceid) + + return (mpi_manager, devtag) diff --git a/pennylane_lightning/lightning_gpu/_state_vector.py b/pennylane_lightning/lightning_gpu/_state_vector.py new file mode 100644 index 000000000..d550fcb28 --- /dev/null +++ b/pennylane_lightning/lightning_gpu/_state_vector.py @@ -0,0 +1,337 @@ +# Copyright 2018-2024 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Class implementation for lightning_gpu state-vector manipulation. 
+""" +from warnings import warn + +try: + from pennylane_lightning.lightning_gpu_ops import StateVectorC64, StateVectorC128 + + try: # Try to import the MPI modules + from pennylane_lightning.lightning_gpu_ops import StateVectorMPIC64, StateVectorMPIC128 + + mpi_error = None + MPI_SUPPORT = True + except ImportError as ex_mpi: + mpi_error = ex_mpi + MPI_SUPPORT = False + +except ImportError as ex: + warn(str(ex), UserWarning) + +from itertools import product +from typing import Union + +import numpy as np +import pennylane as qml +from pennylane import DeviceError +from pennylane.ops.op_math import Adjoint +from pennylane.wires import Wires + +# pylint: disable=ungrouped-imports +from pennylane_lightning.core._serialize import global_phase_diagonal +from pennylane_lightning.core._state_vector_base import LightningBaseStateVector + +from ._mpi_handler import MPIHandler + +gate_cache_needs_hash = ( + qml.BlockEncode, + qml.ControlledQubitUnitary, + qml.DiagonalQubitUnitary, + qml.MultiControlledX, + qml.OrbitalRotation, + qml.PSWAP, + qml.QubitUnitary, +) + + +class LightningGPUStateVector(LightningBaseStateVector): + """Lightning GPU state-vector class. + + Interfaces with C++ python binding methods for state-vector manipulation. + + Args: + num_wires(int): the number of wires to initialize the device with + dtype: Datatypes for state-vector representation. Must be one of + ``np.complex64`` or ``np.complex128``. Default is ``np.complex128`` + device_name(string): state vector device name. Options: ["lightning.gpu"] + mpi_handler(MPIHandler): MPI handler for PennyLane Lightning GPU device. + Provides functionality to distribute the state-vector to multiple devices. + sync (bool): is host-device data copy synchronized or not. 
+ """ + + def __init__( + self, + num_wires: int, + dtype: Union[np.complex128, np.complex64] = np.complex128, + mpi_handler: MPIHandler = None, + sync: bool = True, + ): + + super().__init__(num_wires, dtype) + + self._device_name = "lightning.gpu" + + # Initialize GPU and MPI variables + if mpi_handler is None: + mpi_handler = MPIHandler(False, 0, num_wires, dtype) + + self._num_global_wires = mpi_handler.num_global_wires + self._num_local_wires = mpi_handler.num_local_wires + + self._mpi_handler = mpi_handler + self._sync = sync + + # Initialize the state vector + if self._mpi_handler.use_mpi: # using MPI + self._qubit_state = self._state_dtype()( + self._mpi_handler.mpi_manager, + self._mpi_handler.devtag, + self._mpi_handler.mpi_buf_size, + self._mpi_handler.num_global_wires, + self._mpi_handler.num_local_wires, + ) + else: # without MPI + self._qubit_state = self._state_dtype()(self.num_wires) + + self.reset_state() + + def _state_dtype(self): + """Binding to Lightning Managed state vector C++ class. + + Returns: the state vector class + """ + if self._mpi_handler.use_mpi: + if not MPI_SUPPORT: + warn(str(mpi_error), UserWarning) + + return StateVectorMPIC128 if self.dtype == np.complex128 else StateVectorMPIC64 + + # without MPI + return StateVectorC128 if self.dtype == np.complex128 else StateVectorC64 + + def syncD2H(self, state_vector, use_async=False): + """Copy the state vector data on device to a state vector on the host provided by the user. + Args: + state_vector(array[complex]): the state vector array on host. + use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. + Note: This function only supports synchronized memory copy. 
+ + **Example** + + >>> dev = qml.device('lightning.gpu', wires=1) + >>> dev.apply([qml.PauliX(wires=[0])]) + >>> state_vector = np.zeros(2**dev.num_wires).astype(dev.c_type) + >>> dev.syncD2H(state_vector) + >>> print(state_vector) + [0.+0.j 1.+0.j] + """ + self._qubit_state.DeviceToHost(state_vector.ravel(order="C"), use_async) + + @property + def state(self): + """Copy the state vector data from the device to the host. + + A state vector Numpy array is explicitly allocated on the host to store and return the data. + + **Example** + + >>> dev = qml.device('lightning.gpu', wires=1) + >>> dev.apply([qml.PauliX(wires=[0])]) + >>> print(dev.state) + [0.+0.j 1.+0.j] + """ + state = np.zeros(2**self._num_local_wires, dtype=self.dtype) + self.syncD2H(state) + return state + + def syncH2D(self, state_vector, use_async=False): + """Copy the state vector data on host provided by the user to the state vector on the device + Args: + state_vector(array[complex]): the state vector array on host. + use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. + Note: This function only supports synchronized memory copy. 
+ + **Example** + + >>> dev = qml.device('lightning.gpu', wires=3) + >>> obs = qml.Identity(0) @ qml.PauliX(1) @ qml.PauliY(2) + >>> obs1 = qml.Identity(1) + >>> H = qml.Hamiltonian([1.0, 1.0], [obs1, obs]) + >>> state_vector = np.array([0.0 + 0.0j, 0.0 + 0.1j, 0.1 + 0.1j, 0.1 + 0.2j, + 0.2 + 0.2j, 0.3 + 0.3j, 0.3 + 0.4j, 0.4 + 0.5j,], dtype=np.complex64,) + >>> dev.syncH2D(state_vector) + >>> res = dev.expval(H) + >>> print(res) + 1.0 + """ + self._qubit_state.HostToDevice(state_vector.ravel(order="C"), use_async) + + @staticmethod + def _asarray(arr, dtype=None): + arr = np.asarray(arr) # arr is not copied + + if arr.dtype.kind not in ["f", "c"]: + return arr + + if not dtype: + dtype = arr.dtype + + return arr + + def _apply_state_vector(self, state, device_wires, use_async=False): + """Initialize the state vector on GPU with a specified state on host. + Note that any use of this method will introduce host-overheads. + Args: + state (array[complex]): normalized input state (on host) of length ``2**len(wires)`` + or broadcasted state of shape ``(batch_size, 2**len(wires))`` + device_wires (Wires): wires that get initialized in the state + use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. + Note: This function only supports synchronized memory copy from host to device. + """ + + if isinstance(state, self._qubit_state.__class__): + raise DeviceError("LightningGPU does not support allocate external state_vector.") + + # TODO + # Create an implementation in the C++ backend and binding to be able + # to allocate memory for a new statevector and copy the data + # from an external state vector. 
+ # state_data = allocate_aligned_array(state.size, np.dtype(self.dtype), True) + # state.getState(state_data) + # state = state_data + + state = self._asarray(state, dtype=self.dtype) # this operation on host + output_shape = [2] * self._num_local_wires + + if len(device_wires) == self.num_wires and Wires(sorted(device_wires)) == device_wires: + # Initialize the entire device state with the input state + if self.num_wires == self._num_local_wires: + self.syncH2D(np.reshape(state, output_shape)) + return + local_state = np.zeros(2**self._num_local_wires, dtype=self._dtype) + self._mpi_handler.mpi_manager.Scatter(state, local_state, 0) + self.syncH2D(np.reshape(local_state, output_shape)) + return + + # generate basis states on subset of qubits via the cartesian product + basis_states = np.array(list(product([0, 1], repeat=len(device_wires)))) + + # get basis states to alter on full set of qubits + unravelled_indices = np.zeros((2 ** len(device_wires), self.num_wires), dtype=int) + unravelled_indices[:, device_wires] = basis_states + + # get indices for which the state is changed to input state vector elements + ravelled_indices = np.ravel_multi_index(unravelled_indices.T, [2] * self.num_wires) + + # set the state vector on GPU with the unravelled_indices and their corresponding values + self._qubit_state.setStateVector( + ravelled_indices, state, use_async + ) # this operation on device + + def _apply_lightning_controlled(self, operation): + """Apply an arbitrary controlled operation to the state tensor. 
+ + Args: + operation (~pennylane.operation.Operation): controlled operation to apply + + Returns: + None + """ + state = self.state_vector + + control_wires = list(operation.control_wires) + control_values = operation.control_values + name = operation.name + # Apply GlobalPhase + inv = False + param = operation.parameters[0] + wires = self.wires.indices(operation.wires) + matrix = global_phase_diagonal(param, self.wires, control_wires, control_values) + state.apply(name, wires, inv, [[param]], matrix) + + def _apply_lightning_midmeasure(self): + """Execute a MidMeasureMP operation and return the sample in mid_measurements. + + Args: + + Returns: + None + """ + raise DeviceError("LightningGPU does not support Mid-circuit measurements.") + + # pylint: disable=unused-argument + def _apply_lightning( + self, operations, mid_measurements: dict = None, postselect_mode: str = None + ): + """Apply a list of operations to the state vector. + + Args: + operations (list[~pennylane.operation.Operation]): operations to apply + mid_measurements (None, dict): Dictionary of mid-circuit measurements + postselect_mode (str): Configuration for handling shots with mid-circuit measurement + postselection. Use ``"hw-like"`` to discard invalid shots and ``"fill-shots"`` to + keep the same number of shots. Default is ``None``. + + Returns: + None + """ + state = self.state_vector + + # Skip over identity operations instead of performing + # matrix multiplication with it. 
+ for operation in operations: + if isinstance(operation, qml.Identity): + continue + if isinstance(operation, Adjoint): + name = operation.base.name + invert_param = True + else: + name = operation.name + invert_param = False + method = getattr(state, name, None) + wires = list(operation.wires) + + if method is not None: # apply specialized gate + param = operation.parameters + method(wires, invert_param, param) + elif isinstance(operation, qml.ops.Controlled) and isinstance( + operation.base, qml.GlobalPhase + ): # apply n-controlled gate + # LGPU do not support the controlled gates except for GlobalPhase + self._apply_lightning_controlled(operation) + else: # apply gate as a matrix + try: + mat = qml.matrix(operation) + except AttributeError: # pragma: no cover + # To support older versions of PL + mat = operation.matrix + + r_dtype = np.float32 if self.dtype == np.complex64 else np.float64 + param = ( + [[r_dtype(operation.hash)]] + if isinstance(operation, gate_cache_needs_hash) + else [] + ) + if len(mat) == 0: + raise ValueError("Unsupported operation") + + self._qubit_state.apply( + name, + wires, + False, + param, + mat.ravel(order="C"), # inv = False: Matrix already in correct form; + ) # Parameters can be ignored for explicit matrices; F-order for cuQuantum diff --git a/pennylane_lightning/lightning_gpu/lightning_gpu.py b/pennylane_lightning/lightning_gpu/lightning_gpu.py index 2894b999f..84d7dd31e 100644 --- a/pennylane_lightning/lightning_gpu/lightning_gpu.py +++ b/pennylane_lightning/lightning_gpu/lightning_gpu.py @@ -16,921 +16,496 @@ This module contains the :class:`~.LightningGPU` class, a PennyLane simulator device that interfaces with the NVIDIA cuQuantum cuStateVec simulator library for GPU-enabled calculations. 
""" +from __future__ import annotations from ctypes.util import find_library +from dataclasses import replace from importlib import util as imp_util -from itertools import product from pathlib import Path -from typing import List, Union +from typing import List, Optional, Union from warnings import warn import numpy as np import pennylane as qml -from pennylane import BasisState, DeviceError, QuantumFunctionError, Rot, StatePrep, math -from pennylane.measurements import Expectation, State -from pennylane.ops.op_math import Adjoint -from pennylane.wires import Wires -from scipy.sparse import csr_matrix - -from pennylane_lightning.core._serialize import QuantumScriptSerializer, global_phase_diagonal -from pennylane_lightning.core._version import __version__ - -# pylint: disable=import-error, no-name-in-module, ungrouped-imports -from pennylane_lightning.core.lightning_base import LightningBase +from pennylane.devices import DefaultExecutionConfig, ExecutionConfig +from pennylane.devices.default_qubit import adjoint_ops +from pennylane.devices.modifiers import simulator_tracking, single_tape_support +from pennylane.devices.preprocess import ( + decompose, + mid_circuit_measurements, + no_sampling, + validate_adjoint_trainable_params, + validate_device_wires, + validate_measurements, + validate_observables, +) +from pennylane.measurements import MidMeasureMP +from pennylane.operation import DecompositionUndefinedError, Operator, Tensor +from pennylane.ops import Prod, SProd, Sum +from pennylane.tape import QuantumScript +from pennylane.transforms.core import TransformProgram +from pennylane.typing import Result + +from pennylane_lightning.core.lightning_newAPI_base import ( + LightningBase, + QuantumTape_or_Batch, + Result_or_ResultBatch, +) try: from pennylane_lightning.lightning_gpu_ops import ( DevPool, - MeasurementsC64, - MeasurementsC128, - StateVectorC64, - StateVectorC128, backend_info, get_gpu_arch, is_gpu_supported, ) - from 
pennylane_lightning.lightning_gpu_ops.algorithms import ( - AdjointJacobianC64, - AdjointJacobianC128, - create_ops_listC64, - create_ops_listC128, - ) - - try: - # pylint: disable=no-name-in-module - from pennylane_lightning.lightning_gpu_ops import ( - DevTag, - MeasurementsMPIC64, - MeasurementsMPIC128, - MPIManager, - StateVectorMPIC64, - StateVectorMPIC128, - ) - from pennylane_lightning.lightning_gpu_ops.algorithmsMPI import ( - AdjointJacobianMPIC64, - AdjointJacobianMPIC128, - create_ops_listMPIC64, - create_ops_listMPIC128, - ) - - MPI_SUPPORT = True - except ImportError as ex: - warn(str(ex), UserWarning) - MPI_SUPPORT = False - - if find_library("custatevec") is None and not imp_util.find_spec( - "cuquantum" - ): # pragma: no cover - raise ImportError( - "custatevec libraries not found. Please pip install the appropriate custatevec library in a virtual environment." - ) - if not DevPool.getTotalDevices(): # pragma: no cover - raise ValueError("No supported CUDA-capable device found") - - if not is_gpu_supported(): # pragma: no cover - raise ValueError(f"CUDA device is an unsupported version: {get_gpu_arch()}") LGPU_CPP_BINARY_AVAILABLE = True + except (ImportError, ValueError) as ex: warn(str(ex), UserWarning) - backend_info = None LGPU_CPP_BINARY_AVAILABLE = False + backend_info = None - -def _gpu_dtype(dtype, mpi=False): - if dtype not in [np.complex128, np.complex64]: # pragma: no cover - raise ValueError(f"Data type is not supported for state-vector computation: {dtype}") - if mpi: - return StateVectorMPIC128 if dtype == np.complex128 else StateVectorMPIC64 - return StateVectorC128 if dtype == np.complex128 else StateVectorC64 - - -def _adj_dtype(use_csingle, mpi=False): - if mpi: - return AdjointJacobianMPIC64 if use_csingle else AdjointJacobianMPIC128 - return AdjointJacobianC64 if use_csingle else AdjointJacobianC128 - - -def _mebibytesToBytes(mebibytes): - return mebibytes * 1024 * 1024 - - -allowed_operations = { - "Identity", - "BasisState", - 
"QubitStateVector", - "StatePrep", - "QubitUnitary", - "ControlledQubitUnitary", - "MultiControlledX", - "DiagonalQubitUnitary", - "PauliX", - "PauliY", - "PauliZ", - "MultiRZ", - "GlobalPhase", - "C(GlobalPhase)", - "Hadamard", - "S", - "Adjoint(S)", - "T", - "Adjoint(T)", - "SX", - "Adjoint(SX)", - "CNOT", - "SWAP", - "ISWAP", - "PSWAP", - "Adjoint(ISWAP)", - "SISWAP", - "Adjoint(SISWAP)", - "SQISW", - "CSWAP", - "Toffoli", - "CY", - "CZ", - "PhaseShift", - "ControlledPhaseShift", - "RX", - "RY", - "RZ", - "Rot", - "CRX", - "CRY", - "CRZ", - "CRot", - "IsingXX", - "IsingYY", - "IsingZZ", - "IsingXY", - "SingleExcitation", - "SingleExcitationPlus", - "SingleExcitationMinus", - "DoubleExcitation", - "DoubleExcitationPlus", - "DoubleExcitationMinus", - "QubitCarry", - "QubitSum", - "OrbitalRotation", - "ECR", - "BlockEncode", - "C(BlockEncode)", -} - -allowed_observables = { - "PauliX", - "PauliY", - "PauliZ", - "Hadamard", - "SparseHamiltonian", - "Hamiltonian", - "LinearCombination", - "Hermitian", - "Identity", - "Projector", - "Sum", - "Prod", - "SProd", -} - -gate_cache_needs_hash = ( - qml.BlockEncode, - qml.ControlledQubitUnitary, - qml.DiagonalQubitUnitary, - qml.MultiControlledX, - qml.OrbitalRotation, - qml.PSWAP, - qml.QubitUnitary, +from ._adjoint_jacobian import LightningGPUAdjointJacobian +from ._measurements import LightningGPUMeasurements +from ._mpi_handler import MPIHandler +from ._state_vector import LightningGPUStateVector + +# The set of supported operations. 
+_operations = frozenset( + { + "Identity", + "QubitStateVector", + "QubitUnitary", + "ControlledQubitUnitary", + "MultiControlledX", + "DiagonalQubitUnitary", + "PauliX", + "PauliY", + "PauliZ", + "MultiRZ", + "GlobalPhase", + "C(GlobalPhase)", + "Hadamard", + "S", + "Adjoint(S)", + "T", + "Adjoint(T)", + "SX", + "Adjoint(SX)", + "CNOT", + "SWAP", + "ISWAP", + "PSWAP", + "Adjoint(ISWAP)", + "SISWAP", + "Adjoint(SISWAP)", + "SQISW", + "CSWAP", + "Toffoli", + "CY", + "CZ", + "PhaseShift", + "ControlledPhaseShift", + "RX", + "RY", + "RZ", + "Rot", + "CRX", + "CRY", + "CRZ", + "CRot", + "IsingXX", + "IsingYY", + "IsingZZ", + "IsingXY", + "SingleExcitation", + "SingleExcitationPlus", + "SingleExcitationMinus", + "DoubleExcitation", + "DoubleExcitationPlus", + "DoubleExcitationMinus", + "QubitCarry", + "QubitSum", + "OrbitalRotation", + "ECR", + "BlockEncode", + "C(BlockEncode)", + } +) +# End the set of supported operations. + +# The set of supported observables. +_observables = frozenset( + { + "PauliX", + "PauliY", + "PauliZ", + "Hadamard", + "SparseHamiltonian", + "Hamiltonian", + "LinearCombination", + "Hermitian", + "Identity", + "Projector", + "Sum", + "Prod", + "SProd", + } ) -class LightningGPU(LightningBase): # pylint: disable=too-many-instance-attributes - """PennyLane Lightning GPU device. +def stopping_condition(op: Operator) -> bool: + """A function that determines whether or not an operation is supported by ``lightning.gpu``.""" + # To avoid building matrices beyond the given thresholds. + # This should reduce runtime overheads for larger systems. + if isinstance(op, qml.QFT): + return len(op.wires) < 10 + if isinstance(op, qml.GroverOperator): + return len(op.wires) < 13 + if isinstance(op, qml.PauliRot): + return False - A GPU-backed Lightning device using NVIDIA cuQuantum SDK. + return op.name in _operations - Use of this device requires pre-built binaries or compilation from source. 
Check out the - :doc:`/lightning_gpu/installation` guide for more details. - Args: - wires (int): the number of wires to initialize the device with - mpi (bool): enable MPI support. MPI support will be enabled if ``mpi`` is set as``True``. - mpi_buf_size (int): size of GPU memory (in MiB) set for MPI operation and its default value is 64 MiB. - sync (bool): immediately sync with host-sv after applying operations - c_dtype: Datatypes for statevector representation. Must be one of ``np.complex64`` or ``np.complex128``. - shots (int): How many times the circuit should be evaluated (or sampled) to estimate - the expectation values. Defaults to ``None`` if not specified. Setting - to ``None`` results in computing statistics like expectation values and - variances analytically. - batch_obs (Union[bool, int]): determine whether to use multiple GPUs within the same node or not - """ +def stopping_condition_shots(op: Operator) -> bool: + """A function that determines whether or not an operation is supported by ``lightning.gpu`` + with finite shots.""" + if isinstance(op, (MidMeasureMP, qml.ops.op_math.Conditional)): + # LightningGPU does not support Mid-circuit measurements. 
+ return False + return stopping_condition(op) - name = "Lightning GPU PennyLane plugin" - short_name = "lightning.gpu" - operations = allowed_operations - observables = allowed_observables - _backend_info = backend_info - config = Path(__file__).parent / "lightning_gpu.toml" - _CPP_BINARY_AVAILABLE = LGPU_CPP_BINARY_AVAILABLE +def accepted_observables(obs: Operator) -> bool: + """A function that determines whether or not an observable is supported by ``lightning.gpu``.""" + return obs.name in _observables - def __init__( - self, - wires, - *, - mpi: bool = False, - mpi_buf_size: int = 0, - sync=False, - c_dtype=np.complex128, - shots=None, - batch_obs: Union[bool, int] = False, - ): # pylint: disable=too-many-arguments - if c_dtype is np.complex64: - self.use_csingle = True - elif c_dtype is np.complex128: - self.use_csingle = False - else: - raise TypeError(f"Unsupported complex type: {c_dtype}") - - super().__init__(wires, shots=shots, c_dtype=c_dtype) - self._dp = DevPool() +def adjoint_observables(obs: Operator) -> bool: + """A function that determines whether or not an observable is supported by ``lightning.gpu`` + when using the adjoint differentiation method.""" + if isinstance(obs, qml.Projector): + return False - if not mpi: - self._mpi = False - self._num_local_wires = self.num_wires - self._gpu_state = _gpu_dtype(c_dtype)(self._num_local_wires) - else: - self._mpi = True - self._mpi_init_helper(self.num_wires) - - if mpi_buf_size < 0: - raise TypeError(f"Unsupported mpi_buf_size value: {mpi_buf_size}") - - if mpi_buf_size: - if mpi_buf_size & (mpi_buf_size - 1): - raise TypeError( - f"Unsupported mpi_buf_size value: {mpi_buf_size}. mpi_buf_size should be power of 2." - ) - # Memory size in bytes - sv_memsize = np.dtype(c_dtype).itemsize * (1 << self._num_local_wires) - if _mebibytesToBytes(mpi_buf_size) > sv_memsize: - w_msg = "The MPI buffer size is larger than the local state vector size." 
- warn( - w_msg, - RuntimeWarning, - ) - - self._gpu_state = _gpu_dtype(c_dtype, mpi)( - self._mpi_manager, - self._devtag, - mpi_buf_size, - self._num_global_wires, - self._num_local_wires, - ) + if isinstance(obs, Tensor): + if any(isinstance(o, qml.Projector) for o in obs.non_identity_obs): + return False + return True - self._sync = sync - self._batch_obs = batch_obs - self._create_basis_state(0) - - def _mpi_init_helper(self, num_wires): - """Set up MPI checks.""" - if not MPI_SUPPORT: - raise ImportError("MPI related APIs are not found.") - # initialize MPIManager and config check in the MPIManager ctor - self._mpi_manager = MPIManager() - # check if number of GPUs per node is larger than - # number of processes per node - numDevices = self._dp.getTotalDevices() - numProcsNode = self._mpi_manager.getSizeNode() - if numDevices < numProcsNode: - raise ValueError( - "Number of devices should be larger than or equal to the number of processes on each node." - ) - # check if the process number is larger than number of statevector elements - if self._mpi_manager.getSize() > (1 << (num_wires - 1)): - raise ValueError( - "Number of processes should be smaller than the number of statevector elements." 
- ) - # set the number of global and local wires - commSize = self._mpi_manager.getSize() - self._num_global_wires = commSize.bit_length() - 1 - self._num_local_wires = num_wires - self._num_global_wires - # set GPU device - rank = self._mpi_manager.getRank() - deviceid = rank % numProcsNode - self._dp.setDeviceID(deviceid) - self._devtag = DevTag(deviceid) - - @staticmethod - def _asarray(arr, dtype=None): - arr = np.asarray(arr) # arr is not copied - - if arr.dtype.kind not in ["f", "c"]: - return arr - - if not dtype: - dtype = arr.dtype - - return arr - - # pylint disable=missing-function-docstring - def reset(self): - """Reset the device""" - super().reset() - # init the state vector to |00..0> - self._gpu_state.resetGPU(False) # Sync reset + if isinstance(obs, SProd): + return adjoint_observables(obs.base) - @property - def state(self): - # pylint disable=missing-function-docstring - """Copy the state vector data from the device to the host. + if isinstance(obs, (Sum, Prod)): + return all(adjoint_observables(o) for o in obs) - A state vector Numpy array is explicitly allocated on the host to store and return the data. 
+ return obs.name in _observables - **Example** - >>> dev = qml.device('lightning.gpu', wires=1) - >>> dev.apply([qml.PauliX(wires=[0])]) - >>> print(dev.state) - [0.+0.j 1.+0.j] - """ - state = np.zeros(1 << self._num_local_wires, dtype=self.C_DTYPE) - state = self._asarray(state, dtype=self.C_DTYPE) - self.syncD2H(state) - return state +def adjoint_measurements(mp: qml.measurements.MeasurementProcess) -> bool: + """Specifies whether or not an observable is compatible with adjoint differentiation on DefaultQubit.""" + return isinstance(mp, qml.measurements.ExpectationMP) - @property - def create_ops_list(self): - """Returns create_ops_list function of the matching precision.""" - if self._mpi: - return create_ops_listMPIC64 if self.use_csingle else create_ops_listMPIC128 - return create_ops_listC64 if self.use_csingle else create_ops_listC128 - @property - def measurements(self): - """Returns Measurements constructor of the matching precision.""" - if self._mpi: - return ( - MeasurementsMPIC64(self._gpu_state) - if self.use_csingle - else MeasurementsMPIC128(self._gpu_state) - ) - return ( - MeasurementsC64(self._gpu_state) - if self.use_csingle - else MeasurementsC128(self._gpu_state) - ) +def _supports_adjoint(circuit): + if circuit is None: + return True - def syncD2H(self, state_vector, use_async=False): - """Copy the state vector data on device to a state vector on the host provided by the user - Args: - state_vector(array[complex]): the state vector array on host - use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. - Note: This function only supports synchronized memory copy. 
- - **Example** - >>> dev = qml.device('lightning.gpu', wires=1) - >>> dev.apply([qml.PauliX(wires=[0])]) - >>> state_vector = np.zeros(2**dev.num_wires).astype(dev.C_DTYPE) - >>> dev.syncD2H(state_vector) - >>> print(state_vector) - [0.+0.j 1.+0.j] - """ - self._gpu_state.DeviceToHost(state_vector.ravel(order="C"), use_async) + prog = TransformProgram() + _add_adjoint_transforms(prog) - def syncH2D(self, state_vector, use_async=False): - """Copy the state vector data on host provided by the user to the state vector on the device - Args: - state_vector(array[complex]): the state vector array on host. - use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. - Note: This function only supports synchronized memory copy. - - **Example** - >>> dev = qml.device('lightning.gpu', wires=3) - >>> obs = qml.Identity(0) @ qml.PauliX(1) @ qml.PauliY(2) - >>> obs1 = qml.Identity(1) - >>> H = qml.Hamiltonian([1.0, 1.0], [obs1, obs]) - >>> state_vector = np.array([0.0 + 0.0j, 0.0 + 0.1j, 0.1 + 0.1j, 0.1 + 0.2j, - 0.2 + 0.2j, 0.3 + 0.3j, 0.3 + 0.4j, 0.4 + 0.5j,], dtype=np.complex64,) - >>> dev.syncH2D(state_vector) - >>> res = dev.expval(H) - >>> print(res) - 1.0 - """ - self._gpu_state.HostToDevice(state_vector.ravel(order="C"), use_async) + try: + prog((circuit,)) + except (DecompositionUndefinedError, qml.DeviceError, AttributeError): + return False + return True - def _create_basis_state(self, index, use_async=False): - """Return a computational basis state over all wires. - Args: - index (int): integer representing the computational basis state. - use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. - Note: This function only supports synchronized memory copy. - """ - self._gpu_state.setBasisState(index, use_async) - def _apply_state_vector(self, state, device_wires, use_async=False): - """Initialize the state vector on GPU with a specified state on host. 
- Note that any use of this method will introduce host-overheads. - Args: - state (array[complex]): normalized input state (on host) of length ``2**len(wires)`` - or broadcasted state of shape ``(batch_size, 2**len(wires))`` - device_wires (Wires): wires that get initialized in the state - use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. - Note: This function only supports synchronized memory copy from host to device. - """ - # translate to wire labels used by device - device_wires = self.map_wires(device_wires) - - state = self._asarray(state, dtype=self.C_DTYPE) # this operation on host - output_shape = [2] * self._num_local_wires - - if len(device_wires) == self.num_wires and Wires(sorted(device_wires)) == device_wires: - # Initialize the entire device state with the input state - if self.num_wires == self._num_local_wires: - self.syncH2D(self._reshape(state, output_shape)) - return - local_state = np.zeros(1 << self._num_local_wires, dtype=self.C_DTYPE) - self._mpi_manager.Scatter(state, local_state, 0) - # Initialize the entire device state with the input state - self.syncH2D(self._reshape(local_state, output_shape)) - return - - # generate basis states on subset of qubits via the cartesian product - basis_states = np.array(list(product([0, 1], repeat=len(device_wires)))) - - # get basis states to alter on full set of qubits - unravelled_indices = np.zeros((2 ** len(device_wires), self.num_wires), dtype=int) - unravelled_indices[:, device_wires] = basis_states - - # get indices for which the state is changed to input state vector elements - ravelled_indices = np.ravel_multi_index(unravelled_indices.T, [2] * self.num_wires) - - # set the state vector on GPU with the unravelled_indices and their corresponding values - self._gpu_state.setStateVector( - ravelled_indices, state, use_async - ) # this operation on device - - def _apply_basis_state(self, state, wires): - """Initialize the state vector in a specified 
computational basis state on GPU directly. - Args: - state (array[int]): computational basis state (on host) of shape ``(wires,)`` - consisting of 0s and 1s. - wires (Wires): wires that the provided computational state should be initialized on - Note: This function does not support broadcasted inputs yet. - """ - # translate to wire labels used by device - device_wires = self.map_wires(wires) +def _adjoint_ops(op: qml.operation.Operator) -> bool: + """Specify whether or not an Operator is supported by adjoint differentiation.""" + return not isinstance(op, qml.PauliRot) and adjoint_ops(op) - # length of basis state parameter - n_basis_state = len(state) - state = state.tolist() if hasattr(state, "tolist") else state - if not set(state).issubset({0, 1}): - raise ValueError("BasisState parameter must consist of 0 or 1 integers.") - if n_basis_state != len(device_wires): - raise ValueError("BasisState parameter and wires must be of equal length.") +def _add_adjoint_transforms(program: TransformProgram) -> None: + """Private helper function for ``preprocess`` that adds the transforms specific + for adjoint differentiation. - # get computational basis state number - basis_states = 2 ** (self.num_wires - 1 - np.array(device_wires)) - basis_states = qml.math.convert_like(basis_states, state) - num = int(qml.math.dot(state, basis_states)) + Args: + program (TransformProgram): where we will add the adjoint differentiation transforms - self._create_basis_state(num) + Side Effects: + Adds transforms to the input program. - def apply_lightning(self, operations): - """Apply a list of operations to the state tensor. + """ - Args: - operations (list[~pennylane.operation.Operation]): operations to apply - dtype (type): Type of numpy ``complex`` to be used. Can be important - to specify for large systems for memory allocation purposes. 
+ name = "adjoint + lightning.gpu" + program.add_transform(no_sampling, name=name) + program.add_transform( + decompose, + stopping_condition=_adjoint_ops, + stopping_condition_shots=stopping_condition_shots, + name=name, + skip_initial_state_prep=False, + ) + program.add_transform(validate_observables, accepted_observables, name=name) + program.add_transform( + validate_measurements, analytic_measurements=adjoint_measurements, name=name + ) + program.add_transform(qml.transforms.broadcast_expand) + program.add_transform(validate_adjoint_trainable_params) - Returns: - array[complex]: the output state tensor - """ - # Skip over identity operations instead of performing - # matrix multiplication with the identity. - for ops in operations: - if isinstance(ops, qml.Identity): - continue - if isinstance(ops, Adjoint): - name = ops.base.name - invert_param = True - else: - name = ops.name - invert_param = False - method = getattr(self._gpu_state, name, None) - wires = self.wires.indices(ops.wires) - - if isinstance(ops, qml.ops.op_math.Controlled) and isinstance( - ops.base, qml.GlobalPhase - ): - controls = ops.control_wires - control_values = ops.control_values - param = ops.base.parameters[0] - matrix = global_phase_diagonal(param, self.wires, controls, control_values) - self._gpu_state.apply(name, wires, False, [], matrix) - elif method is None: - # Inverse can be set to False since qml.matrix(ops) is already in inverted form - try: - mat = qml.matrix(ops) - except AttributeError: # pragma: no cover - # To support older versions of PL - mat = ops.matrix - r_dtype = np.float32 if self.use_csingle else np.float64 - param = [[r_dtype(ops.hash)]] if isinstance(ops, gate_cache_needs_hash) else [] - if len(mat) == 0: - raise ValueError("Unsupported operation") - self._gpu_state.apply( - name, - wires, - False, - param, - mat.ravel(order="C"), # inv = False: Matrix already in correct form; - ) # Parameters can be ignored for explicit matrices; F-order for cuQuantum - - 
else: - param = ops.parameters - method(wires, invert_param, param) - # pylint: disable=unused-argument - def apply(self, operations, rotations=None, **kwargs): - """Applies a list of operations to the state tensor.""" - # State preparation is currently done in Python - if operations: # make sure operations[0] exists - if isinstance(operations[0], StatePrep): - self._apply_state_vector(operations[0].parameters[0].copy(), operations[0].wires) - operations = operations[1:] - elif isinstance(operations[0], BasisState): - self._apply_basis_state(operations[0].parameters[0], operations[0].wires) - operations = operations[1:] - - for operation in operations: - if isinstance(operation, (StatePrep, BasisState)): - raise DeviceError( - f"Operation {operation.name} cannot be used after other " - + f"Operations have already been applied on a {self.short_name} device." - ) +# LightningGPU specific methods +def check_gpu_resources() -> None: + """Check the available resources of each Nvidia GPU""" + if find_library("custatevec") is None and not imp_util.find_spec("cuquantum"): - self.apply_lightning(operations) + raise ImportError( + "cuStateVec libraries not found. Please pip install the appropriate cuStateVec library in a virtual environment." + ) - @staticmethod - def _check_adjdiff_supported_operations(operations): - """Check Lightning adjoint differentiation method support for a tape. + if not DevPool.getTotalDevices(): + raise ValueError("No supported CUDA-capable device found") - Raise ``QuantumFunctionError`` if ``tape`` contains not supported measurements, - observables, or operations by the Lightning adjoint differentiation method. + if not is_gpu_supported(): + raise ValueError(f"CUDA device is an unsupported version: {get_gpu_arch()}") - Args: - tape (.QuantumTape): quantum tape to differentiate. 
- """ - for op in operations: - if op.num_params > 1 and not isinstance(op, Rot): - raise QuantumFunctionError( - f"The {op.name} operation is not supported using " - 'the "adjoint" differentiation method' - ) - def _init_process_jacobian_tape(self, tape, starting_state, use_device_state): - """Generate an initial state vector for ``_process_jacobian_tape``.""" - if starting_state is not None: - if starting_state.size != 2 ** len(self.wires): - raise QuantumFunctionError( - "The number of qubits of starting_state must be the same as " - "that of the device." - ) - self._apply_state_vector(starting_state, self.wires) - elif not use_device_state: - self.reset() - self.apply(tape.operations) - return self._gpu_state - - # pylint: disable=too-many-branches - def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False): - """Implements the adjoint method outlined in - `Jones and Gacon `__ to differentiate an input tape. - - After a forward pass, the circuit is reversed by iteratively applying adjoint - gates to scan backwards through the circuit. - """ - if self.shots is not None: - warn( - "Requested adjoint differentiation to be computed with finite shots." - " The derivative is always exact when using the adjoint differentiation method.", - UserWarning, - ) +@simulator_tracking +@single_tape_support +class LightningGPU(LightningBase): + """PennyLane Lightning GPU device. - tape_return_type = self._check_adjdiff_supported_measurements(tape.measurements) + A device that interfaces with C++ to perform fast linear algebra calculations. - if not tape_return_type: # the tape does not have measurements - return np.array([], dtype=self.state.dtype) + Use of this device requires pre-built binaries or compilation from source. Check out the + :doc:`/lightning_gpu/installation` guide for more details. - if tape_return_type is State: # pragma: no cover - raise QuantumFunctionError( - "Adjoint differentiation method does not support measurement StateMP." 
- "Use vjp method instead for this purpose." - ) + Args: + wires (int): the number of wires to initialize the device with + c_dtype: Datatypes for statevector representation. Must be one of + ``np.complex64`` or ``np.complex128``. + shots (int): How many times the circuit should be evaluated (or sampled) to estimate + the expectation values. Defaults to ``None`` if not specified. Setting + to ``None`` results in computing statistics like expectation values and + variances analytically. + batch_obs (bool): Determine whether we process observables in parallel when + computing the jacobian. This value is only relevant when the lightning.gpu + is built with MPI. Default is False. + mpi (bool): declare if the device will use the MPI support. + mpi_buf_size (int): size of GPU memory (in MiB) set for MPI operation and its default value is 64 MiB. + sync (bool): is host-device data copy synchronized or not. + """ - # Check adjoint diff support - self._check_adjdiff_supported_operations(tape.operations) + # General device options + _device_options = ("c_dtype", "batch_obs") - if self._mpi: - split_obs = False # with MPI batched means compute Jacobian one observables at a time, no point splitting linear combinations - else: - split_obs = self._dp.getTotalDevices() if self._batch_obs else False - processed_data = self._process_jacobian_tape( - tape, starting_state, use_device_state, self._mpi, split_obs - ) + # Device specific options + _CPP_BINARY_AVAILABLE = LGPU_CPP_BINARY_AVAILABLE + _backend_info = backend_info if LGPU_CPP_BINARY_AVAILABLE else None - if not processed_data: # training_params is empty - return np.array([], dtype=self.state.dtype) + # This `config` is used in Catalyst-Frontend + config = Path(__file__).parent / "lightning_gpu.toml" - trainable_params = processed_data["tp_shift"] - # pylint: disable=pointless-string-statement - """ - This path enables controlled batching over the requested observables, be they explicit, or part of a Hamiltonian. 
- The traditional path will assume there exists enough free memory to preallocate all arrays and run through each observable iteratively. - However, for larger system, this becomes impossible, and we hit memory issues very quickly. the batching support here enables several functionalities: - - Pre-allocate memory for all observables on the primary GPU (`batch_obs=False`, default behaviour): This is the simplest path, and works best for few observables, and moderate qubit sizes. All memory is preallocated for each observable, and run through iteratively on a single GPU. - - Evenly distribute the observables over all available GPUs (`batch_obs=True`): This will evenly split the data into ceil(num_obs/num_gpus) chunks, and allocate enough space on each GPU up-front before running through them concurrently. This relies on C++ threads to handle the orchestration. - - Allocate at most `n` observables per GPU (`batch_obs=n`): Providing an integer value restricts each available GPU to at most `n` copies of the statevector, and hence `n` given observables for a given batch. This will iterate over the data in chnuks of size `n*num_gpus`. - """ - adjoint_jacobian = _adj_dtype(self.use_csingle, self._mpi)() - - if self._batch_obs: # Batching of Measurements - jac = adjoint_jacobian.batched( - self._gpu_state, - processed_data["obs_serialized"], - processed_data["ops_serialized"], - trainable_params, - ) - else: - jac = adjoint_jacobian( - self._gpu_state, - processed_data["obs_serialized"], - processed_data["ops_serialized"], - trainable_params, - ) - jac = np.array(jac) - has_shape0 = bool(len(jac)) + # TODO: Move supported ops/obs to TOML file + operations = _operations + # The names of the supported operations. 
- num_obs = len(np.unique(processed_data["obs_indices"])) - rows = processed_data["obs_indices"] - cols = np.arange(len(rows), dtype=int) - data = np.ones(len(rows)) - red_mat = csr_matrix((data, (rows, cols)), shape=(num_obs, len(rows))) - jac = red_mat @ jac.reshape((len(rows), -1)) - jac = jac.reshape(-1, len(trainable_params)) if has_shape0 else jac - jac_r = np.zeros((jac.shape[0], processed_data["all_params"])) - jac_r[:, processed_data["record_tp_rows"]] = jac - return self._adjoint_jacobian_processing(jac_r) + observables = _observables + # The names of the supported observables. - # pylint: disable=inconsistent-return-statements, line-too-long, missing-function-docstring - def vjp(self, measurements, grad_vec, starting_state=None, use_device_state=False): - """Generate the processing function required to compute the vector-Jacobian products - of a tape. + def __init__( # pylint: disable=too-many-arguments + self, + wires: Union[int, List], + *, + c_dtype: Union[np.complex128, np.complex64] = np.complex128, + shots: Union[int, List] = None, + batch_obs: bool = False, + # GPU and MPI arguments + mpi: bool = False, + mpi_buf_size: int = 0, + sync: bool = False, + ): + if not self._CPP_BINARY_AVAILABLE: + raise ImportError( + "Pre-compiled binaries for lightning.gpu are not available. " + "To manually compile from source, follow the instructions at " + "https://docs.pennylane.ai/projects/lightning/en/stable/dev/installation.html." + ) - This function can be used with multiple expectation values or a quantum state. - When a quantum state is given, + check_gpu_resources() - .. code-block:: python + super().__init__( + wires=wires, + c_dtype=c_dtype, + shots=shots, + batch_obs=batch_obs, + ) - vjp_f = dev.vjp([qml.state()], grad_vec) - vjp = vjp_f(tape) + # Set the attributes to call the LightningGPU classes + self._set_lightning_classes() - computes :math:`w = (w_1,\\cdots,w_m)` where + # GPU specific options + self._dp = DevPool() + self._sync = sync - .. 
math:: + # Creating the state vector + self._mpi_handler = MPIHandler(mpi, mpi_buf_size, len(self.wires), c_dtype) - w_k = \\langle v| \\frac{\\partial}{\\partial \\theta_k} | \\psi_{\\pmb{\\theta}} \\rangle. + self._statevector = self.LightningStateVector( + num_wires=len(self.wires), dtype=c_dtype, mpi_handler=self._mpi_handler, sync=self._sync + ) - Here, :math:`m` is the total number of trainable parameters, - :math:`\\pmb{\\theta}` is the vector of trainable parameters and - :math:`\\psi_{\\pmb{\\theta}}` is the output quantum state. + @property + def name(self): + """The name of the device.""" + return "lightning.gpu" - Args: - measurements (list): List of measurement processes for vector-Jacobian product. - Now it must be expectation values or a quantum state. - grad_vec (tensor_like): Gradient-output vector. Must have shape matching the output - shape of the corresponding tape, i.e. number of measurements if the return - type is expectation or :math:`2^N` if the return type is statevector - starting_state (tensor_like): post-forward pass state to start execution with. - It should be complex-valued. Takes precedence over ``use_device_state``. - use_device_state (bool): use current device state to initialize. - A forward pass of the same circuit should be the last thing the device - has executed. If a ``starting_state`` is provided, that takes precedence. + def _set_lightning_classes(self): + """Load the LightningStateVector, LightningMeasurements, LightningAdjointJacobian as class attribute""" + self.LightningStateVector = LightningGPUStateVector + self.LightningMeasurements = LightningGPUMeasurements + self.LightningAdjointJacobian = LightningGPUAdjointJacobian - Returns: - The processing function required to compute the vector-Jacobian products of a tape. + def _setup_execution_config(self, config): """ - if self.shots is not None: - warn( - "Requested adjoint differentiation to be computed with finite shots." 
- " The derivative is always exact when using the adjoint differentiation method.", - UserWarning, - ) - - tape_return_type = self._check_adjdiff_supported_measurements(measurements) - - if math.allclose(grad_vec, 0) or tape_return_type is None: - return lambda tape: math.convert_like(np.zeros(len(tape.trainable_params)), grad_vec) + Update the execution config with choices for how the device should be used and the device options. + """ + updated_values = {} + if config.gradient_method == "best": + updated_values["gradient_method"] = "adjoint" + if config.use_device_gradient is None: + updated_values["use_device_gradient"] = config.gradient_method in ("best", "adjoint") + if config.grad_on_execution is None: + updated_values["grad_on_execution"] = True - if tape_return_type is Expectation: - if len(grad_vec) != len(measurements): - raise ValueError( - "Number of observables in the tape must be the same as the length of grad_vec in the vjp method" - ) + new_device_options = dict(config.device_options) + for option in self._device_options: + if option not in new_device_options: + new_device_options[option] = getattr(self, f"_{option}", None) - if np.iscomplexobj(grad_vec): - raise ValueError( - "The vjp method only works with a real-valued grad_vec when the tape is returning an expectation value" - ) + # It is necessary to set the mcmc default configuration to complete the requirements of ExecuteConfig + mcmc_default = {"mcmc": False, "kernel_name": None, "num_burnin": 0, "rng": None} + new_device_options.update(mcmc_default) - ham = qml.Hamiltonian(grad_vec, [m.obs for m in measurements]) + return replace(config, **updated_values, device_options=new_device_options) - # pylint: disable=protected-access - def processing_fn(tape): - nonlocal ham - num_params = len(tape.trainable_params) + def preprocess(self, execution_config: ExecutionConfig = DefaultExecutionConfig): + """This function defines the device transform program to be applied and an updated device 
configuration. - if num_params == 0: - return np.array([], dtype=self.state.dtype) + Args: + execution_config (Union[ExecutionConfig, Sequence[ExecutionConfig]]): A data structure describing the + parameters needed to fully describe the execution. - new_tape = tape.copy() - new_tape._measurements = [qml.expval(ham)] + Returns: + TransformProgram, ExecutionConfig: A transform program that when called returns :class:`~.QuantumTape`'s that the + device can natively execute as well as a postprocessing function to be called after execution, and a configuration + with unset specifications filled in. - return self.adjoint_jacobian(new_tape, starting_state, use_device_state) + This device: - return processing_fn + * Supports any qubit operations that provide a matrix + * Currently does not support finite shots + * Currently does not intrinsically support parameter broadcasting - # pylint: disable=attribute-defined-outside-init - def sample(self, observable, shot_range=None, bin_size=None, counts=False): - """Return samples of an observable.""" - diagonalizing_gates = observable.diagonalizing_gates() - if diagonalizing_gates: - self.apply(diagonalizing_gates) - if not isinstance(observable, qml.PauliZ): - self._samples = self.generate_samples() - results = super().sample( - observable, shot_range=shot_range, bin_size=bin_size, counts=counts + """ + exec_config = self._setup_execution_config(execution_config) + program = TransformProgram() + + program.add_transform(validate_measurements, name=self.name) + program.add_transform(validate_observables, accepted_observables, name=self.name) + program.add_transform(validate_device_wires, self.wires, name=self.name) + program.add_transform( + mid_circuit_measurements, device=self, mcm_config=exec_config.mcm_config ) - if diagonalizing_gates: - self.apply([qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)]) - return results - - def generate_samples(self): - """Generate samples - Returns: - array[int]: array of 
samples in binary representation with shape - ``(dev.shots, dev.num_wires)`` - """ - shots = self.shots if isinstance(self.shots, int) else self.shots.total_shots + program.add_transform( + decompose, + stopping_condition=stopping_condition, + stopping_condition_shots=stopping_condition_shots, + skip_initial_state_prep=True, + name=self.name, + ) + program.add_transform(qml.transforms.broadcast_expand) - return self.measurements.generate_samples(len(self.wires), shots).astype(int, copy=False) + if exec_config.gradient_method == "adjoint": + _add_adjoint_transforms(program) + return program, exec_config - # pylint: disable=protected-access - def expval(self, observable, shot_range=None, bin_size=None): - """Expectation value of the supplied observable. + # pylint: disable=unused-argument + def execute( + self, + circuits: QuantumTape_or_Batch, + execution_config: ExecutionConfig = DefaultExecutionConfig, + ) -> Result_or_ResultBatch: + """Execute a circuit or a batch of circuits and turn it into results. Args: - observable: A PennyLane observable. - shot_range (tuple[int]): 2-tuple of integers specifying the range of samples - to use. If not specified, all samples are used. - bin_size (int): Divides the shot range into bins of size ``bin_size``, and - returns the measurement statistic separately over each bin. If not - provided, the entire shot range is treated as a single bin. + circuits (Union[QuantumTape, Sequence[QuantumTape]]): the quantum circuits to be executed + execution_config (ExecutionConfig): a datastructure with additional information required for execution Returns: - Expectation value of the observable + TensorLike, tuple[TensorLike], tuple[tuple[TensorLike]]: A numeric result of the computation. 
""" - if isinstance(observable, qml.Projector): - diagonalizing_gates = observable.diagonalizing_gates() - if self.shots is None and diagonalizing_gates: - self.apply(diagonalizing_gates) - results = super().expval(observable, shot_range=shot_range, bin_size=bin_size) - if self.shots is None and diagonalizing_gates: - self.apply([qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)]) - return results - - if self.shots is not None: - # estimate the expectation value - samples = self.sample(observable, shot_range=shot_range, bin_size=bin_size) - return np.squeeze(np.mean(samples, axis=0)) - - if isinstance(observable, qml.SparseHamiltonian): - if self._mpi: - # Identity for CSR_SparseHamiltonian to pass to processes with rank != 0 to reduce - # host(cpu) memory requirements - obs = qml.Identity(0) - Hmat = qml.Hamiltonian([1.0], [obs]).sparse_matrix() - H_sparse = qml.SparseHamiltonian(Hmat, wires=range(1)) - CSR_SparseHamiltonian = H_sparse.sparse_matrix().tocsr() - # CSR_SparseHamiltonian for rank == 0 - if self._mpi_manager.getRank() == 0: - CSR_SparseHamiltonian = observable.sparse_matrix().tocsr() - else: - CSR_SparseHamiltonian = observable.sparse_matrix().tocsr() - - return self.measurements.expval( - CSR_SparseHamiltonian.indptr, - CSR_SparseHamiltonian.indices, - CSR_SparseHamiltonian.data, - ) - - # use specialized functors to compute expval(Hermitian) - if isinstance(observable, qml.Hermitian): - observable_wires = self.map_wires(observable.wires) - if self._mpi and len(observable_wires) > self._num_local_wires: - raise RuntimeError( - "MPI backend does not support Hermitian with number of target wires larger than local wire number." 
+ results = [] + for circuit in circuits: + if self._wire_map is not None: + [circuit], _ = qml.map_wires(circuit, self._wire_map) + results.append( + self.simulate( + circuit, + self._statevector, ) - matrix = observable.matrix() - return self.measurements.expval(matrix, observable_wires) - - if ( - isinstance(observable, qml.ops.Hamiltonian) - or (observable.arithmetic_depth > 0) - or isinstance(observable.name, List) - ): - ob_serialized = QuantumScriptSerializer( - self.short_name, self.use_csingle, self._mpi - )._ob(observable, self.wire_map) - return self.measurements.expval(ob_serialized) + ) - # translate to wire labels used by device - observable_wires = self.map_wires(observable.wires) + return tuple(results) - return self.measurements.expval(observable.name, observable_wires) + def supports_derivatives( + self, + execution_config: Optional[ExecutionConfig] = None, + circuit: Optional[qml.tape.QuantumTape] = None, + ) -> bool: + """Check whether or not derivatives are available for a given configuration and circuit. - def probability_lightning(self, wires=None): - """Return the probability of each computational basis state. + ``LightningGPU`` supports adjoint differentiation with analytic results. Args: - wires (Iterable[Number, str], Number, str, Wires): wires to return - marginal probabilities for. Wires not provided are traced out of the system. + execution_config (ExecutionConfig): The configuration of the desired derivative calculation + circuit (QuantumTape): An optional circuit to check derivatives support for. Returns: - array[float]: list of the probabilities + Bool: Whether or not a derivative can be calculated provided the given information + """ - # translate to wire labels used by device - observable_wires = self.map_wires(wires) - # Device returns as col-major orderings, so perform transpose on data for bit-index shuffle for now. 
- local_prob = self.measurements.probs(observable_wires) - if len(local_prob) > 0: - num_local_wires = len(local_prob).bit_length() - 1 if len(local_prob) > 0 else 0 - return local_prob.reshape([2] * num_local_wires).transpose().reshape(-1) - return local_prob - - def var(self, observable, shot_range=None, bin_size=None): - """Variance of the supplied observable. + if execution_config is None and circuit is None: + return True + if execution_config.gradient_method not in {"adjoint", "best"}: + return False + if circuit is None: + return True + return _supports_adjoint(circuit=circuit) + + def simulate( + self, + circuit: QuantumScript, + state: LightningGPUStateVector, + ) -> Result: + """Simulate a single quantum script. Args: - observable: A PennyLane observable. - shot_range (tuple[int]): 2-tuple of integers specifying the range of samples - to use. If not specified, all samples are used. - bin_size (int): Divides the shot range into bins of size ``bin_size``, and - returns the measurement statistic separately over each bin. If not - provided, the entire shot range is treated as a single bin. 
+ circuit (QuantumTape): The single circuit to simulate + state (LightningGPUStateVector): handle to Lightning state vector Returns: - Variance of the observable - """ - if isinstance(observable, qml.Projector): - diagonalizing_gates = observable.diagonalizing_gates() - if self.shots is None and diagonalizing_gates: - self.apply(diagonalizing_gates) - results = super().var(observable, shot_range=shot_range, bin_size=bin_size) - if self.shots is None and diagonalizing_gates: - self.apply([qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)]) - return results - - if self.shots is not None: - # estimate the var - # Lightning doesn't support sampling yet - samples = self.sample(observable, shot_range=shot_range, bin_size=bin_size) - return np.squeeze(np.var(samples, axis=0)) - - if isinstance(observable, qml.SparseHamiltonian): - csr_hamiltonian = observable.sparse_matrix(wire_order=self.wires).tocsr(copy=False) - return self.measurements.var( - csr_hamiltonian.indptr, - csr_hamiltonian.indices, - csr_hamiltonian.data, - ) + Tuple[TensorLike]: The results of the simulation - if ( - isinstance(observable, (qml.Hermitian, qml.ops.Hamiltonian)) - or (observable.arithmetic_depth > 0) - or isinstance(observable.name, List) - ): - ob_serialized = QuantumScriptSerializer( - self.short_name, self.use_csingle, self._mpi - )._ob(observable, self.wire_map) - return self.measurements.var(ob_serialized) - - # translate to wire labels used by device - observable_wires = self.map_wires(observable.wires) + Note that this function can return measurements for non-commuting observables simultaneously. 
+ """ + if circuit.shots and (any(isinstance(op, MidMeasureMP) for op in circuit.operations)): + raise qml.DeviceError("LightningGPU does not support Mid-circuit measurements.") - return self.measurements.var(observable.name, observable_wires) + state.reset_state() + final_state = state.get_final_state(circuit) + return self.LightningMeasurements(final_state).measure_final_state(circuit) diff --git a/pennylane_lightning/lightning_kokkos/_adjoint_jacobian.py b/pennylane_lightning/lightning_kokkos/_adjoint_jacobian.py index 4338a5b87..bee481aac 100644 --- a/pennylane_lightning/lightning_kokkos/_adjoint_jacobian.py +++ b/pennylane_lightning/lightning_kokkos/_adjoint_jacobian.py @@ -15,6 +15,10 @@ Internal methods for adjoint Jacobian differentiation method. """ +from __future__ import annotations + +from warnings import warn + try: from pennylane_lightning.lightning_kokkos_ops.algorithms import ( AdjointJacobianC64, @@ -22,8 +26,8 @@ create_ops_listC64, create_ops_listC128, ) -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) import numpy as np from pennylane.tape import QuantumTape @@ -31,8 +35,6 @@ # pylint: disable=ungrouped-imports from pennylane_lightning.core._adjoint_jacobian_base import LightningBaseAdjointJacobian -from ._state_vector import LightningKokkosStateVector - class LightningKokkosAdjointJacobian(LightningBaseAdjointJacobian): """Check and execute the adjoint Jacobian differentiation method. 
@@ -44,7 +46,11 @@ class LightningKokkosAdjointJacobian(LightningBaseAdjointJacobian): # pylint: disable=too-few-public-methods - def __init__(self, qubit_state: LightningKokkosStateVector, batch_obs: bool = False) -> None: + def __init__( + self, + qubit_state: LightningKokkosStateVector, # pylint: disable=undefined-variable + batch_obs: bool = False, + ) -> None: super().__init__(qubit_state, batch_obs) # Initialize the C++ binds diff --git a/pennylane_lightning/lightning_kokkos/_measurements.py b/pennylane_lightning/lightning_kokkos/_measurements.py index b438af350..ee848739c 100644 --- a/pennylane_lightning/lightning_kokkos/_measurements.py +++ b/pennylane_lightning/lightning_kokkos/_measurements.py @@ -15,11 +15,14 @@ Class implementation for state vector measurements. """ -# pylint: disable=import-error, no-name-in-module, ungrouped-imports +from __future__ import annotations + +from warnings import warn + try: from pennylane_lightning.lightning_kokkos_ops import MeasurementsC64, MeasurementsC128 -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) from typing import List @@ -28,6 +31,7 @@ from pennylane.measurements import CountsMP, SampleMeasurement, Shots from pennylane.typing import TensorLike +# pylint: disable=ungrouped-imports from pennylane_lightning.core._measurements_base import LightningBaseMeasurements @@ -44,7 +48,7 @@ class LightningKokkosMeasurements( def __init__( self, - kokkos_state, + kokkos_state: LightningKokkosStateVector, # pylint: disable=undefined-variable ) -> None: super().__init__(kokkos_state) diff --git a/pennylane_lightning/lightning_kokkos/_state_vector.py b/pennylane_lightning/lightning_kokkos/_state_vector.py index dda40ffad..9073a9dd8 100644 --- a/pennylane_lightning/lightning_kokkos/_state_vector.py +++ b/pennylane_lightning/lightning_kokkos/_state_vector.py @@ -14,6 +14,7 @@ """ Class implementation for lightning_kokkos state-vector manipulation. 
""" +from warnings import warn try: from pennylane_lightning.lightning_kokkos_ops import ( @@ -23,8 +24,10 @@ allocate_aligned_array, print_configuration, ) -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) + +from typing import Union import numpy as np import pennylane as qml @@ -59,17 +62,16 @@ class LightningKokkosStateVector(LightningBaseStateVector): def __init__( self, - num_wires, - dtype=np.complex128, + num_wires: int, + dtype: Union[np.complex128, np.complex64] = np.complex128, kokkos_args=None, - sync=True, - ): # pylint: disable=too-many-arguments + ): + super().__init__(num_wires, dtype) self._device_name = "lightning.kokkos" self._kokkos_config = {} - self._sync = sync # Initialize the state vector if kokkos_args is None: @@ -143,7 +145,7 @@ def sync_d2h(self, state_vector): >>> dev = qml.device('lightning.kokkos', wires=1) >>> dev.apply([qml.PauliX(wires=[0])]) - >>> state_vector = np.zeros(2**dev.num_wires).astype(dev.C_DTYPE) + >>> state_vector = np.zeros(2**dev.num_wires).astype(dev.c_dtype) >>> dev.sync_d2h(state_vector) >>> print(state_vector) [0.+0.j 1.+0.j] @@ -269,9 +271,12 @@ def _apply_lightning( ) elif isinstance(operation, qml.PauliRot): method = getattr(state, "applyPauliRot") - paulis = operation._hyperparameters["pauli_word"] + # pylint: disable=protected-access + paulis = operation._hyperparameters[ + "pauli_word" + ] # pylint: disable=protected-access wires = [i for i, w in zip(wires, paulis) if w != "I"] - word = "".join(p for p in paulis if p != "I") # pylint: disable=protected-access + word = "".join(p for p in paulis if p != "I") method(wires, invert_param, operation.parameters, word) elif method is not None: # apply specialized gate param = operation.parameters diff --git a/pennylane_lightning/lightning_kokkos/lightning_kokkos.py b/pennylane_lightning/lightning_kokkos/lightning_kokkos.py index 51221dde9..b30ca1ad2 100644 --- a/pennylane_lightning/lightning_kokkos/lightning_kokkos.py +++ 
b/pennylane_lightning/lightning_kokkos/lightning_kokkos.py @@ -20,7 +20,7 @@ from dataclasses import replace from functools import reduce from pathlib import Path -from typing import Optional +from typing import List, Optional, Union from warnings import warn import numpy as np @@ -50,10 +50,6 @@ Result_or_ResultBatch, ) -from ._adjoint_jacobian import LightningKokkosAdjointJacobian -from ._measurements import LightningKokkosMeasurements -from ._state_vector import LightningKokkosStateVector - try: from pennylane_lightning.lightning_kokkos_ops import backend_info, print_configuration @@ -63,6 +59,10 @@ LK_CPP_BINARY_AVAILABLE = False backend_info = None +from ._adjoint_jacobian import LightningKokkosAdjointJacobian +from ._measurements import LightningKokkosMeasurements +from ._state_vector import LightningKokkosStateVector + # The set of supported operations. _operations = frozenset( { @@ -289,13 +289,12 @@ class LightningKokkos(LightningBase): def __init__( # pylint: disable=too-many-arguments self, - wires, + wires: Union[int, List], *, - c_dtype=np.complex128, - shots=None, - batch_obs=False, + c_dtype: Union[np.complex128, np.complex64] = np.complex128, + shots: Union[int, List] = None, + batch_obs: bool = False, # Kokkos arguments - sync=True, kokkos_args=None, ): if not self._CPP_BINARY_AVAILABLE: @@ -317,11 +316,10 @@ def __init__( # pylint: disable=too-many-arguments # Kokkos specific options self._kokkos_args = kokkos_args - self._sync = sync # Creating the state vector self._statevector = self.LightningStateVector( - num_wires=len(self.wires), dtype=c_dtype, kokkos_args=kokkos_args, sync=sync + num_wires=len(self.wires), dtype=c_dtype, kokkos_args=kokkos_args ) if not LightningKokkos.kokkos_config: @@ -492,7 +490,7 @@ def simulate( aux_circ, mid_measurements=mid_measurements, postselect_mode=postselect_mode ) results.append( - LightningKokkosMeasurements(final_state).measure_final_state( + self.LightningMeasurements(final_state).measure_final_state( 
aux_circ, mid_measurements=mid_measurements ) ) @@ -500,7 +498,7 @@ def simulate( state.reset_state() final_state = state.get_final_state(circuit) - return LightningKokkosMeasurements(final_state).measure_final_state(circuit) + return self.LightningMeasurements(final_state).measure_final_state(circuit) @staticmethod def get_c_interface(): diff --git a/pennylane_lightning/lightning_qubit/_adjoint_jacobian.py b/pennylane_lightning/lightning_qubit/_adjoint_jacobian.py index 0abc7f72f..390c0cf69 100644 --- a/pennylane_lightning/lightning_qubit/_adjoint_jacobian.py +++ b/pennylane_lightning/lightning_qubit/_adjoint_jacobian.py @@ -14,6 +14,9 @@ r""" Internal methods for adjoint Jacobian differentiation method. """ +from __future__ import annotations + +from warnings import warn try: from pennylane_lightning.lightning_qubit_ops.algorithms import ( @@ -22,8 +25,8 @@ create_ops_listC64, create_ops_listC128, ) -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) from os import getenv @@ -34,8 +37,6 @@ # pylint: disable=ungrouped-imports from pennylane_lightning.core._adjoint_jacobian_base import LightningBaseAdjointJacobian -from ._state_vector import LightningStateVector - class LightningAdjointJacobian( LightningBaseAdjointJacobian @@ -47,7 +48,12 @@ class LightningAdjointJacobian( batch_obs(bool): If serialized tape is to be batched or not. 
""" - def __init__(self, qubit_state: LightningStateVector, batch_obs: bool = False) -> None: + def __init__( + self, + qubit_state: LightningStateVector, # pylint: disable=undefined-variable + batch_obs: bool = False, + ) -> None: + super().__init__(qubit_state, batch_obs) # Initialize the C++ binds diff --git a/pennylane_lightning/lightning_qubit/_measurements.py b/pennylane_lightning/lightning_qubit/_measurements.py index c1b97a118..415ce7408 100644 --- a/pennylane_lightning/lightning_qubit/_measurements.py +++ b/pennylane_lightning/lightning_qubit/_measurements.py @@ -16,10 +16,14 @@ """ # pylint: disable=import-error, no-name-in-module, ungrouped-imports +from __future__ import annotations + +from warnings import warn + try: from pennylane_lightning.lightning_qubit_ops import MeasurementsC64, MeasurementsC128 -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) from functools import reduce from typing import List @@ -53,7 +57,7 @@ class LightningMeasurements(LightningBaseMeasurements): # pylint: disable=too-f def __init__( self, - qubit_state, + qubit_state: LightningStateVector, # pylint: disable=undefined-variable mcmc: bool = None, kernel_name: str = None, num_burnin: int = None, diff --git a/pennylane_lightning/lightning_qubit/_state_vector.py b/pennylane_lightning/lightning_qubit/_state_vector.py index b4b6ef5ff..62068dcbd 100644 --- a/pennylane_lightning/lightning_qubit/_state_vector.py +++ b/pennylane_lightning/lightning_qubit/_state_vector.py @@ -14,6 +14,7 @@ """ Class implementation for lightning_qubit state-vector manipulation. 
""" +from warnings import warn try: from pennylane_lightning.lightning_qubit_ops import ( @@ -21,8 +22,10 @@ StateVectorC128, allocate_aligned_array, ) -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) + +from typing import Union import numpy as np import pennylane as qml @@ -50,7 +53,8 @@ class LightningStateVector(LightningBaseStateVector): # pylint: disable=too-few device_name(string): state vector device name. Options: ["lightning.qubit"] """ - def __init__(self, num_wires, dtype=np.complex128): + def __init__(self, num_wires: int, dtype: Union[np.complex128, np.complex64] = np.complex128): + super().__init__(num_wires, dtype) self._device_name = "lightning.qubit" diff --git a/pennylane_lightning/lightning_qubit/lightning_qubit.py b/pennylane_lightning/lightning_qubit/lightning_qubit.py index c317bbfba..abf080978 100644 --- a/pennylane_lightning/lightning_qubit/lightning_qubit.py +++ b/pennylane_lightning/lightning_qubit/lightning_qubit.py @@ -18,7 +18,7 @@ from dataclasses import replace from functools import reduce from pathlib import Path -from typing import Optional, Sequence +from typing import List, Optional, Sequence, Union from warnings import warn import numpy as np @@ -48,10 +48,6 @@ Result_or_ResultBatch, ) -from ._adjoint_jacobian import LightningAdjointJacobian -from ._measurements import LightningMeasurements -from ._state_vector import LightningStateVector - try: from pennylane_lightning.lightning_qubit_ops import backend_info @@ -60,6 +56,10 @@ warn(str(ex), UserWarning) LQ_CPP_BINARY_AVAILABLE = False +from ._adjoint_jacobian import LightningAdjointJacobian +from ._measurements import LightningMeasurements +from ._state_vector import LightningStateVector + # The set of supported operations. 
_operations = frozenset( { @@ -323,16 +323,16 @@ class LightningQubit(LightningBase): def __init__( # pylint: disable=too-many-arguments self, - wires, + wires: Union[int, List], *, - c_dtype=np.complex128, - shots=None, - batch_obs=False, + c_dtype: Union[np.complex128, np.complex64] = np.complex128, + shots: Union[int, List] = None, + batch_obs: bool = False, # Markov Chain Monte Carlo (MCMC) sampling method arguments - seed="global", - mcmc=False, - kernel_name="Local", - num_burnin=100, + seed: Union[str, int] = "global", + mcmc: bool = False, + kernel_name: str = "Local", + num_burnin: int = 100, ): if not self._CPP_BINARY_AVAILABLE: raise ImportError( @@ -559,4 +559,4 @@ def simulate( state.reset_state() final_state = state.get_final_state(circuit) - return LightningMeasurements(final_state, **mcmc).measure_final_state(circuit) + return self.LightningMeasurements(final_state, **mcmc).measure_final_state(circuit) diff --git a/tests/conftest.py b/tests/conftest.py index a64841846..1c06ae0dc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -142,10 +142,15 @@ def get_device(): from pennylane_lightning.lightning_kokkos_ops import LightningException elif device_name == "lightning.gpu": from pennylane_lightning.lightning_gpu import LightningGPU as LightningDevice - - LightningAdjointJacobian = None - LightningMeasurements = None - LightningStateVector = None + from pennylane_lightning.lightning_gpu._adjoint_jacobian import ( + LightningGPUAdjointJacobian as LightningAdjointJacobian, + ) + from pennylane_lightning.lightning_gpu._measurements import ( + LightningGPUMeasurements as LightningMeasurements, + ) + from pennylane_lightning.lightning_gpu._state_vector import ( + LightningGPUStateVector as LightningStateVector, + ) if hasattr(pennylane_lightning, "lightning_gpu_ops"): import pennylane_lightning.lightning_gpu_ops as lightning_ops diff --git a/tests/lightning_qubit/test_adjoint_jacobian_class.py b/tests/lightning_qubit/test_adjoint_jacobian_class.py 
index 236c697f1..7fbaa19b1 100644 --- a/tests/lightning_qubit/test_adjoint_jacobian_class.py +++ b/tests/lightning_qubit/test_adjoint_jacobian_class.py @@ -481,6 +481,7 @@ def test_hermitian_expectation(self, tol, lightning_sv): tape.trainable_params = {0} statevector.reset_state() + vjp = self.calculate_vjp(statevector, tape, dy) assert np.allclose(vjp, -0.8 * np.sin(x), atol=tol) @@ -498,6 +499,7 @@ def test_hermitian_tensor_expectation(self, tol, lightning_sv): tape.trainable_params = {0} statevector.reset_state() + vjp = self.calculate_vjp(statevector, tape, dy) assert np.allclose(vjp, -0.8 * np.sin(x), atol=tol) diff --git a/tests/lightning_qubit/test_measurements_class.py b/tests/lightning_qubit/test_measurements_class.py index 471fb6de6..c5c61e054 100644 --- a/tests/lightning_qubit/test_measurements_class.py +++ b/tests/lightning_qubit/test_measurements_class.py @@ -669,8 +669,8 @@ def test_double_return_value(self, shots, measurement, obs0_, obs1_, lightning_s assert np.allclose(r, e, atol=dtol, rtol=dtol) @pytest.mark.skipif( - device_name == "lightning.tensor", - reason="lightning.tensor does not support out of order probs.", + device_name in ("lightning.gpu", "lightning.tensor"), + reason=f"{device_name} does not support out of order probs.", ) @pytest.mark.parametrize( "cases", diff --git a/tests/lightning_qubit/test_state_vector_class.py b/tests/lightning_qubit/test_state_vector_class.py index 3918afcd5..b3baaa3ea 100644 --- a/tests/lightning_qubit/test_state_vector_class.py +++ b/tests/lightning_qubit/test_state_vector_class.py @@ -30,6 +30,9 @@ except ImportError: pass +if device_name == "lightning.gpu": + from pennylane_lightning.lightning_gpu._mpi_handler import MPIHandler + if device_name == "lightning.tensor": pytest.skip("Skipping tests for the LightningTensor class.", allow_module_level=True) @@ -39,6 +42,7 @@ allow_module_level=True, ) + if not LightningDevice._CPP_BINARY_AVAILABLE: pytest.skip("No binary module found. 
Skipping.", allow_module_level=True) @@ -86,10 +90,18 @@ def test_apply_state_vector_with_lightning_handle(tol): state_vector_1 = LightningStateVector(2) state_vector_1.apply_operations([qml.BasisState(np.array([0, 1]), wires=[0, 1])]) - state_vector_2 = LightningStateVector(2) - state_vector_2._apply_state_vector(state_vector_1.state_vector, Wires([0, 1])) + if device_name == "lightning.gpu": + with pytest.raises( + qml.DeviceError, match="LightningGPU does not support allocate external state_vector." + ): + state_vector_2 = LightningStateVector(2) + state_vector_2._apply_state_vector(state_vector_1.state_vector, Wires([0, 1])) + + else: + state_vector_2 = LightningStateVector(2) + state_vector_2._apply_state_vector(state_vector_1.state_vector, Wires([0, 1])) - assert np.allclose(state_vector_1.state, state_vector_2.state, atol=tol, rtol=0) + assert np.allclose(state_vector_1.state, state_vector_2.state, atol=tol, rtol=0) @pytest.mark.parametrize( diff --git a/tests/new_api/test_device.py b/tests/new_api/test_device.py index 0485f3a05..111dd3af7 100644 --- a/tests/new_api/test_device.py +++ b/tests/new_api/test_device.py @@ -43,8 +43,7 @@ validate_measurements, validate_observables, ) - -if device_name == "lightning.kokkos": +elif device_name == "lightning.kokkos": from pennylane_lightning.lightning_kokkos.lightning_kokkos import ( _add_adjoint_transforms, _adjoint_ops, @@ -62,13 +61,31 @@ validate_measurements, validate_observables, ) - - -if device_name == "lightning.tensor": +elif device_name == "lightning.gpu": + from pennylane_lightning.lightning_gpu.lightning_gpu import ( + _add_adjoint_transforms, + _adjoint_ops, + _supports_adjoint, + accepted_observables, + adjoint_measurements, + adjoint_observables, + decompose, + mid_circuit_measurements, + no_sampling, + stopping_condition, + stopping_condition_shots, + validate_adjoint_trainable_params, + validate_device_wires, + validate_measurements, + validate_observables, + ) +elif device_name == 
"lightning.tensor": from pennylane_lightning.lightning_tensor.lightning_tensor import ( accepted_observables, stopping_condition, ) +else: + raise TypeError(f"The device name: {device_name} is not a valid name") if not LightningDevice._new_API: pytest.skip("Exclusive tests for new device API. Skipping.", allow_module_level=True) @@ -448,6 +465,11 @@ def test_execute_single_measurement(self, theta, phi, mp, dev): if isinstance(mp.obs, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): mp.obs = qml.operation.convert_to_legacy_H(mp.obs) + if isinstance(mp.obs, qml.SparseHamiltonian) and dev.dtype == np.complex64: + pytest.skip( + reason="The conversion from qml.Hamiltonian to SparseHamiltonian is only possible with np.complex128" + ) + qs = QuantumScript( [ qml.RX(phi, 0), @@ -641,6 +663,12 @@ def test_supports_derivatives(self, dev, config, tape, expected, batch_obs): qml.Z(1) + qml.X(1), qml.Hamiltonian([-1.0, 1.5], [qml.Z(1), qml.X(1)]), qml.Hermitian(qml.Hadamard.compute_matrix(), 0), + qml.SparseHamiltonian( + qml.Hamiltonian([-1.0, 1.5], [qml.Z(1), qml.X(1)]).sparse_matrix( + wire_order=[0, 1, 2] + ), + wires=[0, 1, 2], + ), qml.Projector([1], 1), ], ) @@ -649,6 +677,11 @@ def test_derivatives_single_expval( self, theta, phi, dev, obs, execute_and_derivatives, batch_obs ): """Test that the jacobian is correct when a tape has a single expectation value""" + if isinstance(obs, qml.SparseHamiltonian) and dev.dtype == np.complex64: + pytest.skip( + reason="The conversion from qml.Hamiltonian to SparseHamiltonian is only possible with np.complex128" + ) + if isinstance(obs, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): obs = qml.operation.convert_to_legacy_H(obs) @@ -705,6 +738,11 @@ def test_derivatives_multi_expval( self, theta, phi, omega, dev, obs1, obs2, execute_and_derivatives, batch_obs ): """Test that the jacobian is correct when a tape has multiple expectation values""" + if isinstance(obs2, qml.SparseHamiltonian) 
and dev.dtype == np.complex64: + pytest.skip( + reason="The conversion from qml.Hamiltonian to SparseHamiltonian is only possible with np.complex128" + ) + if isinstance(obs1, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): obs1 = qml.operation.convert_to_legacy_H(obs1) if isinstance(obs2, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): @@ -1074,6 +1112,11 @@ def test_vjp_multi_expval( self, theta, phi, omega, dev, obs1, obs2, execute_and_derivatives, batch_obs ): """Test that the VJP is correct when a tape has multiple expectation values""" + if isinstance(obs2, qml.SparseHamiltonian) and dev.dtype == np.complex64: + pytest.skip( + reason="The conversion from qml.Hamiltonian to SparseHamiltonian is only possible with np.complex128" + ) + if isinstance(obs1, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): obs1 = qml.operation.convert_to_legacy_H(obs1) if isinstance(obs2, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): diff --git a/tests/test_var.py b/tests/test_var.py index 4b4e8561f..7bdcec2c2 100644 --- a/tests/test_var.py +++ b/tests/test_var.py @@ -24,7 +24,6 @@ if not ld._CPP_BINARY_AVAILABLE: pytest.skip("No binary module found. Skipping.", allow_module_level=True) - np.random.seed(42)