Add controlled gate support to lightning.tensor (#880)

### Before submitting

Please complete the following checklist when submitting a PR:

- [ ] All new features must include a unit test.
      If you've fixed a bug or added code that should be tested, add a test to the
      [`tests`](../tests) directory!

- [ ] All new functions and code must be clearly commented and documented.
      If you do make documentation changes, make sure that the docs build and
      render correctly by running `make docs`.

- [ ] Ensure that the test suite passes, by running `make test`.

- [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing the
      change, and including a link back to the PR.

- [x] Ensure that code is properly formatted by running `make format`.

When all the above are checked, delete everything above the dashed
line and fill in the pull request template.


------------------------------------------------------------------------------------------------------------

**Context:**

Add controlled gate support to `lightning.tensor`. This can improve the
performance of `lightning.tensor` by avoiding construction of the full gate
matrix for controlled gates. However, as of v24.08, `cutensornet` only
supports controlled gates with a single target wire. [SC-72522]
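
For illustration, here is a minimal sketch of the kind of circuit this enables on `lightning.tensor`. It is a hedged example, not part of this PR: whether a given controlled gate is applied natively or falls back to a dense matrix depends on the installed `cutensornet` version and on the number of target wires.

```python
import numpy as np
import pennylane as qml

# Assumes a build of pennylane-lightning that provides the lightning.tensor device.
dev = qml.device("lightning.tensor", wires=3)

@qml.qnode(dev)
def circuit(theta):
    qml.Hadamard(wires=0)
    # Two control wires, one target wire: eligible for the native
    # controlled-gate path described in this PR.
    qml.ctrl(qml.RX(theta, wires=2), control=[0, 1], control_values=[1, 0])
    return qml.expval(qml.PauliZ(2))

print(circuit(np.pi / 4))
```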

**Description of the Change:**

**Benefits:**

**Possible Drawbacks:**

**Related GitHub Issues:**

---------

Co-authored-by: ringo-but-quantum <[email protected]>
Co-authored-by: Vincent Michaud-Rioux <[email protected]>
Co-authored-by: Ali Asadi <[email protected]>
4 people authored Sep 6, 2024
1 parent 4bb34b7 commit c676ced
Showing 7 changed files with 258 additions and 23 deletions.
3 changes: 3 additions & 0 deletions .github/CHANGELOG.md
@@ -2,6 +2,9 @@

### New features since last release

* Add single-target-wire controlled gate support to `lightning.tensor`. Note that `cutensornet` only supports controlled gates with a single target wire as of `v24.08`; controlled gates with more than one target wire must be converted to a dense matrix.
[(#880)](https://github.com/PennyLaneAI/pennylane-lightning/pull/880)

* Lightning-Kokkos migrated to the new device API.
[(#810)](https://github.com/PennyLaneAI/pennylane-lightning/pull/810)

@@ -73,6 +73,8 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
std::shared_ptr<TNCudaGateCache<PrecisionT>> gate_cache_;
std::set<int64_t> gate_ids_;

std::vector<std::size_t> identity_gate_ids_;

public:
TNCudaBase() = delete;

@@ -244,6 +246,87 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
}
}

/**
* @brief Append a single controlled gate tensor to the compute graph.
*
* NOTE: This function does not update the quantum state; it only appends
* the gate tensor operator to the graph. Controlled gates are immutable
* as of v24.08.
*
* @param baseOpName Base gate's name.
* @param controlled_wires Controlled wires for the gate.
* @param controlled_values Controlled values for the gate.
* @param targetWires Target wires for the gate.
* @param adjoint Indicates whether to use adjoint of gate.
* @param params Optional parameter list for parametric gates.
* @param gate_matrix Optional gate matrix for custom gates.
*/
void
applyControlledOperation(const std::string &baseOpName,
const std::vector<std::size_t> &controlled_wires,
const std::vector<bool> &controlled_values,
const std::vector<std::size_t> &targetWires,
bool adjoint = false,
const std::vector<PrecisionT> &params = {0.0},
const std::vector<ComplexT> &gate_matrix = {}) {
// TODO: Need to revisit this line of code once `cutensornet` supports
// multi-target wire controlled gates
PL_ABORT_IF_NOT(targetWires.size() == 1,
"Unsupported controlled gate: cutensornet only "
"supports 1-wire target controlled gates");

auto &&par = (params.empty()) ? std::vector<PrecisionT>{0.0} : params;

int64_t dummy_id = gate_ids_.empty() ? 1 : *gate_ids_.rbegin() + 1;

if (gate_matrix.empty()) {
gate_cache_->add_gate(dummy_id, baseOpName, par, adjoint);
} else {
auto gate_key = std::make_pair(baseOpName, par);
std::vector<CFP_t> matrix_cu =
cuUtil::complexToCu<ComplexT>(gate_matrix);
gate_cache_->add_gate(dummy_id, gate_key, matrix_cu, adjoint);
}

int64_t id;

std::vector<int32_t> controlledModes =
cuUtil::NormalizeCastIndices<std::size_t, int32_t>(
controlled_wires, BaseType::getNumQubits());

std::vector<int64_t> controlled_values_int64(controlled_values.size());
std::transform(controlled_values.begin(), controlled_values.end(),
controlled_values_int64.begin(),
[](bool val) { return static_cast<int64_t>(val); });

std::vector<int32_t> targetModes =
cuUtil::NormalizeCastIndices<std::size_t, int32_t>(
targetWires, BaseType::getNumQubits());

PL_CUTENSORNET_IS_SUCCESS(cutensornetStateApplyControlledTensorOperator(
/* const cutensornetHandle_t */ getTNCudaHandle(),
/* cutensornetState_t */ getQuantumState(),
/* int32_t numControlModes */ controlled_wires.size(),
/* const int32_t * stateControlModes */ controlledModes.data(),
/* const int64_t *stateControlValues*/
controlled_values_int64.data(),
/* int32_t numTargetModes */ targetWires.size(),
/* const int32_t * stateTargetModes */ targetModes.data(),
/* void * */
static_cast<void *>(gate_cache_->get_gate_device_ptr(dummy_id)),
/* const int64_t *tensorModeStrides */ nullptr,
/* const int32_t immutable */ 1,
/* const int32_t adjoint */ 0,
/* const int32_t unitary */ 1,
/* int64_t tensorId* */ &id));

if (dummy_id != id) {
gate_cache_->update_key(dummy_id, id);
}

gate_ids_.insert(id);
}

/**
* @brief Append a single gate tensor to the compute graph.
* NOTE: This function does not update the quantum state but only appends
@@ -285,8 +368,6 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
cuUtil::NormalizeCastIndices<std::size_t, int32_t>(
wires, BaseType::getNumQubits());

// TODO: Need changes to support to the controlled gate tensor API once
// the API is finalized in cutensornet lib.
// Note `adjoint` in the cutensornet context indicates whether or not
// all tensor elements of the tensor operator will be complex
// conjugated. `adjoint` in the following API is not equivalent to
@@ -308,6 +389,11 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
gate_cache_->update_key(dummy_id, id);
}

// One-time initialization of the identity gate id
if (identity_gate_ids_.empty() && opName == "Identity") {
identity_gate_ids_.push_back(static_cast<std::size_t>(id));
}

gate_ids_.insert(id);
}

@@ -489,18 +575,18 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
* gate cache is empty or update the existing gate operator by itself.
*/
void dummy_tensor_update() {
if (gate_cache_->is_empty()) {
if (identity_gate_ids_.empty()) {
applyOperation("Identity", {0}, false);
}

const std::size_t id = gate_cache_->get_cache_head_idx();

PL_CUTENSORNET_IS_SUCCESS(cutensornetStateUpdateTensorOperator(
/* const cutensornetHandle_t */ getTNCudaHandle(),
/* cutensornetState_t */ getQuantumState(),
/* int64_t tensorId*/ static_cast<int64_t>(id),
/* int64_t tensorId*/
static_cast<int64_t>(identity_gate_ids_.front()),
/* void* */
static_cast<void *>(gate_cache_->get_gate_device_ptr(id)),
static_cast<void *>(
gate_cache_->get_gate_device_ptr(identity_gate_ids_.front())),
/* int32_t unitary*/ 1));
}

@@ -30,6 +30,7 @@
#include "Error.hpp"
#include "MPSTNCuda.hpp"
#include "TypeList.hpp"
#include "Util.hpp"
#include "cuda_helpers.hpp"

/// @cond DEV
@@ -47,12 +48,66 @@ namespace Pennylane::LightningTensor::TNCuda {
using TensorNetBackends =
Pennylane::Util::TypeList<MPSTNCuda<float>, MPSTNCuda<double>, void>;

/**
* @brief Register controlled matrix kernel.
*/
template <class TensorNet>
void applyControlledMatrix(
TensorNet &tensor_network,
const py::array_t<std::complex<typename TensorNet::PrecisionT>,
py::array::c_style | py::array::forcecast> &matrix,
const std::vector<std::size_t> &controlled_wires,
const std::vector<bool> &controlled_values,
const std::vector<std::size_t> &target_wires, bool inverse = false) {
using ComplexT = typename TensorNet::ComplexT;
const auto m_buffer = matrix.request();
std::vector<ComplexT> conv_matrix;
if (m_buffer.size) {
const auto m_ptr = static_cast<const ComplexT *>(m_buffer.ptr);
conv_matrix = std::vector<ComplexT>{m_ptr, m_ptr + m_buffer.size};
}

tensor_network.applyControlledOperation(
"applexControlledMatrix", controlled_wires, controlled_values,
target_wires, inverse, {}, conv_matrix);
}

template <class TensorNet, class PyClass>
void registerControlledGate(PyClass &pyclass) {
using PrecisionT = typename TensorNet::PrecisionT; // TensorNet's precision
using ParamT = PrecisionT; // Parameter's data precision

using Pennylane::Gates::ControlledGateOperation;
using Pennylane::Util::for_each_enum;
namespace Constant = Pennylane::Gates::Constant;

for_each_enum<ControlledGateOperation>(
[&pyclass](ControlledGateOperation gate_op) {
using Pennylane::Util::lookup;
const auto gate_name =
std::string(lookup(Constant::controlled_gate_names, gate_op));
const std::string doc = "Apply the " + gate_name + " gate.";
auto func = [gate_name = gate_name](
TensorNet &tensor_network,
const std::vector<std::size_t> &controlled_wires,
const std::vector<bool> &controlled_values,
const std::vector<std::size_t> &target_wires,
bool inverse, const std::vector<ParamT> &params) {
tensor_network.applyControlledOperation(
gate_name, controlled_wires, controlled_values,
target_wires, inverse, params);
};
pyclass.def(gate_name.c_str(), func, doc.c_str());
});
}

/**
* @brief Get a gate kernel map for a tensor network.
*/
template <class TensorNet, class PyClass>
void registerBackendClassSpecificBindings(PyClass &pyclass) {
registerGatesForTensorNet<TensorNet>(pyclass);
registerControlledGate<TensorNet, PyClass>(pyclass);
using PrecisionT = typename TensorNet::PrecisionT; // TensorNet's precision
using ParamT = PrecisionT; // Parameter's data precision

@@ -74,6 +129,8 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) {
tensor_network.getData(data_ptr, state.size());
},
"Copy StateVector data into a Numpy array.")
.def("applyControlledMatrix", &applyControlledMatrix<TensorNet>,
"Apply controlled operation")
.def(
"updateMPSSitesData",
[](TensorNet &tensor_network, std::vector<np_arr_c> &tensors) {
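
As a rough illustration of how the Python layer can reach the bindings registered above, here is a hedged sketch. `tn` stands for an already-constructed tensor-network instance exposed by these bindings (its construction is not shown here); the gate method names come from `Constant::controlled_gate_names`, and the call signatures mirror the `func` lambda registered by `registerControlledGate` and the `applyControlledMatrix` binding.

```python
import numpy as np

def apply_controlled_example(tn):
    """Hedged sketch; `tn` is assumed to be a bound tensor-network object."""
    # Specialized controlled gate registered by registerControlledGate:
    # (controlled_wires, controlled_values, target_wires, inverse, params)
    tn.PauliX([0], [True], [2], False, [])

    # Fallback for an arbitrary single-target-wire matrix:
    x = np.array([[0.0, 1.0], [1.0, 0.0]], dtype=np.complex128)
    tn.applyControlledMatrix(x, [0, 1], [True, False], [2], False)
```
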
@@ -158,20 +158,11 @@ template <class PrecisionT> class TNCudaGateCache {
}

/**
* @brief Returns the key (index of the gate) of the first element in the
* `device_gates_`.
* @brief Returns the size of the `device_gates_`.
*
* @return std::size_t Key of the first element in the `device_gates_`.
* @return std::size_t Size of `device_gates_`.
*/
auto get_cache_head_idx() const -> std::size_t {
auto it = device_gates_.begin();
return it->first;
}

/**
* @brief Returns if the `device_gates_` is empty.
*/
auto is_empty() const -> bool { return device_gates_.empty(); }
auto size() const -> std::size_t { return device_gates_.size(); }

/**
* @brief Update an existing key with a new one.
@@ -524,3 +524,64 @@ TEMPLATE_TEST_CASE("MPSTNCuda::Non_Param_Gates::2+_wires",
}
}
}

TEMPLATE_TEST_CASE("MPSTNCuda::applyControlledOperation non-param "
"one-qubit with controls",
"[MPSTNCuda]", float, double) {
using PrecisionT = TestType;
using ComplexT = std::complex<PrecisionT>;
const int num_qubits = 4;
std::size_t maxExtent = 2;
DevTag<int> dev_tag{0, 0};

const auto margin = PrecisionT{1e-5};
const std::size_t control = GENERATE(0, 1, 2, 3);
const std::size_t wire = GENERATE(0, 1, 2, 3);

MPSTNCuda<PrecisionT> mps_state0{num_qubits, maxExtent, dev_tag};
MPSTNCuda<PrecisionT> mps_state1{num_qubits, maxExtent, dev_tag};

DYNAMIC_SECTION("Controlled gates with base operation - "
<< "controls = {" << control << "} "
<< ", wires = {" << wire << "} - "
<< PrecisionToName<PrecisionT>::value) {
if (control != wire) {
mps_state0.applyControlledOperation(
"PauliX", std::vector<std::size_t>{control},
std::vector<bool>{true}, std::vector<std::size_t>{wire});

mps_state1.applyOperation(
"CNOT", std::vector<std::size_t>{control, wire}, false);

REQUIRE(mps_state0.getDataVector() ==
approx(mps_state1.getDataVector()).margin(margin));
}
}

DYNAMIC_SECTION("Controlled gates with a target matrix - "
<< "controls = {" << control << "} "
<< ", wires = {" << wire << "} - "
<< PrecisionToName<PrecisionT>::value) {
if (control != wire) {
std::vector<ComplexT> gate_matrix = {
ComplexT{0.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0},
ComplexT{0.0, 0.0}};
mps_state0.applyControlledOperation(
"applyControlledGates", std::vector<std::size_t>{control},
std::vector<bool>{true}, std::vector<std::size_t>{wire}, false,
{}, gate_matrix);

mps_state1.applyOperation(
"CNOT", std::vector<std::size_t>{control, wire}, false);

REQUIRE(mps_state0.getDataVector() ==
approx(mps_state1.getDataVector()).margin(margin));
}
}

SECTION("Throw exception for 1+ target wires gates") {
REQUIRE_THROWS_AS(mps_state0.applyControlledOperation(
"CSWAP", {0}, {true, true}, {1, 2}),
LightningException);
}
}
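
The equivalence asserted by the test above (a PauliX with one control wire acts as CNOT) can also be verified with a few lines of standalone NumPy; a minimal sketch, independent of any device:

```python
import numpy as np

pauli_x = np.array([[0, 1], [1, 0]], dtype=complex)

# Controlled-U on (control, target) = |0><0| (x) I + |1><1| (x) U
proj0 = np.diag([1.0, 0.0]).astype(complex)
proj1 = np.diag([0.0, 1.0]).astype(complex)
controlled_x = np.kron(proj0, np.eye(2)) + np.kron(proj1, pauli_x)

cnot = np.array(
    [[1, 0, 0, 0],
     [0, 1, 0, 0],
     [0, 0, 0, 1],
     [0, 0, 1, 0]], dtype=complex)

assert np.allclose(controlled_x, cnot)
```
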
34 changes: 33 additions & 1 deletion pennylane_lightning/lightning_tensor/_tensornet.py
@@ -222,6 +222,36 @@ def _apply_basis_state(self, state, wires):

self._tensornet.setBasisState(state)

def _apply_lightning_controlled(self, operation):
"""Apply an arbitrary controlled operation to the state tensor. Note that `cutensornet` only supports controlled gates with a single wire target.
Args:
operation (~pennylane.operation.Operation): controlled operation to apply
Returns:
None
"""
tensornet = self._tensornet

basename = operation.base.name
method = getattr(tensornet, f"{basename}", None)
control_wires = list(operation.control_wires)
control_values = operation.control_values
target_wires = list(operation.target_wires)
if method is not None: # apply n-controlled specialized gate
inv = False
param = operation.parameters
method(control_wires, control_values, target_wires, inv, param)
else: # apply gate as an n-controlled matrix
method = getattr(tensornet, "applyControlledMatrix")
method(
qml.matrix(operation.base),
control_wires,
control_values,
target_wires,
False,
)

def _apply_lightning(self, operations):
"""Apply a list of operations to the quantum state.
@@ -247,7 +277,9 @@ def _apply_lightning(self, operations):
method = getattr(tensornet, name, None)
wires = list(operation.wires)

if method is not None: # apply specialized gate
if isinstance(operation, qml.ops.Controlled) and len(list(operation.target_wires)) == 1:
self._apply_lightning_controlled(operation)
elif method is not None: # apply specialized gate
param = operation.parameters
method(wires, invert_param, param)
else: # apply gate as a matrix
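
A hedged sketch of the dispatch rule added above, runnable with just PennyLane installed: a `Controlled` operation with a single target wire takes the new controlled-gate path, while everything else keeps the previous handling (specialized gate or dense matrix).

```python
import pennylane as qml

ops = [
    qml.ops.Controlled(qml.S(wires=2), control_wires=[0, 1]),       # one target wire
    qml.ops.Controlled(qml.SWAP(wires=[1, 2]), control_wires=[0]),  # two target wires
]

for op in ops:
    takes_new_path = isinstance(op, qml.ops.Controlled) and len(op.target_wires) == 1
    print(op.name, "-> controlled-gate path" if takes_new_path else "-> previous handling")
```
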
11 changes: 8 additions & 3 deletions tests/test_gates.py
@@ -420,8 +420,8 @@ def circuit():


@pytest.mark.skipif(
device_name != "lightning.qubit",
reason="N-controlled operations only implemented in lightning.qubit.",
device_name not in ("lightning.qubit", "lightning.tensor"),
reason="N-controlled operations only implemented in lightning.qubit and lightning.tensor.",
)
@pytest.mark.parametrize(
"operation",
@@ -458,8 +458,13 @@ def test_controlled_qubit_gates(operation, n_qubits, control_value, tol):
"""Test that multi-controlled gates are correctly applied to a state"""
dev_def = qml.device("default.qubit", wires=n_qubits)
dev = qml.device(device_name, wires=n_qubits)
threshold = 250
threshold = 5 if device_name == "lightning.tensor" else 250
num_wires = max(operation.num_wires, 1)
if operation == qml.GlobalPhase and device_name == "lightning.tensor":
pytest.skip("GlobalPhase not implemented in lightning.tensor.")
if num_wires != 1 and device_name == "lightning.tensor":
pytest.skip("Multi-target wire controlled gates not implemented in lightning.tensor.")

for n_wires in range(num_wires + 1, num_wires + 4):
wire_lists = list(itertools.permutations(range(0, n_qubits), n_wires))
n_perms = len(wire_lists) * n_wires
