Add MPO support to lightning.tensor (#859)
### Before submitting

Please complete the following checklist when submitting a PR:

- [ ] All new features must include a unit test.
      If you've fixed a bug or added code that should be tested, add a test to the
      [`tests`](../tests) directory!

- [ ] All new functions and code must be clearly commented and documented.
      If you do make documentation changes, make sure that the docs build and
      render correctly by running `make docs`.

- [ ] Ensure that the test suite passes, by running `make test`.

- [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing the
      change, and including a link back to the PR.

- [x] Ensure that code is properly formatted by running `make format`.

When all the above are checked, delete everything above the dashed
line and fill in the pull request template.


------------------------------------------------------------------------------------------------------------

**Context:**

[SC-70902]

MPO (Matrix Product Operator) support is added to `lightning.tensor` so that all
gates can be applied. Gates acting on more than 2 wires, and controlled gates
with more than 1 target wire, are supported via `MPO`. The current
implementation only accepts decomposed gate data. For Python users, the `MPO`
decomposition is performed in the Python layer with `numpy`; a rough sketch of
this kind of decomposition is shown below. A further improvement would be to
perform the MPO decomposition with `cusolver` for large unitary matrices.
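
For illustration only (this is not code from this PR; the helper name and the index ordering are assumptions, and the exact layout expected by `lightning.tensor` may differ), a minimal `numpy` sketch of the kind of SVD-based splitting that turns a two-qubit gate into two MPO site tensors:

```python
import numpy as np

def two_site_mpo(gate, tol=1e-12):
    """Split a 4x4 two-qubit gate into two MPO site tensors via SVD (illustrative only)."""
    # Reshape to [out0, out1, in0, in1], then group (out0, in0) and (out1, in1).
    t = gate.reshape(2, 2, 2, 2).transpose(0, 2, 1, 3).reshape(4, 4)
    u, s, vh = np.linalg.svd(t, full_matrices=False)
    keep = s > tol                          # drop numerically zero singular values
    bond = int(np.count_nonzero(keep))
    left = (u[:, keep] * s[keep]).reshape(2, 2, bond)   # (out0, in0, bond)
    right = vh[keep, :].reshape(bond, 2, 2)             # (bond, out1, in1)
    return left, right

cnot = np.array(
    [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]], dtype=complex
)
site0, site1 = two_site_mpo(cnot)  # CNOT has MPO bond dimension 2
```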

**Description of the Change:**

**Benefits:**

**Possible Drawbacks:**

**Related GitHub Issues:**

---------

Co-authored-by: ringo-but-quantum <[email protected]>
Co-authored-by: Ali Asadi <[email protected]>
Co-authored-by: Shiro-Raven <[email protected]>
Co-authored-by: albi3ro <[email protected]>
Co-authored-by: Vincent Michaud-Rioux <[email protected]>
Co-authored-by: Luis Alfredo Nuñez Meneses <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: vincentmr <[email protected]>
Co-authored-by: Amintor Dusko <[email protected]>
Co-authored-by: Lee James O'Riordan <[email protected]>
Co-authored-by: paul0403 <[email protected]>
Co-authored-by: Raul Torres <[email protected]>
13 people authored Sep 18, 2024
1 parent 3718dd6 commit eb709d5
Showing 24 changed files with 1,112 additions and 157 deletions.
3 changes: 3 additions & 0 deletions .github/CHANGELOG.md
@@ -2,6 +2,9 @@

### New features since last release

* Add Matrix Product Operator (MPO) support for all gates to `lightning.tensor`. Note that the current C++ implementation only works with MPO site data provided by the user.
[(#859)](https://github.com/PennyLaneAI/pennylane-lightning/pull/859)

* Add shot measurement support to `lightning.tensor`.
[(#852)](https://github.com/PennyLaneAI/pennylane-lightning/pull/852)

2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.39.0-dev25"
__version__ = "0.39.0-dev26"
@@ -0,0 +1,20 @@
// Copyright 2024 Xanadu Quantum Technologies Inc.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "MPOTNCuda.hpp"

using namespace Pennylane::LightningTensor::TNCuda;

template class Pennylane::LightningTensor::TNCuda::MPOTNCuda<float>;
template class Pennylane::LightningTensor::TNCuda::MPOTNCuda<double>;
@@ -0,0 +1,227 @@
// Copyright 2024 Xanadu Quantum Technologies Inc.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
* @file MPOTNCuda.hpp
* Class for cuTensorNet-backed Matrix Product Operator.
*/

#pragma once

#include <algorithm>
#include <cuComplex.h>
#include <cutensornet.h>
#include <vector>

#include "TensorCuda.hpp"
#include "tncudaError.hpp"
#include "tncuda_helpers.hpp"

namespace {
namespace cuUtil = Pennylane::LightningGPU::Util;
}

namespace Pennylane::LightningTensor::TNCuda {

/**
 * @brief Class representing a Matrix Product Operator (MPO) object for the
 * MPS backend.
 * Any gate tensor can be represented as an MPO tensor network in the context
 * of MPS. The gate tensor must be decomposed with respect to its target wires.
 * Note that only local target wires are supported. Non-adjacent target wires
 * must be swapped to local wires before constructing the MPO tensor network.
 * The MPO tensors' mode ordering under an open boundary condition is:
 *
 *    2              3              2
 *    |              |              |
 *    X--1--....--0--X--2--....--0--X
 *    |              |              |
 *    0              1              1
 *
 * The extents of the MPO tensors are [bondL, 2, bondR, 2]. The bondL of the
 * left-boundary MPO tensor is 1, and the bondR of the right-boundary MPO
 * tensor is 1.
 * Note that the gate tensor should be permuted to ascending wire order and
 * decomposed into MPO sites before being passed to this class. Pre- and
 * post-processing with SWAP operations are required to ensure that the MPO
 * targets adjacent wires and that the target wires are correct.
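 * For example, with maxMPOBondDim >= 4, a two-site MPO is stored with site
 * extents {2, 4, 2} and {4, 2, 2}, and a three-site MPO with extents
 * {2, 4, 2}, {4, 2, 4, 2} and {4, 2, 2}; the trivial boundary bonds of
 * dimension 1 are dropped.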
* @tparam PrecisionT Floating point type.
*/
template <class PrecisionT> class MPOTNCuda {
private:
using ComplexT = std::complex<PrecisionT>;
using CFP_t = decltype(cuUtil::getCudaType(PrecisionT{}));

cutensornetNetworkOperator_t MPOOperator_;
cuDoubleComplex coeff_ =
make_cuDoubleComplex(1.0, 0.0); // default coefficient
cutensornetBoundaryCondition_t boundaryCondition_{
CUTENSORNET_BOUNDARY_CONDITION_OPEN}; // open boundary condition
int64_t componentIdx_;

std::vector<std::size_t> bondDims_;

std::size_t numMPOSites_;
std::vector<int32_t> MPO_modes_int32_;

std::vector<std::vector<int64_t>> modesExtents_int64_;
// TODO: Explore if tensors_ can be stored in a separate memory manager
// class
std::vector<std::shared_ptr<TensorCuda<PrecisionT>>> tensors_;

/**
* @brief Get a vector of pointers to extents of each site.
*
* @return std::vector<int64_t const *> Note int64_t const* is
* required by cutensornet backend.
*/
[[nodiscard]] auto getModeExtentsPtr_() -> std::vector<int64_t const *> {
std::vector<int64_t const *> modeExtentsPtr_int64;
for (auto it = modesExtents_int64_.cbegin();
it != modesExtents_int64_.cend(); it++) {
modeExtentsPtr_int64.emplace_back(it->data());
}
return modeExtentsPtr_int64;
}

/**
* @brief Get a vector of pointers to tensor data of each site.
*
* @return std::vector<void *>
*/
[[nodiscard]] auto getTensorsDataPtr_() -> std::vector<void *> {
std::vector<void *> tensorsDataPtr;
for (auto &tensor : tensors_) {
tensorsDataPtr.emplace_back(
reinterpret_cast<void *>(tensor->getDataBuffer().getData()));
}
return tensorsDataPtr;
}

public:
explicit MPOTNCuda(const std::vector<std::vector<ComplexT>> &tensors,
const std::vector<std::size_t> &wires,
const std::size_t maxMPOBondDim,
const std::size_t numQubits,
const cutensornetHandle_t &cutensornetHandle,
const cudaDataType_t &cudaDataType,
const DevTag<int> &dev_tag) {
PL_ABORT_IF_NOT(tensors.size() == wires.size(),
"Number of tensors and wires must match.");

PL_ABORT_IF(maxMPOBondDim < 2,
"Max MPO bond dimension must be at least 2.");

PL_ABORT_IF_NOT(std::is_sorted(wires.begin(), wires.end()),
"Only sorted target wires is accepeted.");

PL_ABORT_IF_NOT(wires.size() == wires.back() - wires.front() + 1,
"Only support local target wires.");

// Create an empty MPO tensor network operator. Note that the state
// extents are aligned with the quantum state.
PL_CUTENSORNET_IS_SUCCESS(cutensornetCreateNetworkOperator(
/* const cutensornetHandle_t */ cutensornetHandle,
/* int32_t */ static_cast<int32_t>(numQubits),
/* const int64_t stateModeExtents */
std::vector<int64_t>(numQubits, 2).data(),
/* cudaDataType_t */ cudaDataType,
/* cutensornetNetworkOperator_t */ &MPOOperator_));

numMPOSites_ = wires.size();

MPO_modes_int32_.resize(numMPOSites_);

std::iota(MPO_modes_int32_.begin(), MPO_modes_int32_.end(),
wires.front());

std::transform(MPO_modes_int32_.begin(), MPO_modes_int32_.end(),
MPO_modes_int32_.begin(),
[&numQubits](const std::size_t mode) {
return static_cast<int32_t>(numQubits - 1 - mode);
});

// Ensure the modes are in ascending order
std::reverse(MPO_modes_int32_.begin(), MPO_modes_int32_.end());
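// Example: with numQubits = 4 and wires = {1, 2}, wire w maps to mode
// numQubits - 1 - w, giving {2, 1}, which the reversal above turns into the
// ascending order {1, 2}.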

for (std::size_t i = 0; i < numMPOSites_ - 1; i++) {
// Binary logarithm of the bond dimension required for the exact MPO
// decomposition
const std::size_t lg_bondDim_exact =
std::min(i + 1, numMPOSites_ - i - 1) *
2; // 1+1 (1 for bra and 1 for ket)

const std::size_t bondDim =
lg_bondDim_exact <= log2(maxMPOBondDim)
? (std::size_t{1} << lg_bondDim_exact)
: maxMPOBondDim;

bondDims_.emplace_back(bondDim);
}
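// For example, numMPOSites_ = 3 with maxMPOBondDim >= 4 gives
// bondDims_ = {4, 4}; with maxMPOBondDim = 2 the bonds are capped at {2, 2}.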

for (std::size_t i = 0; i < numMPOSites_; i++) {
const std::size_t bondDimR =
i < numMPOSites_ - 1 ? bondDims_[i] : 1;
const std::size_t bondDimL = i > 0 ? bondDims_[i - 1] : 1;

auto localModesExtents =
i == 0 ? std::vector<std::size_t>{2, bondDimR, 2}
: i == numMPOSites_ - 1
? std::vector<std::size_t>{bondDimL, 2, 2}
: std::vector<std::size_t>{bondDimL, 2, bondDimR, 2};

modesExtents_int64_.emplace_back(
Pennylane::Util::cast_vector<std::size_t, int64_t>(
localModesExtents));

tensors_.emplace_back(std::make_shared<TensorCuda<PrecisionT>>(
localModesExtents.size(), localModesExtents, localModesExtents,
dev_tag));

auto tensor_cu = cuUtil::complexToCu<ComplexT>(tensors[i]);
tensors_[i]->getDataBuffer().CopyHostDataToGpu(tensor_cu.data(),
tensor_cu.size());
}

// Append MPO tensor network operator components
PL_CUTENSORNET_IS_SUCCESS(cutensornetNetworkOperatorAppendMPO(
/* const cutensornetHandle_t */ cutensornetHandle,
/* cutensornetNetworkOperator_t */ MPOOperator_,
/* const cuDoubleComplex */ coeff_,
/* int32_t numStateModes */ static_cast<int32_t>(numMPOSites_),
/* const int32_t stateModes[] */ MPO_modes_int32_.data(),
/* const int64_t *stateModeExtents[] */
getModeExtentsPtr_().data(),
/* const int64_t *tensorModeStrides[] */ nullptr,
/* const void * */
const_cast<const void **>(getTensorsDataPtr_().data()),
/* cutensornetBoundaryCondition_t */ boundaryCondition_,
/* int64_t * */ &componentIdx_));
}

auto getMPOOperator() const -> const cutensornetNetworkOperator_t & {
return MPOOperator_;
}

auto getBondDims() const -> const std::vector<std::size_t> & {
return bondDims_;
}

~MPOTNCuda() {
PL_CUTENSORNET_IS_SUCCESS(
cutensornetDestroyNetworkOperator(MPOOperator_));
};
};
} // namespace Pennylane::LightningTensor::TNCuda
@@ -29,6 +29,7 @@

#include "DataBuffer.hpp"
#include "DevTag.hpp"
#include "MPOTNCuda.hpp"
#include "TNCudaBase.hpp"
#include "TensorCuda.hpp"
#include "TensornetBase.hpp"
@@ -73,6 +74,9 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {

std::vector<TensorCuda<Precision>> tensors_out_;

std::vector<std::shared_ptr<MPOTNCuda<Precision>>> mpos_;
std::vector<std::size_t> mpo_ids_;

public:
using CFP_t = decltype(cuUtil::getCudaType(Precision{}));
using ComplexT = std::complex<Precision>;
@@ -232,6 +236,77 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
}
};

/**
* @brief Apply an MPO operator with the gate's MPO decomposition data
* provided by the user to the compute graph.
*
* This API only works for the MPS backend.
*
* @param tensors The MPO representation of a gate. Each element in the
* outer vector represents an MPO tensor site.
* @param wires The wire indices that the gate acts on. The size of this
* vector should match the size of the `tensors` vector.
* @param max_mpo_bond_dim The maximum bond dimension of the MPO operator.
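* Note that SWAP gates are applied automatically when the target wires are
* not adjacent, so that the MPO acts on local wires, and are applied again
* afterwards to restore the original wire ordering.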
*/
void applyMPOOperation(const std::vector<std::vector<ComplexT>> &tensors,
const std::vector<std::size_t> &wires,
const std::size_t max_mpo_bond_dim) {
PL_ABORT_IF_NOT(
tensors.size() == wires.size(),
"The number of tensors should be equal to the number of "
"wires.");

// Compute the local MPO target wires and a queue of wire pairs for the SWAP
// gates
const auto [local_wires, swap_wires_queue] =
create_swap_wire_pair_queue(wires);

// Apply SWAP gates to ensure that the following MPO operator targets
// local wires
if (swap_wires_queue.size() > 0) {
for_each(swap_wires_queue.begin(), swap_wires_queue.end(),
[this](const auto &swap_wires) {
for_each(swap_wires.begin(), swap_wires.end(),
[this](const auto &wire_pair) {
BaseType::applyOperation(
"SWAP", wire_pair, false);
});
});
}

// Create an MPO object based on the host data from the user
mpos_.emplace_back(std::make_shared<MPOTNCuda<Precision>>(
tensors, local_wires, max_mpo_bond_dim, BaseType::getNumQubits(),
BaseType::getTNCudaHandle(), BaseType::getCudaDataType(),
BaseType::getDevTag()));

// Append the MPO operator to the compute graph
// Note MPO operator only works for local target wires as of v24.08
int64_t operatorId;
PL_CUTENSORNET_IS_SUCCESS(cutensornetStateApplyNetworkOperator(
/* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
/* cutensornetState_t */ BaseType::getQuantumState(),
/* cutensornetNetworkOperator_t */ mpos_.back()->getMPOOperator(),
/* const int32_t immutable */ 1,
/* const int32_t adjoint */ 0,
/* const int32_t unitary */ 1,
/* int64_t * operatorId*/ &operatorId));

mpo_ids_.push_back(static_cast<std::size_t>(operatorId));

// Apply SWAP gates to restore the original wire order
if (swap_wires_queue.size() > 0) {
for_each(swap_wires_queue.rbegin(), swap_wires_queue.rend(),
[this](const auto &swap_wires) {
for_each(swap_wires.rbegin(), swap_wires.rend(),
[this](const auto &wire_pair) {
BaseType::applyOperation(
"SWAP", wire_pair, false);
});
});
}
}

/**
* @brief Append MPS final state to the quantum circuit.
*
@@ -276,6 +351,21 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
/* const void * */ &cutoff,
/* std::size_t */ sizeof(cutoff)));

// MPO configurations
// Note that CUTENSORNET_STATE_MPO_APPLICATION_INEXACT is applied if the
// `cutoff` value is not set to 0 for the MPO application.
cutensornetStateMPOApplication_t mpo_attribute =
(cutoff == 0) ? CUTENSORNET_STATE_MPO_APPLICATION_EXACT
: CUTENSORNET_STATE_MPO_APPLICATION_INEXACT;

PL_CUTENSORNET_IS_SUCCESS(cutensornetStateConfigure(
/* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
/* cutensornetState_t */ BaseType::getQuantumState(),
/* cutensornetStateAttributes_t */
CUTENSORNET_STATE_CONFIG_MPS_MPO_APPLICATION,
/* const void * */ &mpo_attribute,
/* std::size_t */ sizeof(mpo_attribute)));

BaseType::computeState(
const_cast<int64_t **>(getSitesExtentsPtr().data()),
reinterpret_cast<void **>(getTensorsOutDataPtr().data()));