From a090167b192e2a1a790bb1aec99217a9df45b25a Mon Sep 17 00:00:00 2001
From: RobinTF <83676088+RobinTF@users.noreply.github.com>
Date: Mon, 4 Nov 2024 14:56:17 +0100
Subject: [PATCH 01/12] Lazy `TransitivePath` operation (#1595)

This PR enables the `TransitivePath` operation to yield its result lazily and to consume its left/right child lazily. Note that the graph which is transitively traversed needs to be fully materialized due to the underlying algorithm. E.g., when computing the (large) result of `wdt:P31/wdt:P279*`, the large result and the `wdt:P31` child can be handled lazily, but the full `wdt:P279` predicate needs to be materialized.
---
 src/engine/TransitivePathBase.cpp | 124 +++++-----
 src/engine/TransitivePathBase.h   |  65 +++--
 src/engine/TransitivePathImpl.h   | 375 +++++++++++++++--------------
 src/engine/idTable/IdTable.h      |   9 +-
 test/TransitivePathTest.cpp       | 381 +++++++++++++++++++++---------
 test/util/IdTableHelpers.cpp      |  12 +
 test/util/IdTableHelpers.h        |   5 +
 7 files changed, 591 insertions(+), 380 deletions(-)
diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp
index 63899fdb28..a833bfdfbd 100644
--- a/src/engine/TransitivePathBase.cpp
+++ b/src/engine/TransitivePathBase.cpp
@@ -63,76 +63,80 @@ TransitivePathBase::decideDirection() {
 }
 
 // _____________________________________________________________________________
-void TransitivePathBase::fillTableWithHull(IdTable& table, const Map& hull,
-                                           std::vector<Id>& nodes,
-                                           size_t startSideCol,
-                                           size_t targetSideCol,
-                                           const IdTable& startSideTable,
-                                           size_t skipCol) const {
-  CALL_FIXED_SIZE((std::array{table.numColumns(), startSideTable.numColumns()}),
-                  &TransitivePathBase::fillTableWithHullImpl, this, table, hull,
-                  nodes, startSideCol, targetSideCol, startSideTable, skipCol);
+Result::Generator TransitivePathBase::fillTableWithHull(
+    NodeGenerator hull, size_t startSideCol, size_t targetSideCol,
+    size_t skipCol, bool yieldOnce, size_t inputWidth) const {
+  return ad_utility::callFixedSize(
+      std::array{inputWidth, getResultWidth()},
+      [&]<size_t INPUT_WIDTH, size_t OUTPUT_WIDTH>() {
+        return fillTableWithHullImpl<INPUT_WIDTH, OUTPUT_WIDTH>(
+            std::move(hull), startSideCol, targetSideCol, yieldOnce, skipCol);
+      });
 }
 
 // _____________________________________________________________________________
-template <size_t WIDTH, size_t START_WIDTH>
-void TransitivePathBase::fillTableWithHullImpl(
-    IdTable& tableDyn, const Map& hull, std::vector<Id>& nodes,
-    size_t startSideCol, size_t targetSideCol, const IdTable& startSideTable,
-    size_t skipCol) const {
-  IdTableStatic<WIDTH> table = std::move(tableDyn).toStatic<WIDTH>();
-  IdTableView<START_WIDTH> startView =
-      startSideTable.asStaticView<START_WIDTH>();
-
-  size_t rowIndex = 0;
-  for (size_t i = 0; i < nodes.size(); i++) {
-    Id node = nodes[i];
-    auto it = hull.find(node);
-    if (it == hull.end()) {
-      continue;
-    }
-
-    for (Id otherNode : it->second) {
-      table.emplace_back();
-      table(rowIndex, startSideCol) = node;
-      table(rowIndex, targetSideCol) = otherNode;
-
-      copyColumns<START_WIDTH, WIDTH>(startView, table, i, rowIndex, skipCol);
-
-      rowIndex++;
-    }
-  }
-
-  tableDyn = std::move(table).toDynamic();
-}
-
-// _____________________________________________________________________________
-void TransitivePathBase::fillTableWithHull(IdTable& table, const Map& hull,
-                                           size_t startSideCol,
-                                           size_t targetSideCol) const {
-  CALL_FIXED_SIZE((std::array{table.numColumns()}),
-                  &TransitivePathBase::fillTableWithHullImpl, this, table, hull,
-                  startSideCol, targetSideCol);
+Result::Generator TransitivePathBase::fillTableWithHull(NodeGenerator hull,
+                                                        size_t startSideCol,
+                                                        size_t targetSideCol,
+                                                        bool yieldOnce) const {
+  return ad_utility::callFixedSize(getResultWidth(), [&]<size_t WIDTH>() {
+    return fillTableWithHullImpl<0, WIDTH>(std::move(hull), startSideCol,
+                                           targetSideCol, yieldOnce);
+  });
 }
 
 // _____________________________________________________________________________
-template <size_t WIDTH>
-void TransitivePathBase::fillTableWithHullImpl(IdTable& tableDyn,
-                                               const Map& hull,
-                                               size_t startSideCol,
-                                               size_t targetSideCol) const {
-  IdTableStatic<WIDTH> table = std::move(tableDyn).toStatic<WIDTH>();
-  size_t rowIndex = 0;
-  for (auto const& [node, linkedNodes] : hull) {
+template <size_t INPUT_WIDTH, size_t OUTPUT_WIDTH>
+Result::Generator TransitivePathBase::fillTableWithHullImpl(
+    NodeGenerator hull, size_t startSideCol, size_t targetSideCol,
+    bool yieldOnce, size_t skipCol) const {
+  ad_utility::Timer timer{ad_utility::Timer::Stopped};
+  size_t outputRow = 0;
+  IdTableStatic<OUTPUT_WIDTH> table{getResultWidth(), allocator()};
+  std::vector<LocalVocab> storedLocalVocabs;
+  for (auto& [node, linkedNodes, localVocab, idTable, inputRow] : hull) {
+    timer.cont();
+    // As an optimization nodes without any linked nodes should not get yielded
+    // in the first place.
+    AD_CONTRACT_CHECK(!linkedNodes.empty());
+    if (!yieldOnce) {
+      table.reserve(linkedNodes.size());
+    }
+    std::optional<IdTableView<INPUT_WIDTH>> inputView = std::nullopt;
+    if (idTable != nullptr) {
+      inputView = idTable->template asStaticView<INPUT_WIDTH>();
+    }
     for (Id linkedNode : linkedNodes) {
       table.emplace_back();
-      table(rowIndex, startSideCol) = node;
-      table(rowIndex, targetSideCol) = linkedNode;
+      table(outputRow, startSideCol) = node;
+      table(outputRow, targetSideCol) = linkedNode;
 
-      rowIndex++;
+      if (inputView.has_value()) {
+        copyColumns<INPUT_WIDTH, OUTPUT_WIDTH>(inputView.value(), table,
+                                               inputRow, outputRow, skipCol);
+      }
+
+      outputRow++;
     }
+
+    if (yieldOnce) {
+      storedLocalVocabs.emplace_back(std::move(localVocab));
+    } else {
+      timer.stop();
+      runtimeInfo().addDetail("IdTable fill time", timer.msecs());
+      co_yield {std::move(table).toDynamic(), std::move(localVocab)};
+      table = IdTableStatic<OUTPUT_WIDTH>{getResultWidth(), allocator()};
+      outputRow = 0;
+    }
+    timer.stop();
+  }
+  if (yieldOnce) {
+    timer.start();
+    LocalVocab mergedVocab{};
+    mergedVocab.mergeWith(storedLocalVocabs);
+    runtimeInfo().addDetail("IdTable fill time", timer.msecs());
+    co_yield {std::move(table).toDynamic(), std::move(mergedVocab)};
   }
-  tableDyn = std::move(table).toDynamic();
 }
 
 // _____________________________________________________________________________
@@ -405,7 +409,7 @@ void TransitivePathBase::copyColumns(const IdTableView<INPUT_WIDTH>& inputTable,
       continue;
     }
 
-    outputTable(outputRow, outCol) = inputTable(inputRow, inCol);
+    outputTable.at(outputRow, outCol) = inputTable.at(inputRow, inCol);
     inCol++;
     outCol++;
   }
diff --git a/src/engine/TransitivePathBase.h b/src/engine/TransitivePathBase.h
index ce7c32ac3e..a223e06d95 100644
--- a/src/engine/TransitivePathBase.h
+++ b/src/engine/TransitivePathBase.h
@@ -69,6 +69,31 @@ using Map = std::unordered_map<
     Id, Set, HashId, std::equal_to<Id>,
     ad_utility::AllocatorWithLimit<std::pair<const Id, Set>>>;
 
+// Helper struct that allows a generator to yield a node and all its
+// connected nodes (the `targets`), along with a local vocabulary and the row
+// index of the node in the input table. The `IdTable` pointer might be null if
+// the `Id` is not associated with a table. In this case the `row` value does
+// not represent anything meaningful and should not be used.
+struct NodeWithTargets {
+  Id node_;
+  Set targets_;
+  LocalVocab localVocab_;
+  const IdTable* idTable_;
+  size_t row_;
+
+  // Explicit to prevent issues with co_yield and lifetime.
+  // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103909 for more info.
+  NodeWithTargets(Id node, Set targets, LocalVocab localVocab,
+                  const IdTable* idTable, size_t row)
+      : node_{node},
+        targets_{std::move(targets)},
+        localVocab_{std::move(localVocab)},
+        idTable_{idTable},
+        row_{row} {}
+};
+
+using NodeGenerator = cppcoro::generator<NodeWithTargets>;
+
 /**
  * @class TransitivePathBase
  * @brief A common base class for different implementations of the Transitive
@@ -147,37 +172,36 @@ class TransitivePathBase : public Operation {
    * startSideTable to fill in the rest of the columns.
    * This function is called if the start side is bound and a variable.
    *
-   * @param table The result table which will be filled.
-   * @param hull The transitive hull.
-   * @param nodes The start nodes of the transitive hull. These need to be in
-   * the same order and amount as the starting side nodes in the startTable.
+   * @param hull The transitive hull, represented by a generator that yields
+   * sets of connected nodes with some metadata.
    * @param startSideCol The column of the result table for the startSide of the
    * hull
    * @param targetSideCol The column of the result table for the targetSide of
    * the hull
-   * @param startSideTable An IdTable that holds other results. The other
-   * results will be transferred to the new result table.
    * @param skipCol This column contains the Ids of the start side in the
    * startSideTable and will be skipped.
+   * @param yieldOnce If true, the generator will yield only a single time.
+   * @param inputWidth The width of the input table that is referenced by the
+   * elements of `hull`.
    */
-  void fillTableWithHull(IdTable& table, const Map& hull,
-                         std::vector<Id>& nodes, size_t startSideCol,
-                         size_t targetSideCol, const IdTable& startSideTable,
-                         size_t skipCol) const;
+  Result::Generator fillTableWithHull(NodeGenerator hull, size_t startSideCol,
+                                      size_t targetSideCol, size_t skipCol,
+                                      bool yieldOnce, size_t inputWidth) const;
 
   /**
    * @brief Fill the given table with the transitive hull.
    * This function is called if the sides are unbound or ids.
    *
-   * @param table The result table which will be filled.
    * @param hull The transitive hull.
    * @param startSideCol The column of the result table for the startSide of the
    * hull
    * @param targetSideCol The column of the result table for the targetSide of
    * the hull
+   * @param yieldOnce If true, the generator will yield only a single time.
    */
-  void fillTableWithHull(IdTable& table, const Map& hull, size_t startSideCol,
-                         size_t targetSideCol) const;
+  Result::Generator fillTableWithHull(NodeGenerator hull, size_t startSideCol,
+                                      size_t targetSideCol,
+                                      bool yieldOnce) const;
 
   // Copy the columns from the input table to the output table
   template <size_t INPUT_WIDTH, size_t OUTPUT_WIDTH>
@@ -204,16 +228,11 @@ class TransitivePathBase : public Operation {
  private:
   uint64_t getSizeEstimateBeforeLimit() override;
 
-  template <size_t WIDTH, size_t START_WIDTH>
-  void fillTableWithHullImpl(IdTable& table, const Map& hull,
-                             std::vector<Id>& nodes, size_t startSideCol,
-                             size_t targetSideCol,
-                             const IdTable& startSideTable,
-                             size_t skipCol) const;
-
-  template <size_t WIDTH>
-  void fillTableWithHullImpl(IdTable& table, const Map& hull,
-                             size_t startSideCol, size_t targetSideCol) const;
+  template <size_t INPUT_WIDTH, size_t OUTPUT_WIDTH>
+  Result::Generator fillTableWithHullImpl(NodeGenerator hull,
+                                          size_t startSideCol,
+                                          size_t targetSideCol, bool yieldOnce,
+                                          size_t skipCol = 0) const;
 
  public:
   size_t getCostEstimate() override;
diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h
index 55ce45ba4d..407b63a298 100644
--- a/src/engine/TransitivePathImpl.h
+++ b/src/engine/TransitivePathImpl.h
@@ -11,6 +11,25 @@
 #include "util/Exception.h"
 #include "util/Timer.h"
 
+namespace detail {
+
+// Helper struct that groups a read-only view of a column of a table with a
+// pointer to the table itself and a local vocabulary (used to ensure the
+// correct lifetime).
+template <typename ColumnType>
+struct TableColumnWithVocab {
+  const IdTable* table_;
+  ColumnType column_;
+  LocalVocab vocab_;
+
+  // Explicit to prevent issues with co_yield and lifetime.
+  // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103909 for more info.
+  TableColumnWithVocab(const IdTable* table, ColumnType column,
+                       LocalVocab vocab)
+      : table_{table}, column_{std::move(column)}, vocab_{std::move(vocab)} {};
+};
+};  // namespace detail
+
 /**
  * @class TransitivePathImpl
  * @brief This class implements common functions for the concrete TransitivePath
@@ -22,6 +41,9 @@
  */
 template <typename T>
 class TransitivePathImpl : public TransitivePathBase {
+  using TableColumnWithVocab =
+      detail::TableColumnWithVocab<std::span<const Id>>;
+
  public:
   TransitivePathImpl(QueryExecutionContext* qec,
                      std::shared_ptr<QueryExecutionTree> child,
@@ -36,100 +58,88 @@ class TransitivePathImpl : public TransitivePathBase {
    * it is a variable. The other IdTable contains the result
    * of the start side and will be used to get the start nodes.
    *
-   * @tparam RES_WIDTH Number of columns of the result table
-   * @tparam SUB_WIDTH Number of columns of the sub table
-   * @tparam SIDE_WIDTH Number of columns of the
-   * @param res The result table which will be filled in-place
-   * @param sub The IdTable for the sub result
+   * @param sub A shared pointer to the sub result. Needs to be kept alive for
+   * the lifetime of this generator.
    * @param startSide The start side for the transitive hull
    * @param targetSide The target side for the transitive hull
-   * @param startSideTable The IdTable of the startSide
+   * @param startSideResult The Result of the startSide
+   * @param yieldOnce If true, the generator will yield only a single time.
    */
-  template <size_t RES_WIDTH, size_t SUB_WIDTH, size_t SIDE_WIDTH>
-  void computeTransitivePathBound(IdTable* dynRes, const IdTable& dynSub,
-                                  const TransitivePathSide& startSide,
-                                  const TransitivePathSide& targetSide,
-                                  const IdTable& startSideTable) const {
-    auto timer = ad_utility::Timer(ad_utility::Timer::Stopped);
-    timer.start();
-
-    auto [edges, nodes] = setupMapAndNodes<SUB_WIDTH, SIDE_WIDTH>(
-        dynSub, startSide, targetSide, startSideTable);
-
-    timer.stop();
-    auto initTime = timer.msecs();
-    timer.start();
-
-    Map hull(allocator());
-    if (!targetSide.isVariable()) {
-      hull = transitiveHull(edges, nodes, std::get<Id>(targetSide.value_));
-    } else {
-      hull = transitiveHull(edges, nodes, std::nullopt);
+  Result::Generator computeTransitivePathBound(
+      std::shared_ptr<const Result> sub, const TransitivePathSide& startSide,
+      const TransitivePathSide& targetSide,
+      std::shared_ptr<const Result> startSideResult, bool yieldOnce) const {
+    ad_utility::Timer timer{ad_utility::Timer::Started};
+
+    auto edges = setupEdgesMap(sub->idTable(), startSide, targetSide);
+    auto nodes = setupNodes(startSide, std::move(startSideResult));
+    // Setup nodes returns a generator, so this time measurement won't include
+    // the time for each iteration, but every iteration step should have
+    // constant overhead, which should be safe to ignore.
+    runtimeInfo().addDetail("Initialization time", timer.msecs().count());
+
+    NodeGenerator hull =
+        transitiveHull(edges, sub->getCopyOfLocalVocab(), std::move(nodes),
+                       targetSide.isVariable()
+                           ? std::nullopt
+                           : std::optional{std::get<Id>(targetSide.value_)});
+
+    auto result = fillTableWithHull(
+        std::move(hull), startSide.outputCol_, targetSide.outputCol_,
+        startSide.treeAndCol_.value().second, yieldOnce,
+        startSide.treeAndCol_.value().first->getResultWidth());
+
+    // Iterate over generator to prevent lifetime issues
+    for (auto& pair : result) {
+      co_yield pair;
     }
-
-    timer.stop();
-    auto hullTime = timer.msecs();
-    timer.start();
-
-    fillTableWithHull(*dynRes, hull, nodes, startSide.outputCol_,
-                      targetSide.outputCol_, startSideTable,
-                      startSide.treeAndCol_.value().second);
-
-    timer.stop();
-    auto fillTime = timer.msecs();
-
-    auto& info = runtimeInfo();
-    info.addDetail("Initialization time", initTime.count());
-    info.addDetail("Hull time", hullTime.count());
-    info.addDetail("IdTable fill time", fillTime.count());
   };
 
   /**
    * @brief Compute the transitive hull.
    * This function is called when no side is bound (or an id).
    *
-   * @tparam RES_WIDTH Number of columns of the result table
-   * @tparam SUB_WIDTH Number of columns of the sub table
-   * @param res The result table which will be filled in-place
-   * @param sub The IdTable for the sub result
+   * @param sub A shared pointer to the sub result. Needs to be kept alive for
+   * the lifetime of this generator.
    * @param startSide The start side for the transitive hull
    * @param targetSide The target side for the transitive hull
+   * @param yieldOnce If true, the generator will yield only a single time.
    */
 
-  template <size_t RES_WIDTH, size_t SUB_WIDTH>
-  void computeTransitivePath(IdTable* dynRes, const IdTable& dynSub,
-                             const TransitivePathSide& startSide,
-                             const TransitivePathSide& targetSide) const {
-    auto timer = ad_utility::Timer(ad_utility::Timer::Stopped);
-    timer.start();
+  Result::Generator computeTransitivePath(std::shared_ptr<const Result> sub,
+                                          const TransitivePathSide& startSide,
+                                          const TransitivePathSide& targetSide,
+                                          bool yieldOnce) const {
+    ad_utility::Timer timer{ad_utility::Timer::Started};
+
+    auto edges = setupEdgesMap(sub->idTable(), startSide, targetSide);
+    auto nodesWithDuplicates =
+        setupNodes(sub->idTable(), startSide, targetSide);
+    Set nodesWithoutDuplicates{allocator()};
+    for (const auto& span : nodesWithDuplicates) {
+      nodesWithoutDuplicates.insert(span.begin(), span.end());
+    }
 
-    auto [edges, nodes] =
-        setupMapAndNodes<SUB_WIDTH>(dynSub, startSide, targetSide);
+    runtimeInfo().addDetail("Initialization time", timer.msecs());
 
-    timer.stop();
-    auto initTime = timer.msecs();
-    timer.start();
+    // Technically we should pass the localVocab of `sub` here, but this will
+    // just lead to a merge with itself later on in the pipeline.
+    detail::TableColumnWithVocab<const Set&> tableInfo{
+        nullptr, nodesWithoutDuplicates, LocalVocab{}};
 
-    Map hull{allocator()};
-    if (!targetSide.isVariable()) {
-      hull = transitiveHull(edges, nodes, std::get<Id>(targetSide.value_));
-    } else {
-      hull = transitiveHull(edges, nodes, std::nullopt);
-    }
-
-    timer.stop();
-    auto hullTime = timer.msecs();
-    timer.start();
+    NodeGenerator hull = transitiveHull(
+        edges, sub->getCopyOfLocalVocab(), std::span{&tableInfo, 1},
+        targetSide.isVariable()
+            ? std::nullopt
+            : std::optional{std::get<Id>(targetSide.value_)});
 
-    fillTableWithHull(*dynRes, hull, startSide.outputCol_,
-                      targetSide.outputCol_);
-    timer.stop();
-    auto fillTime = timer.msecs();
+    auto result = fillTableWithHull(std::move(hull), startSide.outputCol_,
+                                    targetSide.outputCol_, yieldOnce);
 
-    auto& info = runtimeInfo();
-    info.addDetail("Initialization time", initTime.count());
-    info.addDetail("Hull time", hullTime.count());
-    info.addDetail("IdTable fill time", fillTime.count());
+    // Iterate over generator to prevent lifetime issues
+    for (auto& pair : result) {
+      co_yield pair;
+    }
   };
 
  protected:
@@ -142,7 +152,7 @@ class TransitivePathImpl : public TransitivePathBase {
    *
    * @return Result The result of the TransitivePath operation
    */
-  ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override {
+  ProtoResult computeResult(bool requestLaziness) override {
     if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() &&
         rhs_.isVariable()) {
       AD_THROW(
@@ -151,161 +161,170 @@ class TransitivePathImpl : public TransitivePathBase {
           "not supported");
     }
     auto [startSide, targetSide] = decideDirection();
-    std::shared_ptr<const Result> subRes = subtree_->getResult();
-
-    IdTable idTable{allocator()};
-
-    idTable.setNumColumns(getResultWidth());
-
-    size_t subWidth = subRes->idTable().numColumns();
+    // In order to traverse the graph represented by this result, we need random
+    // access across the whole table, so it doesn't make sense to lazily compute
+    // the result.
+    std::shared_ptr<const Result> subRes = subtree_->getResult(false);
 
     if (startSide.isBoundVariable()) {
       std::shared_ptr<const Result> sideRes =
-          startSide.treeAndCol_.value().first->getResult();
-      size_t sideWidth = sideRes->idTable().numColumns();
+          startSide.treeAndCol_.value().first->getResult(true);
 
-      CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}),
-                      &TransitivePathImpl<T>::computeTransitivePathBound, this,
-                      &idTable, subRes->idTable(), startSide, targetSide,
-                      sideRes->idTable());
+      auto gen =
+          computeTransitivePathBound(std::move(subRes), startSide, targetSide,
+                                     std::move(sideRes), !requestLaziness);
 
-      return {std::move(idTable), resultSortedOn(),
-              Result::getMergedLocalVocab(*sideRes, *subRes)};
+      return requestLaziness
+                 ? ProtoResult{std::move(gen), resultSortedOn()}
+                 : ProtoResult{cppcoro::getSingleElement(std::move(gen)),
+                               resultSortedOn()};
     }
-    CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}),
-                    &TransitivePathImpl<T>::computeTransitivePath, this,
-                    &idTable, subRes->idTable(), startSide, targetSide);
-
-    // NOTE: The only place, where the input to a transitive path operation is
-    // not an index scan (which has an empty local vocabulary by default) is the
-    // `LocalVocabTest`. But it doesn't harm to propagate the local vocab here
-    // either.
-    return {std::move(idTable), resultSortedOn(),
-            subRes->getSharedLocalVocab()};
-  };
+    auto gen = computeTransitivePath(std::move(subRes), startSide, targetSide,
+                                     !requestLaziness);
+    return requestLaziness
+               ? ProtoResult{std::move(gen), resultSortedOn()}
+               : ProtoResult{cppcoro::getSingleElement(std::move(gen)),
+                             resultSortedOn()};
+  }
 
   /**
-   * @brief Compute the transitive hull starting at the given nodes,
-   * using the given Map.
-   *
-   * @param edges Adjacency lists, mapping Ids (nodes) to their connected
+   * @brief Depth-first search to find connected nodes in the graph.
+   * @param edges The adjacency lists, mapping Ids (nodes) to their connected
    * Ids.
-   * @param nodes A list of Ids. These Ids are used as starting points for the
-   * transitive hull. Thus, this parameter guides the performance of this
-   * algorithm.
-   * @param target Optional target Id. If supplied, only paths which end
-   * in this Id are added to the hull.
-   * @return Map Maps each Id to its connected Ids in the transitive hull
+   * @param startNode The node to start the search from.
+   * @param target Optional target Id. If supplied, only paths which end in this
+   * Id are added to the result.
+   * @return A set of connected nodes in the graph.
    */
-  Map transitiveHull(const T& edges, const std::vector<Id>& startNodes,
-                     std::optional<Id> target) const {
-    // For every node do a dfs on the graph
-    Map hull{allocator()};
-
+  Set findConnectedNodes(const T& edges, Id startNode,
+                         const std::optional<Id>& target) const {
     std::vector<std::pair<Id, size_t>> stack;
     ad_utility::HashSetWithMemoryLimit<Id> marks{
         getExecutionContext()->getAllocator()};
-    for (auto startNode : startNodes) {
-      if (hull.contains(startNode)) {
-        // We have already computed the hull for this node
-        continue;
-      }
+    Set connectedNodes{getExecutionContext()->getAllocator()};
+    stack.emplace_back(startNode, 0);
 
-      marks.clear();
-      stack.clear();
-      stack.push_back({startNode, 0});
+    if (minDist_ == 0 && (!target.has_value() || startNode == target.value())) {
+      connectedNodes.insert(startNode);
+    }
 
-      if (minDist_ == 0 &&
-          (!target.has_value() || startNode == target.value())) {
-        insertIntoMap(hull, startNode, startNode);
-      }
+    while (!stack.empty()) {
+      checkCancellation();
+      auto [node, steps] = stack.back();
+      stack.pop_back();
 
-      while (!stack.empty()) {
-        checkCancellation();
-        auto [node, steps] = stack.back();
-        stack.pop_back();
-
-        if (steps <= maxDist_ && marks.count(node) == 0) {
-          if (steps >= minDist_) {
-            marks.insert(node);
-            if (!target.has_value() || node == target.value()) {
-              insertIntoMap(hull, startNode, node);
-            }
+      if (steps <= maxDist_ && marks.count(node) == 0) {
+        if (steps >= minDist_) {
+          marks.insert(node);
+          if (!target.has_value() || node == target.value()) {
+            connectedNodes.insert(node);
           }
+        }
 
-          const auto& successors = edges.successors(node);
-          for (auto successor : successors) {
-            stack.push_back({successor, steps + 1});
-          }
+        const auto& successors = edges.successors(node);
+        for (auto successor : successors) {
+          stack.emplace_back(successor, steps + 1);
+        }
+      }
+    }
+    return connectedNodes;
+  }
+
+  /**
+   * @brief Compute the transitive hull starting at the given nodes,
+   * using the given Map.
+   *
+   * @param edges Adjacency lists, mapping Ids (nodes) to their connected
+   * Ids.
+   * @param startNodes A range that yields an instantiation of
+   * `TableColumnWithVocab` that can be consumed to create a transitive hull.
+   * @param target Optional target Id. If supplied, only paths which end
+   * in this Id are added to the hull.
+   * @return A generator of start nodes paired with their connected Ids
+   */
+  NodeGenerator transitiveHull(const T& edges, LocalVocab edgesVocab,
+                               std::ranges::range auto startNodes,
+                               std::optional<Id> target) const {
+    ad_utility::Timer timer{ad_utility::Timer::Stopped};
+    for (auto&& tableColumn : startNodes) {
+      timer.cont();
+      LocalVocab mergedVocab = std::move(tableColumn.vocab_);
+      mergedVocab.mergeWith(std::span{&edgesVocab, 1});
+      size_t currentRow = 0;
+      for (Id startNode : tableColumn.column_) {
+        Set connectedNodes = findConnectedNodes(edges, startNode, target);
+        if (!connectedNodes.empty()) {
+          runtimeInfo().addDetail("Hull time", timer.msecs());
+          timer.stop();
+          co_yield NodeWithTargets{startNode, std::move(connectedNodes),
+                                   mergedVocab.clone(), tableColumn.table_,
+                                   currentRow};
+          timer.cont();
         }
+        currentRow++;
       }
+      timer.stop();
     }
-    return hull;
   }
 
   /**
    * @brief Prepare a Map and a nodes vector for the transitive hull
    * computation.
    *
-   * @tparam SUB_WIDTH Number of columns of the sub table
    * @param sub The sub table result
    * @param startSide The TransitivePathSide where the edges start
    * @param targetSide The TransitivePathSide where the edges end
-   * @return std::pair<Map, std::vector<Id>> A Map and Id vector (nodes) for the
-   * transitive hull computation
+   * @return std::vector<std::span<const Id>> A vector of spans of nodes for
+   * the transitive hull computation
    */
-  template <size_t SUB_WIDTH>
-  std::pair<T, std::vector<Id>> setupMapAndNodes(
+  std::vector<std::span<const Id>> setupNodes(
       const IdTable& sub, const TransitivePathSide& startSide,
       const TransitivePathSide& targetSide) const {
-    std::vector<Id> nodes;
-    auto edges = setupEdgesMap(sub, startSide, targetSide);
+    std::vector<std::span<const Id>> result;
 
     // id -> var|id
     if (!startSide.isVariable()) {
-      nodes.push_back(std::get<Id>(startSide.value_));
+      result.emplace_back(&std::get<Id>(startSide.value_), 1);
       // var -> var
     } else {
       std::span<const Id> startNodes = sub.getColumn(startSide.subCol_);
-      // TODO<C++23> Use ranges::to.
-      nodes.insert(nodes.end(), startNodes.begin(), startNodes.end());
+      result.emplace_back(startNodes);
       if (minDist_ == 0) {
         std::span<const Id> targetNodes = sub.getColumn(targetSide.subCol_);
-        nodes.insert(nodes.end(), targetNodes.begin(), targetNodes.end());
+        result.emplace_back(targetNodes);
       }
     }
 
-    return {std::move(edges), std::move(nodes)};
+    return result;
   };
 
   /**
    * @brief Prepare a Map and a nodes vector for the transitive hull
    * computation.
    *
-   * @tparam SUB_WIDTH Number of columns of the sub table
-   * @tparam SIDE_WIDTH Number of columns of the startSideTable
-   * @param sub The sub table result
    * @param startSide The TransitivePathSide where the edges start
-   * @param targetSide The TransitivePathSide where the edges end
    * @param startSideTable An IdTable containing the Ids for the startSide
-   * @return std::pair<Map, std::vector<Id>> A Map and Id vector (nodes) for the
-   * transitive hull computation
+   * @return cppcoro::generator<TableColumnWithVocab> A generator for
+   * the transitive hull computation
    */
-  template <size_t SUB_WIDTH, size_t SIDE_WIDTH>
-  std::pair<T, std::vector<Id>> setupMapAndNodes(
-      const IdTable& sub, const TransitivePathSide& startSide,
-      const TransitivePathSide& targetSide,
-      const IdTable& startSideTable) const {
-    std::vector<Id> nodes;
-    auto edges = setupEdgesMap(sub, startSide, targetSide);
-
-    // Bound -> var|id
-    std::span<const Id> startNodes =
-        startSideTable.getColumn(startSide.treeAndCol_.value().second);
-    // TODO<C++23> Use ranges::to.
-    nodes.insert(nodes.end(), startNodes.begin(), startNodes.end());
-
-    return {std::move(edges), std::move(nodes)};
+  cppcoro::generator<TableColumnWithVocab> setupNodes(
+      const TransitivePathSide& startSide,
+      std::shared_ptr<const Result> startSideResult) const {
+    if (startSideResult->isFullyMaterialized()) {
+      // Bound -> var|id
+      std::span<const Id> startNodes = startSideResult->idTable().getColumn(
+          startSide.treeAndCol_.value().second);
+      co_yield TableColumnWithVocab{&startSideResult->idTable(), startNodes,
+                                    startSideResult->getCopyOfLocalVocab()};
+    } else {
+      for (auto& [idTable, localVocab] : startSideResult->idTables()) {
+        // Bound -> var|id
+        std::span<const Id> startNodes =
+            idTable.getColumn(startSide.treeAndCol_.value().second);
+        co_yield TableColumnWithVocab{&idTable, startNodes,
+                                      std::move(localVocab)};
+      }
+    }
   };
 
   virtual T setupEdgesMap(const IdTable& dynSub,
diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h
index c615e8350c..c76ee1b9d6 100644
--- a/src/engine/idTable/IdTable.h
+++ b/src/engine/idTable/IdTable.h
@@ -330,9 +330,16 @@ class IdTable {
   T& at(size_t row, size_t column) requires(!isView) {
     return data().at(column).at(row);
   }
-  const T& at(size_t row, size_t column) const {
+  // TODO<C++26> Remove overload for `isView` and drop requires clause.
+  const T& at(size_t row, size_t column) const requires(!isView) {
     return data().at(column).at(row);
   }
+  // `std::span::at` is a C++26 feature, so we have to implement it ourselves.
+  const T& at(size_t row, size_t column) const requires(isView) {
+    const auto& col = data().at(column);
+    AD_CONTRACT_CHECK(row < col.size());
+    return col[row];
+  }
 
   // Get a reference to the `i`-th row. The returned proxy objects can be
   // implicitly and trivially converted to `row_reference`. For the design
diff --git a/test/TransitivePathTest.cpp b/test/TransitivePathTest.cpp
index e616ad2e2b..2e6da1855b 100644
--- a/test/TransitivePathTest.cpp
+++ b/test/TransitivePathTest.cpp
@@ -4,7 +4,6 @@
 //         Johannes Herrmann (johannes.r.herrmann(at)gmail.com)
 
 #include <gmock/gmock.h>
-#include <gtest/gtest.h>
 
 #include <limits>
 #include <memory>
@@ -14,7 +13,6 @@
 #include "engine/QueryExecutionTree.h"
 #include "engine/TransitivePathBase.h"
 #include "engine/ValuesForTesting.h"
-#include "gtest/gtest.h"
 #include "util/GTestHelpers.h"
 #include "util/IdTableHelpers.h"
 #include "util/IndexTestHelpers.h"
@@ -26,13 +24,17 @@ using Vars = std::vector<std::optional<Variable>>;
 
 }  // namespace
 
-class TransitivePathTest : public testing::TestWithParam<bool> {
+// The first bool indicates if binary search should be used (true) or hash map
+// based search (false). The second bool indicates if the result should be
+// requested lazily.
+class TransitivePathTest
+    : public testing::TestWithParam<std::tuple<bool, bool>> {
  public:
   [[nodiscard]] static std::pair<std::shared_ptr<TransitivePathBase>,
                                  QueryExecutionContext*>
   makePath(IdTable input, Vars vars, TransitivePathSide left,
            TransitivePathSide right, size_t minDist, size_t maxDist) {
-    bool useBinSearch = GetParam();
+    bool useBinSearch = std::get<0>(GetParam());
     auto qec = getQec();
     auto subtree = ad_utility::makeExecutionTree<ValuesForTesting>(
         qec, std::move(input), vars);
@@ -42,6 +44,7 @@ class TransitivePathTest : public testing::TestWithParam<bool> {
             qec};
   }
 
+  // ___________________________________________________________________________
   [[nodiscard]] static std::shared_ptr<TransitivePathBase> makePathUnbound(
       IdTable input, Vars vars, TransitivePathSide left,
       TransitivePathSide right, size_t minDist, size_t maxDist) {
@@ -50,29 +53,75 @@ class TransitivePathTest : public testing::TestWithParam<bool> {
     return T;
   }
 
-  [[nodiscard]] static std::shared_ptr<TransitivePathBase> makePathLeftBound(
-      IdTable input, Vars vars, IdTable sideTable, size_t sideTableCol,
-      Vars sideVars, TransitivePathSide left, TransitivePathSide right,
-      size_t minDist, size_t maxDist) {
+  // Create bound transitive path with a side table that is either a single
+  // table or multiple ones.
+  [[nodiscard]] static std::shared_ptr<TransitivePathBase> makePathBound(
+      bool isLeft, IdTable input, Vars vars,
+      std::variant<IdTable, std::vector<IdTable>> sideTable,
+      size_t sideTableCol, Vars sideVars, TransitivePathSide left,
+      TransitivePathSide right, size_t minDist, size_t maxDist,
+      bool forceFullyMaterialized = false) {
     auto [T, qec] = makePath(std::move(input), vars, std::move(left),
                              std::move(right), minDist, maxDist);
-    auto leftOp = ad_utility::makeExecutionTree<ValuesForTesting>(
-        qec, std::move(sideTable), sideVars);
-    return T->bindLeftSide(leftOp, sideTableCol);
+    auto operation =
+        std::holds_alternative<IdTable>(sideTable)
+            ? ad_utility::makeExecutionTree<ValuesForTesting>(
+                  qec, std::move(std::get<IdTable>(sideTable)), sideVars, false,
+                  std::vector<ColumnIndex>{sideTableCol}, LocalVocab{},
+                  std::nullopt, forceFullyMaterialized)
+            : ad_utility::makeExecutionTree<ValuesForTesting>(
+                  qec, std::move(std::get<std::vector<IdTable>>(sideTable)),
+                  sideVars, false, std::vector<ColumnIndex>{sideTableCol});
+    return isLeft ? T->bindLeftSide(operation, sideTableCol)
+                  : T->bindRightSide(operation, sideTableCol);
   }
 
-  [[nodiscard]] static std::shared_ptr<TransitivePathBase> makePathRightBound(
-      IdTable input, Vars vars, IdTable sideTable, size_t sideTableCol,
-      Vars sideVars, TransitivePathSide left, TransitivePathSide right,
-      size_t minDist, size_t maxDist) {
-    auto [T, qec] = makePath(std::move(input), vars, std::move(left),
-                             std::move(right), minDist, maxDist);
-    auto rightOp = ad_utility::makeExecutionTree<ValuesForTesting>(
-        qec, std::move(sideTable), sideVars);
-    return T->bindRightSide(rightOp, sideTableCol);
+  // ___________________________________________________________________________
+  static std::vector<IdTable> split(const IdTable& idTable) {
+    std::vector<IdTable> result;
+    for (const auto& row : idTable) {
+      result.emplace_back(idTable.numColumns(), idTable.getAllocator());
+      result.back().push_back(row);
+    }
+    return result;
+  }
+
+  // ___________________________________________________________________________
+  static bool requestLaziness() { return std::get<1>(GetParam()); }
+
+  // ___________________________________________________________________________
+  void assertResultMatchesIdTable(const Result& result, const IdTable& expected,
+                                  ad_utility::source_location loc =
+                                      ad_utility::source_location::current()) {
+    auto t = generateLocationTrace(loc);
+    using ::testing::UnorderedElementsAreArray;
+    ASSERT_NE(result.isFullyMaterialized(), requestLaziness());
+    if (requestLaziness()) {
+      const auto& [idTable, localVocab] =
+          aggregateTables(std::move(result.idTables()), expected.numColumns());
+      EXPECT_THAT(idTable, UnorderedElementsAreArray(expected));
+    } else {
+      EXPECT_THAT(result.idTable(), UnorderedElementsAreArray(expected));
+    }
+  }
+
+  // Call testCase three times with differing arguments. This is used to test
+  // scenarios where the same input table is delivered in different splits
+  // either wrapped within a generator or as a single table.
+  static void runTestWithForcedSideTableScenarios(
+      const std::invocable<std::variant<IdTable, std::vector<IdTable>>,
+                           bool> auto& testCase,
+      IdTable idTable,
+      ad_utility::source_location loc =
+          ad_utility::source_location::current()) {
+    auto trace = generateLocationTrace(loc);
+    testCase(idTable.clone(), false);
+    testCase(split(idTable), false);
+    testCase(idTable.clone(), true);
   }
 };
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, idToId) {
   auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}});
 
@@ -84,11 +133,11 @@ TEST_P(TransitivePathTest, idToId) {
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 1, std::numeric_limits<size_t>::max());
 
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, idToVar) {
   auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}});
 
@@ -100,11 +149,11 @@ TEST_P(TransitivePathTest, idToVar) {
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 1, std::numeric_limits<size_t>::max());
 
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, varToId) {
   auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}});
 
@@ -120,11 +169,11 @@ TEST_P(TransitivePathTest, varToId) {
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 1, std::numeric_limits<size_t>::max());
 
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, idToVarMinLengthZero) {
   auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}});
 
@@ -136,11 +185,11 @@ TEST_P(TransitivePathTest, idToVarMinLengthZero) {
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 0, std::numeric_limits<size_t>::max());
 
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, varToIdMinLengthZero) {
   auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}});
 
@@ -157,11 +206,11 @@ TEST_P(TransitivePathTest, varToIdMinLengthZero) {
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 0, std::numeric_limits<size_t>::max());
 
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, varTovar) {
   auto sub = makeIdTableFromVector({
       {0, 1},
@@ -185,11 +234,11 @@ TEST_P(TransitivePathTest, varTovar) {
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 1, std::numeric_limits<size_t>::max());
 
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, unlimitedMaxLength) {
   auto sub = makeIdTableFromVector({{0, 2},
                                     {2, 4},
@@ -225,11 +274,11 @@ TEST_P(TransitivePathTest, unlimitedMaxLength) {
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 1, std::numeric_limits<size_t>::max());
 
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, idToLeftBound) {
   auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}, {3, 4}});
 
@@ -247,29 +296,33 @@ TEST_P(TransitivePathTest, idToLeftBound) {
 
   TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
   TransitivePathSide right(std::nullopt, 1, V(4), 1);
-  {
-    auto T = makePathLeftBound(
-        sub.clone(), {Variable{"?start"}, Variable{"?target"}},
-        leftOpTable.clone(), 1, {Variable{"?x"}, Variable{"?start"}}, left,
-        right, 0, std::numeric_limits<size_t>::max());
-
-    auto resultTable = T->computeResultOnlyForTesting();
-    ASSERT_THAT(resultTable.idTable(),
-                ::testing::UnorderedElementsAreArray(expected));
-  }
-  {
-    auto T = makePathLeftBound(
-        std::move(sub), {Variable{"?start"}, Variable{"?target"}},
-        std::move(leftOpTable), 1, {std::nullopt, Variable{"?start"}},
-        std::move(left), std::move(right), 0,
-        std::numeric_limits<size_t>::max());
-
-    auto resultTable = T->computeResultOnlyForTesting();
-    ASSERT_THAT(resultTable.idTable(),
-                ::testing::UnorderedElementsAreArray(expected));
-  }
+  runTestWithForcedSideTableScenarios(
+      [&](auto tableVariant, bool forceFullyMaterialized) {
+        auto T = makePathBound(
+            true, sub.clone(), {Variable{"?start"}, Variable{"?target"}},
+            std::move(tableVariant), 1, {Variable{"?x"}, Variable{"?start"}},
+            left, right, 0, std::numeric_limits<size_t>::max(),
+            forceFullyMaterialized);
+
+        auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+        assertResultMatchesIdTable(resultTable, expected);
+      },
+      leftOpTable.clone());
+  runTestWithForcedSideTableScenarios(
+      [&](auto tableVariant, bool forceFullyMaterialized) {
+        auto T = makePathBound(
+            true, sub.clone(), {Variable{"?start"}, Variable{"?target"}},
+            std::move(tableVariant), 1, {std::nullopt, Variable{"?start"}},
+            left, right, 0, std::numeric_limits<size_t>::max(),
+            forceFullyMaterialized);
+
+        auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+        assertResultMatchesIdTable(resultTable, expected);
+      },
+      std::move(leftOpTable));
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, idToRightBound) {
   auto sub = makeIdTableFromVector({
       {0, 1},
@@ -293,29 +346,33 @@ TEST_P(TransitivePathTest, idToRightBound) {
 
   TransitivePathSide left(std::nullopt, 0, V(0), 0);
   TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
-  {
-    auto T = makePathRightBound(
-        sub.clone(), {Variable{"?start"}, Variable{"?target"}},
-        rightOpTable.clone(), 0, {Variable{"?target"}, Variable{"?x"}}, left,
-        right, 0, std::numeric_limits<size_t>::max());
-
-    auto resultTable = T->computeResultOnlyForTesting();
-    ASSERT_THAT(resultTable.idTable(),
-                ::testing::UnorderedElementsAreArray(expected));
-  }
-  {
-    auto T = makePathRightBound(
-        std::move(sub), {Variable{"?start"}, Variable{"?target"}},
-        std::move(rightOpTable), 0, {Variable{"?target"}, std::nullopt},
-        std::move(left), std::move(right), 0,
-        std::numeric_limits<size_t>::max());
-
-    auto resultTable = T->computeResultOnlyForTesting();
-    ASSERT_THAT(resultTable.idTable(),
-                ::testing::UnorderedElementsAreArray(expected));
-  }
+  runTestWithForcedSideTableScenarios(
+      [&](auto tableVariant, bool forceFullyMaterialized) {
+        auto T = makePathBound(
+            false, sub.clone(), {Variable{"?start"}, Variable{"?target"}},
+            std::move(tableVariant), 0, {Variable{"?target"}, Variable{"?x"}},
+            left, right, 0, std::numeric_limits<size_t>::max(),
+            forceFullyMaterialized);
+
+        auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+        assertResultMatchesIdTable(resultTable, expected);
+      },
+      rightOpTable.clone());
+  runTestWithForcedSideTableScenarios(
+      [&](auto tableVariant, bool forceFullyMaterialized) {
+        auto T = makePathBound(
+            false, sub.clone(), {Variable{"?start"}, Variable{"?target"}},
+            std::move(tableVariant), 0, {Variable{"?target"}, std::nullopt},
+            left, right, 0, std::numeric_limits<size_t>::max(),
+            forceFullyMaterialized);
+
+        auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+        assertResultMatchesIdTable(resultTable, expected);
+      },
+      std::move(rightOpTable));
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, leftBoundToVar) {
   auto sub = makeIdTableFromVector({
       {1, 2},
@@ -344,19 +401,21 @@ TEST_P(TransitivePathTest, leftBoundToVar) {
 
   TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
   TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
-  {
-    auto T = makePathLeftBound(
-        std::move(sub), {Variable{"?start"}, Variable{"?target"}},
-        std::move(leftOpTable), 1, {Variable{"?x"}, Variable{"?start"}},
-        std::move(left), std::move(right), 0,
-        std::numeric_limits<size_t>::max());
-
-    auto resultTable = T->computeResultOnlyForTesting();
-    ASSERT_THAT(resultTable.idTable(),
-                ::testing::UnorderedElementsAreArray(expected));
-  }
+  runTestWithForcedSideTableScenarios(
+      [&](auto tableVariant, bool forceFullyMaterialized) {
+        auto T = makePathBound(
+            true, sub.clone(), {Variable{"?start"}, Variable{"?target"}},
+            std::move(tableVariant), 1, {Variable{"?x"}, Variable{"?start"}},
+            left, right, 0, std::numeric_limits<size_t>::max(),
+            forceFullyMaterialized);
+
+        auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+        assertResultMatchesIdTable(resultTable, expected);
+      },
+      std::move(leftOpTable));
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, rightBoundToVar) {
   auto sub = makeIdTableFromVector({
       {1, 2},
@@ -385,16 +444,98 @@ TEST_P(TransitivePathTest, rightBoundToVar) {
 
   TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
   TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
-  auto T = makePathRightBound(
-      std::move(sub), {Variable{"?start"}, Variable{"?target"}},
-      std::move(rightOpTable), 0, {Variable{"?target"}, Variable{"?x"}},
-      std::move(left), std::move(right), 0, std::numeric_limits<size_t>::max());
-
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  runTestWithForcedSideTableScenarios(
+      [&](auto tableVariant, bool forceFullyMaterialized) {
+        auto T = makePathBound(
+            false, sub.clone(), {Variable{"?start"}, Variable{"?target"}},
+            std::move(tableVariant), 0, {Variable{"?target"}, Variable{"?x"}},
+            left, right, 0, std::numeric_limits<size_t>::max(),
+            forceFullyMaterialized);
+
+        auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+        assertResultMatchesIdTable(resultTable, expected);
+      },
+      std::move(rightOpTable));
+}
+
+// _____________________________________________________________________________
+TEST_P(TransitivePathTest, startNodesWithNoMatchesRightBound) {
+  auto sub = makeIdTableFromVector({
+      {1, 2},
+      {3, 4},
+  });
+
+  auto rightOpTable = makeIdTableFromVector({
+      {2, 5},
+      {3, 6},
+      {4, 7},
+  });
+
+  auto expected = makeIdTableFromVector({
+      {1, 2, 5},
+      {3, 4, 7},
+  });
+
+  TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
+  auto T = makePathBound(
+      false, sub.clone(), {Variable{"?start"}, Variable{"?target"}},
+      split(rightOpTable), 0, {Variable{"?target"}, Variable{"?x"}}, left,
+      right, 1, std::numeric_limits<size_t>::max());
+
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
+}
+
+// _____________________________________________________________________________
+TEST_P(TransitivePathTest, emptySideTable) {
+  auto sub = makeIdTableFromVector({
+      {1, 2},
+      {3, 4},
+  });
+
+  auto expected = makeIdTableFromVector({});
+
+  TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
+  auto T = makePathBound(true, sub.clone(),
+                         {Variable{"?start"}, Variable{"?target"}},
+                         std::vector<IdTable>{}, 0, {Variable{"?start"}}, left,
+                         right, 0, std::numeric_limits<size_t>::max());
+
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
+}
+
+// _____________________________________________________________________________
+TEST_P(TransitivePathTest, startNodesWithNoMatchesLeftBound) {
+  auto sub = makeIdTableFromVector({
+      {1, 2},
+      {3, 4},
+  });
+
+  auto leftOpTable = makeIdTableFromVector({
+      {2, 5},
+      {3, 6},
+      {4, 7},
+  });
+
+  auto expected = makeIdTableFromVector({
+      {3, 4, 6},
+  });
+
+  TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
+  auto T = makePathBound(
+      true, sub.clone(), {Variable{"?start"}, Variable{"?target"}},
+      split(leftOpTable), 0, {Variable{"?start"}, Variable{"?x"}}, left, right,
+      1, std::numeric_limits<size_t>::max());
+
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, maxLength2FromVariable) {
   auto sub = makeIdTableFromVector({
       {0, 2},
@@ -426,11 +567,11 @@ TEST_P(TransitivePathTest, maxLength2FromVariable) {
   auto T =
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 1, 2);
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, maxLength2FromId) {
   auto sub = makeIdTableFromVector({
       {0, 2},
@@ -454,11 +595,11 @@ TEST_P(TransitivePathTest, maxLength2FromId) {
   auto T =
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 1, 2);
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, maxLength2ToId) {
   auto sub = makeIdTableFromVector({
       {0, 2},
@@ -481,11 +622,11 @@ TEST_P(TransitivePathTest, maxLength2ToId) {
   auto T =
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 1, 2);
-  auto resultTable = T->computeResultOnlyForTesting();
-  ASSERT_THAT(resultTable.idTable(),
-              ::testing::UnorderedElementsAreArray(expected));
+  auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
+  assertResultMatchesIdTable(resultTable, expected);
 }
 
+// _____________________________________________________________________________
 TEST_P(TransitivePathTest, zeroLengthException) {
   auto sub = makeIdTableFromVector({
       {0, 2},
@@ -504,15 +645,19 @@ TEST_P(TransitivePathTest, zeroLengthException) {
       makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
                       left, right, 0, std::numeric_limits<size_t>::max());
   AD_EXPECT_THROW_WITH_MESSAGE(
-      T->computeResultOnlyForTesting(),
+      T->computeResultOnlyForTesting(requestLaziness()),
       ::testing::ContainsRegex("This query might have to evaluate the empty "
                                "path, which is currently "
                                "not supported"));
 }
 
-INSTANTIATE_TEST_SUITE_P(TransitivePathTestSuite, TransitivePathTest,
-                         testing::Bool(),
-                         [](const testing::TestParamInfo<bool>& info) {
-                           return info.param ? "TransitivePathBinSearch"
-                                             : "TransitivePathHashMap";
-                         });
+// _____________________________________________________________________________
+INSTANTIATE_TEST_SUITE_P(
+    TransitivePathTestSuite, TransitivePathTest,
+    ::testing::Combine(::testing::Bool(), ::testing::Bool()),
+    [](const testing::TestParamInfo<std::tuple<bool, bool>>& info) {
+      std::string result = std::get<0>(info.param) ? "TransitivePathBinSearch"
+                                                   : "TransitivePathHashMap";
+      result += std::get<1>(info.param) ? "Lazy" : "FullyMaterialized";
+      return result;
+    });
diff --git a/test/util/IdTableHelpers.cpp b/test/util/IdTableHelpers.cpp
index d643476256..0b3b0a6a2e 100644
--- a/test/util/IdTableHelpers.cpp
+++ b/test/util/IdTableHelpers.cpp
@@ -248,3 +248,15 @@ std::shared_ptr<QueryExecutionTree> idTableToExecutionTree(
   return ad_utility::makeExecutionTree<ValuesForTesting>(qec, input.clone(),
                                                          std::move(vars));
 }
+
+// _____________________________________________________________________________
+std::pair<IdTable, std::vector<LocalVocab>> aggregateTables(
+    Result::Generator generator, size_t numColumns) {
+  IdTable aggregateTable{numColumns, ad_utility::makeUnlimitedAllocator<Id>()};
+  std::vector<LocalVocab> localVocabs;
+  for (auto& [idTable, localVocab] : generator) {
+    localVocabs.emplace_back(std::move(localVocab));
+    aggregateTable.insertAtEnd(idTable);
+  }
+  return {std::move(aggregateTable), std::move(localVocabs)};
+}
diff --git a/test/util/IdTableHelpers.h b/test/util/IdTableHelpers.h
index 474c0dfd03..bc7035cd2f 100644
--- a/test/util/IdTableHelpers.h
+++ b/test/util/IdTableHelpers.h
@@ -256,3 +256,8 @@ IdTable createRandomlyFilledIdTable(
 /// and filling it with dummy variables.
 std::shared_ptr<QueryExecutionTree> idTableToExecutionTree(
     QueryExecutionContext*, const IdTable&);
+
+// Fully consume the given generator, concatenating all yielded tables into a
+// single `IdTable` and collecting the corresponding local vocabs in a vector.
+std::pair<IdTable, std::vector<LocalVocab>> aggregateTables(
+    Result::Generator generator, size_t numColumns);

From e5284804e27e5f6583a7dc0990947497404b8720 Mon Sep 17 00:00:00 2001
From: Joe <Johannes.R.Herrmann@gmail.com>
Date: Mon, 4 Nov 2024 14:58:51 +0100
Subject: [PATCH 02/12] Add a numPathsPerTarget parameter to PathSearch (#1596)

When this parameter is set, the `PathSearch` service limits the number of paths per `[source, target]` pair. This makes it possible to use the path search for cases where enumerating all paths would exceed the available time and memory constraints.
---
 docs/path_search.md                  |  39 ++++++++-
 src/engine/PathSearch.cpp            |  43 ++++++----
 src/engine/PathSearch.h              |   7 +-
 src/parser/GraphPatternOperation.cpp |   9 +-
 src/parser/GraphPatternOperation.h   |   1 +
 test/PathSearchTest.cpp              |  33 ++++++++
 test/QueryPlannerTest.cpp            | 120 +++++++++++++++++++++++++++
 7 files changed, 233 insertions(+), 19 deletions(-)

diff --git a/docs/path_search.md b/docs/path_search.md
index 10ae4e0f51..6c9d161377 100644
--- a/docs/path_search.md
+++ b/docs/path_search.md
@@ -48,6 +48,10 @@ SELECT ?start ?end ?path ?edge WHERE {
   **one target**. Sources and targets are paired based on their index (i.e. the paths
   from the first source to the first target are searched, then the second source and
   target, and so on).
+- **pathSearch:numPathsPerTarget** (optional): The path search only searches and stores paths
+  as long as the number of found paths is less than or equal to the value of the parameter. Expects an integer.
+  Example: if the value is 5, the search enumerates paths until 5 paths have been found.
+  Any further paths are ignored.
 
 
 ### Example 1: Single Source and Target
@@ -170,7 +174,7 @@ SELECT ?start ?end ?path ?edge WHERE {
 }
 ```
 
-This is esecially useful for [N-ary relations](https://www.w3.org/TR/swbp-n-aryRelations/). 
+This is especially useful for [N-ary relations](https://www.w3.org/TR/swbp-n-aryRelations/). 
 Considering the example above, it is possible to query additional relations of `?middle`:
 
 ```sparql
@@ -255,6 +259,39 @@ SELECT ?start ?end ?path ?edge WHERE {
 }
 ```
 
+### Example 5: Limit Number of Paths per Target
+
+It is possible to limit how many paths per target are returned. This is especially useful if
+the query uses a lot of memory. In that case, it is possible to query a limited number of
+paths to debug where the problem is.
+
+The following query, for example, will only return one path per source and target pair.
+I.e. one path for `(<source1>, <target1>)`, one path for `(<source1>, <target2>)` and so on.
+
+```sparql
+PREFIX pathSearch: <https://qlever.cs.uni-freiburg.de/pathSearch/>
+
+SELECT ?start ?end ?path ?edge WHERE {
+  SERVICE pathSearch: {
+    _:path pathSearch:algorithm pathSearch:allPaths ;
+           pathSearch:source <source1> ;
+           pathSearch:source <source2> ;
+           pathSearch:target <target1> ;
+           pathSearch:target <target2> ;
+           pathSearch:pathColumn ?path ;
+           pathSearch:edgeColumn ?edge ;
+           pathSearch:start ?start ;
+           pathSearch:end ?end ;
+           pathSearch:numPathsPerTarget 1;
+    {
+      SELECT * WHERE {
+        ?start <predicate> ?end.
+      }
+    }
+  }
+}
+```
+
 ## Error Handling
 
 The Path Search feature will throw errors in the following scenarios:
diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp
index 50f10210a6..9291fba19a 100644
--- a/src/engine/PathSearch.cpp
+++ b/src/engine/PathSearch.cpp
@@ -9,6 +9,7 @@
 #include <iterator>
 #include <optional>
 #include <ranges>
+#include <unordered_map>
 #include <variant>
 #include <vector>
 
@@ -262,7 +263,8 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) {
       allSources = binSearch.getSources();
       sources = allSources;
     }
-    paths = allPaths(sources, targets, binSearch, config_.cartesian_);
+    paths = allPaths(sources, targets, binSearch, config_.cartesian_,
+                     config_.numPathsPerTarget_);
 
     timer.stop();
     auto searchTime = timer.msecs();
@@ -326,15 +328,16 @@ PathSearch::handleSearchSides() const {
 }
 
 // _____________________________________________________________________________
-PathsLimited PathSearch::findPaths(const Id& source,
-                                   const std::unordered_set<uint64_t>& targets,
-                                   const BinSearchWrapper& binSearch) const {
+PathsLimited PathSearch::findPaths(
+    const Id& source, const std::unordered_set<uint64_t>& targets,
+    const BinSearchWrapper& binSearch,
+    std::optional<uint64_t> numPathsPerTarget) const {
   std::vector<Edge> edgeStack;
   Path currentPath{EdgesLimited(allocator())};
   std::unordered_map<
-      uint64_t, PathsLimited, std::hash<uint64_t>, std::equal_to<uint64_t>,
-      ad_utility::AllocatorWithLimit<std::pair<const uint64_t, PathsLimited>>>
-      pathCache{allocator()};
+      uint64_t, uint64_t, std::hash<uint64_t>, std::equal_to<uint64_t>,
+      ad_utility::AllocatorWithLimit<std::pair<const uint64_t, uint64_t>>>
+      numPathsPerNode{allocator()};
   PathsLimited result{allocator()};
   std::unordered_set<uint64_t, std::hash<uint64_t>, std::equal_to<uint64_t>,
                      ad_utility::AllocatorWithLimit<uint64_t>>
@@ -357,9 +360,18 @@ PathsLimited PathSearch::findPaths(const Id& source,
       currentPath.pop_back();
     }
 
+    auto edgeEnd = edge.end_.getBits();
+    if (numPathsPerTarget) {
+      auto numPaths = ++numPathsPerNode[edgeEnd];
+
+      if (numPaths > numPathsPerTarget) {
+        continue;
+      }
+    }
+
     currentPath.push_back(edge);
 
-    if (targets.empty() || targets.contains(edge.end_.getBits())) {
+    if (targets.empty() || targets.contains(edgeEnd)) {
       result.push_back(currentPath);
     }
 
@@ -374,10 +386,10 @@ PathsLimited PathSearch::findPaths(const Id& source,
 }
 
 // _____________________________________________________________________________
-PathsLimited PathSearch::allPaths(std::span<const Id> sources,
-                                  std::span<const Id> targets,
-                                  const BinSearchWrapper& binSearch,
-                                  bool cartesian) const {
+PathsLimited PathSearch::allPaths(
+    std::span<const Id> sources, std::span<const Id> targets,
+    const BinSearchWrapper& binSearch, bool cartesian,
+    std::optional<uint64_t> numPathsPerTarget) const {
   PathsLimited paths{allocator()};
   Path path{EdgesLimited(allocator())};
 
@@ -387,14 +399,15 @@ PathsLimited PathSearch::allPaths(std::span<const Id> sources,
       targetSet.insert(target.getBits());
     }
     for (auto source : sources) {
-      for (const auto& path : findPaths(source, targetSet, binSearch)) {
+      for (const auto& path :
+           findPaths(source, targetSet, binSearch, numPathsPerTarget)) {
         paths.push_back(path);
       }
     }
   } else {
     for (size_t i = 0; i < sources.size(); i++) {
-      for (const auto& path :
-           findPaths(sources[i], {targets[i].getBits()}, binSearch)) {
+      for (const auto& path : findPaths(sources[i], {targets[i].getBits()},
+                                        binSearch, numPathsPerTarget)) {
         paths.push_back(path);
       }
     }
diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h
index 9e330d1d4e..b42f277eb7 100644
--- a/src/engine/PathSearch.h
+++ b/src/engine/PathSearch.h
@@ -98,6 +98,7 @@ struct PathSearchConfiguration {
   Variable edgeColumn_;
   std::vector<Variable> edgeProperties_;
   bool cartesian_ = true;
+  std::optional<uint64_t> numPathsPerTarget_ = std::nullopt;
 
   bool sourceIsVariable() const {
     return std::holds_alternative<Variable>(sources_);
@@ -260,7 +261,8 @@ class PathSearch : public Operation {
    */
   pathSearch::PathsLimited findPaths(
       const Id& source, const std::unordered_set<uint64_t>& targets,
-      const pathSearch::BinSearchWrapper& binSearch) const;
+      const pathSearch::BinSearchWrapper& binSearch,
+      std::optional<uint64_t> numPathsPerTarget) const;
 
   /**
    * @brief Finds all paths in the graph.
@@ -268,7 +270,8 @@ class PathSearch : public Operation {
    */
   pathSearch::PathsLimited allPaths(
       std::span<const Id> sources, std::span<const Id> targets,
-      const pathSearch::BinSearchWrapper& binSearch, bool cartesian) const;
+      const pathSearch::BinSearchWrapper& binSearch, bool cartesian,
+      std::optional<uint64_t> numPathsPerTarget) const;
 
   /**
    * @brief Converts paths to a result table with a specified width.
diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp
index efceda159c..90356bb6fa 100644
--- a/src/parser/GraphPatternOperation.cpp
+++ b/src/parser/GraphPatternOperation.cpp
@@ -128,6 +128,12 @@ void PathQuery::addParameter(const SparqlTriple& triple) {
       throw PathSearchException("The parameter 'cartesian' expects a boolean");
     }
     cartesian_ = object.getBool();
+  } else if (predString.ends_with("numPathsPerTarget>")) {
+    if (!object.isInt()) {
+      throw PathSearchException(
+          "The parameter 'numPathsPerTarget' expects an integer");
+    }
+    numPathsPerTarget_ = object.getInt();
   } else if (predString.ends_with("algorithm>")) {
     if (!object.isIri()) {
       throw PathSearchException("The 'algorithm' value has to be an Iri");
@@ -209,7 +215,8 @@ PathSearchConfiguration PathQuery::toPathSearchConfiguration(
   return PathSearchConfiguration{
       algorithm_,          sources,         targets,
       start_.value(),      end_.value(),    pathColumn_.value(),
-      edgeColumn_.value(), edgeProperties_, cartesian_};
+      edgeColumn_.value(), edgeProperties_, cartesian_,
+      numPathsPerTarget_};
 }
 
 // ____________________________________________________________________________
diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h
index 6367d4e510..060e548045 100644
--- a/src/parser/GraphPatternOperation.h
+++ b/src/parser/GraphPatternOperation.h
@@ -174,6 +174,7 @@ struct PathQuery {
 
   GraphPattern childGraphPattern_;
   bool cartesian_ = true;
+  std::optional<uint64_t> numPathsPerTarget_ = std::nullopt;
 
   /**
    * @brief Add a parameter to the PathQuery from the given triple.
diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp
index da8bd31c94..30ca2b42cf 100644
--- a/test/PathSearchTest.cpp
+++ b/test/PathSearchTest.cpp
@@ -543,6 +543,39 @@ TEST(PathSearchTest, elongatedDiamond) {
               ::testing::UnorderedElementsAreArray(expected));
 }
 
+// _____________________________________________________________________________
+TEST(PathSearchTest, numPathsPerTarget) {
+  auto sub =
+      makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}});
+  auto expected = makeIdTableFromVector({
+      {V(0), V(1), I(0), I(0)},
+      {V(1), V(3), I(0), I(1)},
+      {V(3), V(4), I(0), I(2)},
+      {V(0), V(1), I(1), I(0)},
+      {V(1), V(3), I(1), I(1)},
+      {V(3), V(4), I(1), I(2)},
+      {V(4), V(5), I(1), I(3)},
+  });
+
+  std::vector<Id> sources{V(0)};
+  std::vector<Id> targets{V(4), V(5)};
+  Vars vars = {Variable{"?start"}, Variable{"?end"}};
+  PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS,
+                                 sources,
+                                 targets,
+                                 Var{"?start"},
+                                 Var{"?end"},
+                                 Var{"?edgeIndex"},
+                                 Var{"?pathIndex"},
+                                 {},
+                                 true,
+                                 1};
+
+  auto resultTable = performPathSearch(config, std::move(sub), vars);
+  ASSERT_THAT(resultTable.idTable(),
+              ::testing::UnorderedElementsAreArray(expected));
+}
+
 /**
  * Graph:
  *  0       4
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index adb099ea52..d462a39d61 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -920,6 +920,7 @@ TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsCartesian) {
       "}}}}",
       h::PathSearch(config, true, true, scan("?start", "<p>", "?end")), qec);
 }
+
 TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsNonCartesian) {
   auto scan = h::IndexScanFromStrings;
   auto qec =
@@ -957,6 +958,45 @@ TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsNonCartesian) {
       h::PathSearch(config, true, true, scan("?start", "<p>", "?end")), qec);
 }
 
+// _____________________________________________________________________________
+TEST(QueryPlanner, numPathsPerTarget) {
+  auto scan = h::IndexScanFromStrings;
+  auto qec =
+      ad_utility::testing::getQec("<x1> <p> <y>. <x2> <p> <y>. <y> <p> <z>");
+  auto getId = ad_utility::testing::makeGetId(qec->getIndex());
+
+  std::vector<Id> sources{getId("<x1>"), getId("<x2>")};
+  std::vector<Id> targets{getId("<y>"), getId("<z>")};
+  PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS,
+                                 sources,
+                                 targets,
+                                 Variable("?start"),
+                                 Variable("?end"),
+                                 Variable("?path"),
+                                 Variable("?edge"),
+                                 {},
+                                 true,
+                                 1};
+  h::expect(
+      "PREFIX pathSearch: <https://qlever.cs.uni-freiburg.de/pathSearch/>"
+      "SELECT ?start ?end ?path ?edge WHERE {"
+      "SERVICE pathSearch: {"
+      "_:path pathSearch:algorithm pathSearch:allPaths ;"
+      "pathSearch:source <x1> ;"
+      "pathSearch:source <x2> ;"
+      "pathSearch:target <y> ;"
+      "pathSearch:target <z> ;"
+      "pathSearch:pathColumn ?path ;"
+      "pathSearch:edgeColumn ?edge ;"
+      "pathSearch:start ?start;"
+      "pathSearch:end ?end;"
+      "pathSearch:numPathsPerTarget 1;"
+      "{SELECT * WHERE {"
+      "?start <p> ?end."
+      "}}}}",
+      h::PathSearch(config, true, true, scan("?start", "<p>", "?end")), qec);
+}
+
 TEST(QueryPlanner, PathSearchWithEdgeProperties) {
   auto scan = h::IndexScanFromStrings;
   auto join = h::Join;
@@ -1483,6 +1523,86 @@ TEST(QueryPlanner, PathSearchUnsupportedAlgorithm) {
       parsedQuery::PathSearchException);
 }
 
+// __________________________________________________________________________
+TEST(QueryPlanner, PathSearchWrongArgumentCartesian) {
+  auto qec = ad_utility::testing::getQec("<x> <p> <y>. <y> <p> <z>");
+  auto getId = ad_utility::testing::makeGetId(qec->getIndex());
+
+  auto query =
+      "PREFIX pathSearch: <https://qlever.cs.uni-freiburg.de/pathSearch/>"
+      "SELECT ?start ?end ?path ?edge WHERE {"
+      "SERVICE pathSearch: {"
+      "_:path pathSearch:algorithm pathSearch:allPaths ;"
+      "pathSearch:source ?source1 ;"
+      "pathSearch:source ?source2 ;"
+      "pathSearch:target <z> ;"
+      "pathSearch:pathColumn ?path ;"
+      "pathSearch:edgeColumn ?edge ;"
+      "pathSearch:start ?start;"
+      "pathSearch:end ?end;"
+      "pathSearch:cartesian <false>;"
+      "{SELECT * WHERE {"
+      "?start <p> ?end."
+      "}}}}";
+  AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(
+      h::parseAndPlan(std::move(query), qec),
+      HasSubstr("The parameter 'cartesian' expects a boolean"),
+      parsedQuery::PathSearchException);
+}
+
+// __________________________________________________________________________
+TEST(QueryPlanner, PathSearchWrongArgumentNumPathsPerTarget) {
+  auto qec = ad_utility::testing::getQec("<x> <p> <y>. <y> <p> <z>");
+  auto getId = ad_utility::testing::makeGetId(qec->getIndex());
+
+  auto query =
+      "PREFIX pathSearch: <https://qlever.cs.uni-freiburg.de/pathSearch/>"
+      "SELECT ?start ?end ?path ?edge WHERE {"
+      "SERVICE pathSearch: {"
+      "_:path pathSearch:algorithm pathSearch:allPaths ;"
+      "pathSearch:source ?source1 ;"
+      "pathSearch:source ?source2 ;"
+      "pathSearch:target <z> ;"
+      "pathSearch:pathColumn ?path ;"
+      "pathSearch:edgeColumn ?edge ;"
+      "pathSearch:start ?start;"
+      "pathSearch:end ?end;"
+      "pathSearch:numPathsPerTarget <five>;"
+      "{SELECT * WHERE {"
+      "?start <p> ?end."
+      "}}}}";
+  AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(
+      h::parseAndPlan(std::move(query), qec),
+      HasSubstr("The parameter 'numPathsPerTarget' expects an integer"),
+      parsedQuery::PathSearchException);
+}
+
+// __________________________________________________________________________
+TEST(QueryPlanner, PathSearchWrongArgumentAlgorithm) {
+  auto qec = ad_utility::testing::getQec("<x> <p> <y>. <y> <p> <z>");
+  auto getId = ad_utility::testing::makeGetId(qec->getIndex());
+
+  auto query =
+      "PREFIX pathSearch: <https://qlever.cs.uni-freiburg.de/pathSearch/>"
+      "SELECT ?start ?end ?path ?edge WHERE {"
+      "SERVICE pathSearch: {"
+      "_:path pathSearch:algorithm 1 ;"
+      "pathSearch:source ?source1 ;"
+      "pathSearch:source ?source2 ;"
+      "pathSearch:target <z> ;"
+      "pathSearch:pathColumn ?path ;"
+      "pathSearch:edgeColumn ?edge ;"
+      "pathSearch:start ?start;"
+      "pathSearch:end ?end;"
+      "{SELECT * WHERE {"
+      "?start <p> ?end."
+      "}}}}";
+  AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(
+      h::parseAndPlan(std::move(query), qec),
+      HasSubstr("The 'algorithm' value has to be an Iri"),
+      parsedQuery::PathSearchException);
+}
+
 TEST(QueryPlanner, SpatialJoinViaMaxDistPredicate) {
   auto scan = h::IndexScanFromStrings;
   h::expect(

From 3d321c25c67c6d443a062fe1f3f3e060b1d62ef3 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <joka921@users.noreply.github.com>
Date: Tue, 5 Nov 2024 09:22:14 +0100
Subject: [PATCH 03/12] Allow `REGEX` for arbitrary expressions (not just a
 variable) (#1576)

So far, the `REGEX` function was only implemented for the (frequent) special case, where the first argument is either a variable (like `?x`) or `STR` of a variable (like `STR(?x)`). Now `REGEX` works for arbitrary expressions. Use the occasion to clean up the code a little bit and improve the documentation.
---
 .../sparqlExpressions/RegexExpression.cpp     | 181 +++++++++++-------
 .../sparqlExpressions/RegexExpression.h       |  43 +++--
 test/RegexExpressionTest.cpp                  |  66 +++++--
 3 files changed, 186 insertions(+), 104 deletions(-)

diff --git a/src/engine/sparqlExpressions/RegexExpression.cpp b/src/engine/sparqlExpressions/RegexExpression.cpp
index 9ee49dedab..2cf5757e64 100644
--- a/src/engine/sparqlExpressions/RegexExpression.cpp
+++ b/src/engine/sparqlExpressions/RegexExpression.cpp
@@ -72,19 +72,21 @@ std::optional<std::string> getPrefixRegex(std::string regex) {
 }  // namespace sparqlExpression::detail
 
 namespace sparqlExpression {
+
 // ___________________________________________________________________________
 RegexExpression::RegexExpression(
     SparqlExpression::Ptr child, SparqlExpression::Ptr regex,
     std::optional<SparqlExpression::Ptr> optionalFlags)
     : child_{std::move(child)} {
+  // If we have a `STR()` expression, remove the `STR()` and remember that it
+  // was there.
   if (child_->isStrExpression()) {
     child_ = std::move(std::move(*child_).moveChildrenOut().at(0));
     childIsStrExpression_ = true;
   }
-  if (!dynamic_cast<const VariableExpression*>(child_.get())) {
-    throw std::runtime_error(
-        "REGEX expressions are currently supported only on variables.");
-  }
+
+  // Get the regex string, which must be a string literal without a datatype or
+  // language tag.
   std::string regexString;
   if (auto regexPtr =
           dynamic_cast<const StringLiteralExpression*>(regex.get())) {
@@ -100,6 +102,9 @@ RegexExpression::RegexExpression(
         "The second argument to the REGEX function must be a "
         "string literal (which contains the regular expression)");
   }
+
+  // Parse the flags. The optional argument for that must, again, be a
+  // string literal without a datatype or language tag.
   if (optionalFlags.has_value()) {
     if (auto flagsPtr = dynamic_cast<const StringLiteralExpression*>(
             optionalFlags.value().get())) {
@@ -131,19 +136,18 @@ RegexExpression::RegexExpression(
     }
   }
 
+  // Create RE2 object from the regex string. If it is a simple prefix regex,
+  // store the prefix in `prefixRegex_` (otherwise that becomes `std::nullopt`).
   regexAsString_ = regexString;
-  if (auto opt = detail::getPrefixRegex(regexString)) {
-    regex_ = std::move(opt.value());
-  } else {
-    regex_.emplace<RE2>(regexString, RE2::Quiet);
-    const auto& r = std::get<RE2>(regex_);
-    if (r.error_code() != RE2::NoError) {
-      throw std::runtime_error{absl::StrCat(
-          "The regex \"", regexString,
-          "\" is not supported by QLever (which uses Google's RE2 library). "
-          "Error from RE2 is: ",
-          r.error())};
-    }
+  prefixRegex_ = detail::getPrefixRegex(regexString);
+  regex_.emplace(regexString, RE2::Quiet);
+  const auto& r = regex_.value();
+  if (r.error_code() != RE2::NoError) {
+    throw std::runtime_error{absl::StrCat(
+        "The regex \"", regexString,
+        "\" is not supported by QLever (which uses Google's RE2 library); "
+        "the error from RE2 is: ",
+        r.error())};
   }
 }
 
@@ -163,17 +167,27 @@ std::span<SparqlExpression::Ptr> RegexExpression::childrenImpl() {
 ExpressionResult RegexExpression::evaluatePrefixRegex(
     const Variable& variable,
     sparqlExpression::EvaluationContext* context) const {
-  std::string prefixRegex = std::get<std::string>(regex_);
+  // This function must only be called if we have a simple prefix regex.
+  AD_CORRECTNESS_CHECK(prefixRegex_.has_value());
+  std::string prefixRegex = prefixRegex_.value();
+
+  // If the expression is enclosed in `STR()`, we have two ranges: one for the
+  // prefix with a leading `"` (literals) and one with a leading `<` (IRIs).
+  //
+  // TODO<joka921> prefix filters currently have false negatives when the prefix
+  // is not in the vocabulary, and there exist local vocab entries in the input
+  // that are between the prefix and the next local vocab entry. This is
+  // non-trivial to fix as it involves fiddling with Unicode prefix encodings.
+  //
+  // TODO<joka921> prefix filters currently never find numbers or other
+  // datatypes that are encoded directly inside the IDs.
   std::vector<std::string> actualPrefixes;
   actualPrefixes.push_back("\"" + prefixRegex);
-  // If the STR function was applied, we also look for prefix matches for IRIs.
-  // TODO<joka921> prefix filters currently never find numbers or local vocab
-  // entries, numbers, or other datatypes that are encoded directly inside the
-  // IDs.
   if (childIsStrExpression_) {
     actualPrefixes.push_back("<" + prefixRegex);
   }
-  std::vector<ad_utility::SetOfIntervals> resultSetOfIntervals;
+
+  // Compute the (one or two) ranges.
   std::vector<std::pair<Id, Id>> lowerAndUpperIds;
   lowerAndUpperIds.reserve(actualPrefixes.size());
   for (const auto& prefix : actualPrefixes) {
@@ -184,12 +198,21 @@ ExpressionResult RegexExpression::evaluatePrefixRegex(
     }
   }
   checkCancellation(context);
+
+  // Begin and end of the input (for each row of which we want to
+  // evaluate the regex).
   auto beg = context->_inputTable.begin() + context->_beginIndex;
   auto end = context->_inputTable.begin() + context->_endIndex;
   AD_CONTRACT_CHECK(end <= context->_inputTable.end());
+
+  // In this function, the expression is a simple variable. If the input is
+  // sorted by that variable, the result can be computed by a constant number
+  // of binary searches and the result is a set of intervals.
+  std::vector<ad_utility::SetOfIntervals> resultSetOfIntervals;
   if (context->isResultSortedBy(variable)) {
     auto column = context->getColumnIndexForVariable(variable);
     for (auto [lowerId, upperId] : lowerAndUpperIds) {
+      // Two binary searches to find the lower and upper bounds of the range.
       auto lower = std::lower_bound(
           beg, end, nullptr,
           [column, lowerId = lowerId](const auto& l, const auto&) {
@@ -200,7 +223,6 @@ ExpressionResult RegexExpression::evaluatePrefixRegex(
           [column, upperId = upperId](const auto& l, const auto&) {
             return l[column] < upperId;
           });
-
       // Return the empty result as an empty `SetOfIntervals` instead of as an
       // empty range.
       if (lower != upper) {
@@ -212,47 +234,58 @@ ExpressionResult RegexExpression::evaluatePrefixRegex(
     return std::reduce(resultSetOfIntervals.begin(), resultSetOfIntervals.end(),
                        ad_utility::SetOfIntervals{},
                        ad_utility::SetOfIntervals::Union{});
-  } else {
-    auto resultSize = context->size();
-    VectorWithMemoryLimit<Id> result{context->_allocator};
-    result.reserve(resultSize);
-    for (auto id : detail::makeGenerator(variable, resultSize, context)) {
-      result.push_back(Id::makeFromBool(
-          std::ranges::any_of(lowerAndUpperIds, [&](const auto& lowerUpper) {
-            return !valueIdComparators::compareByBits(id, lowerUpper.first) &&
-                   valueIdComparators::compareByBits(id, lowerUpper.second);
-          })));
-      checkCancellation(context);
-    }
-    return result;
   }
+
+  // If the input is not sorted by the variable, we have to check each row
+  // individually (by checking inclusion in the ranges).
+  auto resultSize = context->size();
+  VectorWithMemoryLimit<Id> result{context->_allocator};
+  result.reserve(resultSize);
+  for (auto id : detail::makeGenerator(variable, resultSize, context)) {
+    result.push_back(Id::makeFromBool(
+        std::ranges::any_of(lowerAndUpperIds, [&](const auto& lowerUpper) {
+          return !valueIdComparators::compareByBits(id, lowerUpper.first) &&
+                 valueIdComparators::compareByBits(id, lowerUpper.second);
+        })));
+    checkCancellation(context);
+  }
+  return result;
 }
 
 // ___________________________________________________________________________
-ExpressionResult RegexExpression::evaluateNonPrefixRegex(
-    const Variable& variable,
-    sparqlExpression::EvaluationContext* context) const {
-  AD_CONTRACT_CHECK(std::holds_alternative<RE2>(regex_));
+template <SingleExpressionResult T>
+ExpressionResult RegexExpression::evaluateGeneralCase(
+    T&& input, sparqlExpression::EvaluationContext* context) const {
+  // We have one result for each row of the input.
   auto resultSize = context->size();
   VectorWithMemoryLimit<Id> result{context->_allocator};
   result.reserve(resultSize);
+  AD_CORRECTNESS_CHECK(regex_.has_value());
 
-  auto impl = [&]<typename ValueGetter>(const ValueGetter& getter) {
-    for (auto id : detail::makeGenerator(variable, resultSize, context)) {
-      auto str = getter(id, context);
-      if (!str.has_value()) {
-        result.push_back(Id::makeUndefined());
-      } else {
-        result.push_back(Id::makeFromBool(
-            RE2::PartialMatch(str.value(), std::get<RE2>(regex_))));
-      }
-      checkCancellation(context);
-    }
+  // Compute the result using the given value getter. If the getter returns
+  // `std::nullopt` for a row, the result is `UNDEF`. Otherwise, we have a
+  // string and evaluate the regex on it.
+  auto computeResult = [&]<typename ValueGetter>(const ValueGetter& getter) {
+    std::ranges::for_each(
+        detail::makeGenerator(AD_FWD(input), resultSize, context),
+        [&getter, &context, &result, this](const auto& id) {
+          auto str = getter(id, context);
+          if (!str.has_value()) {
+            result.push_back(Id::makeUndefined());
+          } else {
+            result.push_back(Id::makeFromBool(
+                RE2::PartialMatch(str.value(), regex_.value())));
+          }
+          checkCancellation(context);
+        });
   };
+
+  // Compute the result with the correct value getter (depending on whether the
+  // expression is enclosed in `STR()` or not), and return it.
   if (childIsStrExpression_) {
-    impl(detail::StringValueGetter{});
+    computeResult(detail::StringValueGetter{});
   } else {
-    impl(detail::LiteralFromIdGetter{});
+    computeResult(detail::LiteralFromIdGetter{});
   }
   return result;
 }
@@ -262,51 +295,57 @@ ExpressionResult RegexExpression::evaluate(
     sparqlExpression::EvaluationContext* context) const {
   auto resultAsVariant = child_->evaluate(context);
   auto variablePtr = std::get_if<Variable>(&resultAsVariant);
-  AD_CONTRACT_CHECK(variablePtr);
 
-  if (std::holds_alternative<std::string>(regex_)) {
+  if (prefixRegex_.has_value() && variablePtr != nullptr) {
     return evaluatePrefixRegex(*variablePtr, context);
   } else {
-    return evaluateNonPrefixRegex(*variablePtr, context);
+    return std::visit(
+        [this, context](auto&& input) {
+          return evaluateGeneralCase(AD_FWD(input), context);
+        },
+        std::move(resultAsVariant));
   }
 }
 
 // ____________________________________________________________________________
 bool RegexExpression::isPrefixExpression() const {
-  return std::holds_alternative<std::string>(regex_);
+  return prefixRegex_.has_value();
 }
 
 // ____________________________________________________________________________
 auto RegexExpression::getEstimatesForFilterExpression(
     uint64_t inputSize,
     const std::optional<Variable>& firstSortedVariable) const -> Estimates {
+  // If we have a simple prefix regex, assume that only 10^-k entries remain,
+  // where k is the length of the prefix.
   if (isPrefixExpression()) {
-    // Assume that only 10^-k entries remain, where k is the length of the
-    // prefix. The reason for the -2 is that at this point, _rhs always
-    // starts with ^"
     double reductionFactor = std::pow(
-        10, std::max(
-                0, static_cast<int>(std::get<std::string>(regex_).size()) - 2));
+        10, std::max(0, static_cast<int>(prefixRegex_.value().size())));
     // Cap to reasonable minimal and maximal values to prevent numerical
     // stability problems.
     reductionFactor = std::min(100000000.0, reductionFactor);
     reductionFactor = std::max(1.0, reductionFactor);
     size_t sizeEstimate = inputSize / static_cast<size_t>(reductionFactor);
     auto varPtr = dynamic_cast<VariableExpression*>(child_.get());
-    AD_CONTRACT_CHECK(varPtr);
-    size_t costEstimate = firstSortedVariable == varPtr->value()
+    size_t costEstimate = (varPtr && firstSortedVariable == varPtr->value())
                               ? sizeEstimate
                               : sizeEstimate + inputSize;
 
-    return {sizeEstimate, costEstimate};
-  } else {  // Not a prefix filter.
-    size_t sizeEstimate = inputSize / 2;
-    // We assume that checking a REGEX for an element is 10 times more
-    // expensive than an "ordinary" filter check.
-    size_t costEstimate = sizeEstimate + 10 * inputSize;
-
     return {sizeEstimate, costEstimate};
   }
+
+  // For the general case, we make two assumptions.
+  //
+  // 1. Half of the entries remain after the filter. This is a very simple
+  // and arbitrary heuristic.
+  //
+  // 2. Checking a REGEX for an element is 10 times more expensive than a
+  // "simple" filter check. This is reasonable because regex evaluations are
+  // expensive, but the fixed factor disregards that it depends on the
+  // complexity of the regex how expensive it is.
+  size_t sizeEstimate = inputSize / 2;
+  size_t costEstimate = sizeEstimate + 10 * inputSize;
+  return {sizeEstimate, costEstimate};
 }
 
 // ____________________________________________________________________________
diff --git a/src/engine/sparqlExpressions/RegexExpression.h b/src/engine/sparqlExpressions/RegexExpression.h
index 783acc22db..cbf95c7f38 100644
--- a/src/engine/sparqlExpressions/RegexExpression.h
+++ b/src/engine/sparqlExpressions/RegexExpression.h
@@ -1,6 +1,6 @@
-//  Copyright 2022, University of Freiburg,
-//                  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>
+// Copyright 2022 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>
 
 #pragma once
 
@@ -11,22 +11,27 @@
 #include "re2/re2.h"
 
 namespace sparqlExpression {
+// Class implementing the REGEX function, which takes two mandatory arguments
+// (an expression and a regex) and one optional argument (a string of flags).
 class RegexExpression : public SparqlExpression {
  private:
   SparqlExpression::Ptr child_;
-  // If this variant holds a string, we consider this string as the prefix of a
-  // prefix regex.
-  std::variant<std::string, RE2> regex_;
+  // The regular expression. It needs to be a `std::optional` because `RE2`
+  // objects do not have a default constructor.
+  std::optional<RE2> regex_;
+  // If this `std::optional` holds a string, we have a simple prefix regex
+  // (which translates to a range search) and this string holds the prefix.
+  std::optional<std::string> prefixRegex_;
   // The regex as a string, used for the cache key.
   std::string regexAsString_;
 
-  // True if the STR() function is to be applied on the child before evaluating
-  // the regex.
+  // True iff the expression is enclosed in `STR()`.
   bool childIsStrExpression_ = false;
 
  public:
-  // `child` must be a `VariableExpression` and `regex` must be a
-  // `LiteralExpression` that stores a string, else an exception will be thrown.
+  // The `child` can be an arbitrary expression. The `regex` must be a
+  // `LiteralExpression` that stores a string, otherwise an exception will be
+  // thrown.
   RegexExpression(SparqlExpression::Ptr child, SparqlExpression::Ptr regex,
                   std::optional<SparqlExpression::Ptr> optionalFlags);
 
@@ -46,17 +51,21 @@ class RegexExpression : public SparqlExpression {
 
  private:
   std::span<SparqlExpression::Ptr> childrenImpl() override;
-  // Internal implementations that are called by `evaluate`.
+
+  // Evaluate for the special case, where the expression is a variable and we
+  // have a simple prefix regex (in which case the regex match translates to a
+  // simple range check).
   ExpressionResult evaluatePrefixRegex(
       const Variable& variable,
       sparqlExpression::EvaluationContext* context) const;
-  ExpressionResult evaluateNonPrefixRegex(
-      const Variable& variable,
-      sparqlExpression::EvaluationContext* context) const;
 
-  /// Helper function to check if the `CancellationHandle` of the passed
-  /// `EvaluationContext` has been cancelled and throw an exception if this is
-  /// the case.
+  // Evaluate for the general case.
+  template <SingleExpressionResult T>
+  ExpressionResult evaluateGeneralCase(
+      T&& input, sparqlExpression::EvaluationContext* context) const;
+
+  // Check if the `CancellationHandle` of `context` has been cancelled and throw
+  // an exception if this is the case.
   static void checkCancellation(
       const sparqlExpression::EvaluationContext* context,
       ad_utility::source_location location =
diff --git a/test/RegexExpressionTest.cpp b/test/RegexExpressionTest.cpp
index 01643c1de8..0e94d3603d 100644
--- a/test/RegexExpressionTest.cpp
+++ b/test/RegexExpressionTest.cpp
@@ -1,6 +1,6 @@
-//  Copyright 2022, University of Freiburg,
-//                  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>
+// Copyright 2022 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>
 
 #include <optional>
 #include <string>
@@ -23,8 +23,11 @@ constexpr auto T = Id::makeFromBool(true);
 constexpr auto F = Id::makeFromBool(false);
 constexpr Id U = Id::makeUndefined();
 
+// Make `RegexExpression` from the given `child` (the expression on which to
+// apply the regex), `regex`, and optional `flags`. The argument `childAsStr` is
+// true iff the expression is enclosed in a `STR()` function.
 RegexExpression makeRegexExpression(
-    std::string variable, std::string regex,
+    SparqlExpression::Ptr child, std::string regex,
     std::optional<std::string> flags = std::nullopt, bool childAsStr = false) {
   // The regex and the flags both have to be enquoted. This is normally ensured
   // by the SPARQL parser. For easier readability of the tests we add those
@@ -33,10 +36,8 @@ RegexExpression makeRegexExpression(
   if (flags.has_value()) {
     flags.value() = absl::StrCat("\"", flags.value(), "\"");
   }
-  SparqlExpression::Ptr variableExpression =
-      std::make_unique<VariableExpression>(Variable{std::move(variable)});
   if (childAsStr) {
-    variableExpression = makeStrExpression(std::move(variableExpression));
+    child = makeStrExpression(std::move(child));
   }
   auto regexExpression = std::make_unique<StringLiteralExpression>(lit(regex));
   std::optional<SparqlExpression::Ptr> flagsExpression = std::nullopt;
@@ -45,18 +46,33 @@ RegexExpression makeRegexExpression(
         std::make_unique<StringLiteralExpression>(lit(flags.value()))};
   }
 
-  return {std::move(variableExpression), std::move(regexExpression),
+  return {std::move(child), std::move(regexExpression),
           std::move(flagsExpression)};
 }
+
+// Special case of the `makeRegexExpression` above, where the `child`
+// expression is a variable.
+RegexExpression makeRegexExpression(
+    std::string variable, std::string regex,
+    std::optional<std::string> flags = std::nullopt, bool childAsStr = false) {
+  SparqlExpression::Ptr variableExpression =
+      std::make_unique<VariableExpression>(Variable{std::move(variable)});
+  return makeRegexExpression(std::move(variableExpression), std::move(regex),
+                             std::move(flags), childAsStr);
+}
 }  // namespace
 
 // Test that the expression `leftValue Comparator rightValue`, when evaluated on
 // the `TestContext` (see above), yields the `expected` result.
 void testWithExplicitResult(const SparqlExpression& expression,
                             std::vector<Id> expected,
+                            std::optional<size_t> numInputs = std::nullopt,
                             source_location l = source_location::current()) {
-  static TestContext ctx;
+  TestContext ctx;
   auto trace = generateLocationTrace(l, "testWithExplicitResult");
+  if (numInputs.has_value()) {
+    ctx.context._endIndex = numInputs.value();
+  }
   auto resultAsVariant = expression.evaluate(&ctx.context);
   const auto& result = std::get<VectorWithMemoryLimit<Id>>(resultAsVariant);
 
@@ -74,6 +90,8 @@ auto testNonPrefixRegex = [](std::string variable, std::string regex,
   testWithExplicitResult(expr, expectedResult);
 };
 
+// Tests where the expression is a variable and the regex is not a simple prefix
+// regex (that translates to a simple range search).
 TEST(RegexExpression, nonPrefixRegex) {
   // ?vocab column is `"Beta", "alpha", "älpha"
   // ?mixed column is `1, -0.1, <x>`
@@ -83,10 +101,11 @@ TEST(RegexExpression, nonPrefixRegex) {
   test("?vocab", "l[^a]{2}a", {F, T, T});
   test("?vocab", "[el][^a]*a", {T, T, T});
   test("?vocab", "B", {T, F, F});
-  // case-sensitive by default.
+
+  // The match is case-sensitive by default.
   test("?vocab", "b", {F, F, F});
 
-  // Not a prefix expression because of the "special" regex characters
+  // A prefix regex, but not a fixed string.
   test("?vocab", "^a.*", {F, T, F});
 
   test("?mixed", "x", {U, U, U});
@@ -96,10 +115,27 @@ TEST(RegexExpression, nonPrefixRegex) {
 
   // ?localVocab column is "notInVocabA", "notInVocabB", <"notInVocabD">
   test("?localVocab", "InV", {T, T, U});
+
   // The IRI is only considered when testing with a STR expression
   test("?localVocab", "Vocab[AD]", {T, F, T}, true);
 }
 
+// Test where the expression is not simply a variable.
+TEST(RegexExpression, inputNotVariable) {
+  // Our expression is a fixed string literal: "hallo".
+  VectorWithMemoryLimit<IdOrLiteralOrIri> input{
+      ad_utility::testing::getQec()->getAllocator()};
+  input.push_back(ad_utility::triple_component::LiteralOrIri(lit("\"hallo\"")));
+  auto child = std::make_unique<sparqlExpression::detail::SingleUseExpression>(
+      input.clone());
+
+  // "hallo" matches the regex "ha".
+  auto expr = makeRegexExpression(std::move(child), "ha", "");
+  std::vector<Id> expected;
+  expected.push_back(Id::makeFromBool(true));
+  testWithExplicitResult(expr, expected, input.size());
+}
+
 auto testNonPrefixRegexWithFlags =
     [](std::string variable, std::string regex, std::string flags,
        const std::vector<Id>& expectedResult,
@@ -111,6 +147,7 @@ auto testNonPrefixRegexWithFlags =
       testWithExplicitResult(expr, expectedResult);
     };
 
+// Fun with flags.
 TEST(RegexExpression, nonPrefixRegexWithFlags) {
   // ?vocab column is `"Beta", "alpha", "älpha"
   // ?mixed column is `1, -0.1, A`
@@ -141,6 +178,8 @@ TEST(RegexExpression, nonPrefixRegexWithFlags) {
   // TODO<joka921>  Add tests for other flags (maybe the non-greedy one?)
 }
 
+// Test the `getPrefixRegex` function (which returns `std::nullopt` if the regex
+// is not a simple prefix regex).
 TEST(RegexExpression, getPrefixRegex) {
   using namespace sparqlExpression::detail;
   ASSERT_EQ(std::nullopt, getPrefixRegex("alpha"));
@@ -265,11 +304,6 @@ TEST(RegexExpression, invalidConstruction) {
     return std::make_unique<VariableExpression>(Variable{std::move(literal)});
   };
 
-  // The first argument must be a variable.
-  EXPECT_THROW(
-      RegexExpression(literal("\"a\""), literal("\"b\""), std::nullopt),
-      std::runtime_error);
-
   // The second argument must be a string literal.
   EXPECT_THROW(RegexExpression(variable("?a"), variable("?b"), std::nullopt),
                std::runtime_error);

From f1490771f7a70d49bc1d3d08a95e31488aaa505e Mon Sep 17 00:00:00 2001
From: RobinTF <83676088+RobinTF@users.noreply.github.com>
Date: Wed, 6 Nov 2024 10:03:13 +0100
Subject: [PATCH 04/12] Simplify the `CartesianProductJoin` class (#1598)

Refactor some rather large functions into smaller ones. This will make the lazy implementation of this class much simpler to implement and to review.
---
 src/engine/CartesianProductJoin.cpp | 174 ++++++++++++++--------------
 src/engine/CartesianProductJoin.h   |  14 ++-
 2 files changed, 96 insertions(+), 92 deletions(-)

diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp
index c73361e001..b9bb514ba7 100644
--- a/src/engine/CartesianProductJoin.cpp
+++ b/src/engine/CartesianProductJoin.cpp
@@ -53,22 +53,21 @@ string CartesianProductJoin::getCacheKeyImpl() const {
 // ____________________________________________________________________________
 size_t CartesianProductJoin::getResultWidth() const {
   auto view = childView() | std::views::transform(&Operation::getResultWidth);
-  return std::accumulate(view.begin(), view.end(), 0UL, std::plus{});
+  return std::reduce(view.begin(), view.end(), 0UL, std::plus{});
 }
 
 // ____________________________________________________________________________
 size_t CartesianProductJoin::getCostEstimate() {
   auto childSizes =
       childView() | std::views::transform(&Operation::getCostEstimate);
-  return getSizeEstimate() + std::accumulate(childSizes.begin(),
-                                             childSizes.end(), 0UL,
-                                             std::plus{});
+  return getSizeEstimate() +
+         std::reduce(childSizes.begin(), childSizes.end(), 0UL, std::plus{});
 }
 
 // ____________________________________________________________________________
 uint64_t CartesianProductJoin::getSizeEstimateBeforeLimit() {
   auto view = childView() | std::views::transform(&Operation::getSizeEstimate);
-  return std::accumulate(view.begin(), view.end(), 1UL, std::multiplies{});
+  return std::reduce(view.begin(), view.end(), 1UL, std::multiplies{});
 }
 
 // ____________________________________________________________________________
@@ -85,13 +84,10 @@ bool CartesianProductJoin::knownEmptyResult() {
 }
 
 // ____________________________________________________________________________
-template <size_t StaticGroupSize>
 void CartesianProductJoin::writeResultColumn(std::span<Id> targetColumn,
                                              std::span<const Id> inputColumn,
-                                             size_t groupSize, size_t offset) {
-  if (StaticGroupSize != 0) {
-    AD_CORRECTNESS_CHECK(StaticGroupSize == groupSize);
-  }
+                                             size_t groupSize,
+                                             size_t offset) const {
   // Copy each element from the `inputColumn` `groupSize` times to
   // the `targetColumn`, repeat until the `targetColumn` is completely filled.
   size_t numRowsWritten = 0;
@@ -104,20 +100,13 @@ void CartesianProductJoin::writeResultColumn(std::span<Id> targetColumn,
   size_t groupStartIdx = offset % groupSize;
   while (true) {
     for (size_t i = firstInputElementIdx; i < inputSize; ++i) {
-      auto writeGroup = [&](size_t actualGroupSize) {
-        for (size_t u = groupStartIdx; u < actualGroupSize; ++u) {
-          if (numRowsWritten == targetSize) {
-            return;
-          }
-          targetColumn[numRowsWritten] = inputColumn[i];
-          ++numRowsWritten;
-          checkCancellation();
+      for (size_t u = groupStartIdx; u < groupSize; ++u) {
+        if (numRowsWritten == targetSize) {
+          return;
         }
-      };
-      if constexpr (StaticGroupSize == 0) {
-        writeGroup(groupSize);
-      } else {
-        writeGroup(StaticGroupSize);
+        targetColumn[numRowsWritten] = inputColumn[i];
+        ++numRowsWritten;
+        checkCancellation();
       }
       if (numRowsWritten == targetSize) {
         return;
@@ -131,61 +120,52 @@ void CartesianProductJoin::writeResultColumn(std::span<Id> targetColumn,
     firstInputElementIdx = 0;
   }
 }
+
 // ____________________________________________________________________________
 ProtoResult CartesianProductJoin::computeResult(
     [[maybe_unused]] bool requestLaziness) {
-  IdTable result{getExecutionContext()->getAllocator()};
-  result.setNumColumns(getResultWidth());
-  std::vector<std::shared_ptr<const Result>> subResults;
+  std::vector<std::shared_ptr<const Result>> subResults = calculateSubResults();
 
-  // We don't need to fully materialize the child results if we have a LIMIT
-  // specified and an OFFSET of 0.
-  // TODO<joka921> We could in theory also apply this optimization if a
-  // non-zero OFFSET is specified, but this would make the algorithm more
-  // complicated.
-  std::optional<LimitOffsetClause> limitIfPresent = getLimit();
-  if (!getLimit()._limit.has_value() || getLimit()._offset != 0) {
-    limitIfPresent = std::nullopt;
-  }
-
-  // Get all child results (possibly with limit, see above).
-  for (auto& child : childView()) {
-    if (limitIfPresent.has_value() && child.supportsLimit()) {
-      child.setLimit(limitIfPresent.value());
-    }
-    subResults.push_back(child.getResult());
+  IdTable result = writeAllColumns(subResults);
 
-    const auto& table = subResults.back()->idTable();
-    // Early stopping: If one of the results is empty, we can stop early.
-    if (table.empty()) {
-      break;
-    }
+  // Dereference all the subresult pointers because `getSharedLocalVocabFrom...`
+  // requires a range of references, not pointers.
+  auto subResultsDeref = std::views::transform(
+      subResults, [](auto& x) -> decltype(auto) { return *x; });
+  return {std::move(result), resultSortedOn(),
+          Result::getMergedLocalVocab(subResultsDeref)};
+}
 
-    // If one of the children is the neutral element (because of a triple with
-    // zero variables), we can simply ignore it here.
-    if (table.numRows() == 1 && table.numColumns() == 0) {
-      subResults.pop_back();
-      continue;
-    }
-    // Example for the following calculation: If we have a LIMIT of 1000 and
-    // the first child already has a result of size 100, then the second child
-    // needs to evaluate only its first 10 results. The +1 is because integer
-    // divisions are rounded down by default.
-    if (limitIfPresent.has_value()) {
-      limitIfPresent.value()._limit = limitIfPresent.value()._limit.value() /
-                                          subResults.back()->idTable().size() +
-                                      1;
+// ____________________________________________________________________________
+VariableToColumnMap CartesianProductJoin::computeVariableToColumnMap() const {
+  VariableToColumnMap result;
+  // It is crucial that we also count the columns in the inputs to which no
+  // variable was assigned. This is managed by the `offset` variable.
+  size_t offset = 0;
+  for (const auto& child : childView()) {
+    for (auto varCol : child.getExternallyVisibleVariableColumns()) {
+      varCol.second.columnIndex_ += offset;
+      result.insert(std::move(varCol));
     }
+    // `getResultWidth` contains all the columns, not only the ones to which a
+    // variable is assigned.
+    offset += child.getResultWidth();
   }
+  return result;
+}
 
+// _____________________________________________________________________________
+IdTable CartesianProductJoin::writeAllColumns(
+    const std::vector<std::shared_ptr<const Result>>& subResults) const {
+  IdTable result{getResultWidth(), getExecutionContext()->getAllocator()};
   // TODO<joka921> Find a solution to cheaply handle the case, that only a
   // single result is left. This can probably be done by using the
   // `ProtoResult`.
 
   auto sizesView = std::views::transform(
       subResults, [](const auto& child) { return child->idTable().size(); });
-  auto totalResultSize = std::accumulate(sizesView.begin(), sizesView.end(),
-                                         1UL, std::multiplies{});
+  auto totalResultSize =
+      std::reduce(sizesView.begin(), sizesView.end(), 1UL, std::multiplies{});
 
   size_t totalSizeIncludingLimit = getLimit().actualSize(totalResultSize);
   size_t offset = getLimit().actualOffset(totalResultSize);
@@ -211,37 +191,57 @@ ProtoResult CartesianProductJoin::computeResult(
       const auto& input = subResultPtr->idTable();
       for (const auto& inputCol : input.getColumns()) {
         decltype(auto) resultCol = result.getColumn(resultColIdx);
-        ad_utility::callFixedSize(groupSize, [&]<size_t I>() {
-          writeResultColumn<I>(resultCol, inputCol, groupSize, offset);
-        });
+        writeResultColumn(resultCol, inputCol, groupSize, offset);
         ++resultColIdx;
       }
       groupSize *= input.numRows();
     }
   }
-
-  // Dereference all the subresult pointers because `getSharedLocalVocabFrom...`
-  // requires a range of references, not pointers.
-  auto subResultsDeref = std::views::transform(
-      subResults, [](auto& x) -> decltype(auto) { return *x; });
-  return {std::move(result), resultSortedOn(),
-          Result::getMergedLocalVocab(subResultsDeref)};
+  return result;
 }
 
-// ____________________________________________________________________________
-VariableToColumnMap CartesianProductJoin::computeVariableToColumnMap() const {
-  VariableToColumnMap result;
-  // It is crucial that we also count the columns in the inputs to which no
-  // variable was assigned. This is managed by the `offset` variable.
-  size_t offset = 0;
-  for (const auto& child : childView()) {
-    for (auto varCol : child.getExternallyVisibleVariableColumns()) {
-      varCol.second.columnIndex_ += offset;
-      result.insert(std::move(varCol));
+// _____________________________________________________________________________
+std::vector<std::shared_ptr<const Result>>
+CartesianProductJoin::calculateSubResults() {
+  std::vector<std::shared_ptr<const Result>> subResults;
+  // We don't need to fully materialize the child results if we have a LIMIT
+  // specified and an OFFSET of 0.
+  // TODO<joka921> We could in theory also apply this optimization if a
+  // non-zero OFFSET is specified, but this would make the algorithm more
+  // complicated.
+  std::optional<LimitOffsetClause> limitIfPresent = getLimit();
+  if (!getLimit()._limit.has_value() || getLimit()._offset != 0) {
+    limitIfPresent = std::nullopt;
+  }
+
+  // Get all child results (possibly with limit, see above).
+  for (auto& child : childView()) {
+    if (limitIfPresent.has_value() && child.supportsLimit()) {
+      child.setLimit(limitIfPresent.value());
+    }
+    subResults.push_back(child.getResult());
+
+    const auto& table = subResults.back()->idTable();
+    // Early stopping: If one of the results is empty, we can stop early.
+    if (table.empty()) {
+      break;
+    }
+
+    // If one of the children is the neutral element (because of a triple with
+    // zero variables), we can simply ignore it here.
+    if (table.numRows() == 1 && table.numColumns() == 0) {
+      subResults.pop_back();
+      continue;
+    }
+    // Example for the following calculation: If we have a LIMIT of 1000 and
+    // the first child already has a result of size 100, then the second child
+    // needs to evaluate only its first 10 results. The +1 is because integer
+    // divisions are rounded down by default.
+    if (limitIfPresent.has_value()) {
+      limitIfPresent.value()._limit = limitIfPresent.value()._limit.value() /
+                                          subResults.back()->idTable().size() +
+                                      1;
     }
-    // `getResultWidth` contains all the columns, not only the ones to which a
-    // variable is assigned.
-    offset += child.getResultWidth();
   }
-  return result;
+  return subResults;
 }
diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h
index 779adf1dba..de130a739e 100644
--- a/src/engine/CartesianProductJoin.h
+++ b/src/engine/CartesianProductJoin.h
@@ -82,11 +82,15 @@ class CartesianProductJoin : public Operation {
   // Copy each element from the `inputColumn` `groupSize` times to the
   // `targetColumn`. Repeat until the `targetColumn` is completely filled. Skip
   // the first `offset` write operations to the `targetColumn`. Call
-  // `checkCancellation` after each write. If `StaticGroupSize != 0`, then the
-  // group size is known at compile time which allows for more efficient loop
-  // processing for very small group sizes.
-  template <size_t StaticGroupSize = 0>
+  // `checkCancellation` after each write.
   void writeResultColumn(std::span<Id> targetColumn,
                          std::span<const Id> inputColumn, size_t groupSize,
-                         size_t offset);
+                         size_t offset) const;
+
+  // Write all columns of the subresults into an `IdTable` and return it.
+  IdTable writeAllColumns(
+      const std::vector<std::shared_ptr<const Result>>& subResults) const;
+
+  // Calculate the subresults of the children and store them into a vector.
+  std::vector<std::shared_ptr<const Result>> calculateSubResults();
 };

From 80938b74f8a7e4919abd479a357bbe759896363f Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <joka921@users.noreply.github.com>
Date: Thu, 7 Nov 2024 11:48:34 +0100
Subject: [PATCH 05/12] Docker build no tests for ARM (#1599)

The cross-compilation currently takes more than 6 hours and is then cancelled by GitHub Actions.
We thus disable the building and execution of unit tests for the ARM64 build.
---
 Dockerfile | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index bd514e1e12..15d7754191 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,16 +8,21 @@ RUN apt-get update && apt-get install -y software-properties-common wget && add-
 RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh
 
 FROM base as builder
+ARG TARGETPLATFORM
 RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev
-
 COPY . /app/
 
 WORKDIR /app/
 ENV DEBIAN_FRONTEND=noninteractive
 
 WORKDIR /app/build/
-RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja .. && ninja
-RUN ctest --rerun-failed --output-on-failure
+RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja ..
+# When cross-compiling the container for ARM64, compiling and running all tests runs into a timeout on GitHub Actions,
+# so we disable tests for this platform.
+# TODO(joka921) re-enable these tests as soon as we can use a native ARM64 platform to compile the docker container.
+RUN if  [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "target is ARM64, don't build tests to avoid timeout"; fi
+RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then cmake --build . --target IndexBuilderMain ServerMain; else cmake --build . ; fi
+RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "Skipping tests for ARM64" ; else ctest --rerun-failed --output-on-failure ; fi
 
 FROM base as runtime
 WORKDIR /app

From 0421dab7d1a1e72b13f1a128565924631b3b37ae Mon Sep 17 00:00:00 2001
From: Julian <14220769+Qup42@users.noreply.github.com>
Date: Thu, 7 Nov 2024 12:01:21 +0100
Subject: [PATCH 06/12] Improve `SparqlQleverVisitor.cpp` code coverage (#1591)

Fill some gaps in the test coverage for the code that parses UPDATE requests.
---
 .../sparqlParser/SparqlQleverVisitor.cpp      | 16 +++++++++---
 test/SparqlAntlrParserTest.cpp                | 25 +++++++++++++++++++
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index 7e8ae8facf..db5002eea6 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -516,13 +516,23 @@ ParsedQuery Visitor::visit(Parser::ModifyContext* ctx) {
       return true;
     }
   };
+  auto isVisibleIfVariableGraph =
+      [this](const SparqlTripleSimpleWithGraph::Graph& graph) {
+        if (std::holds_alternative<Variable>(graph)) {
+          return ad_utility::contains(parsedQuery_.getVisibleVariables(),
+                                      std::get<Variable>(graph));
+        } else {
+          return true;
+        }
+      };
   auto checkTriples =
-      [&isVisibleIfVariable,
-       &ctx](const std::vector<SparqlTripleSimpleWithGraph>& triples) {
+      [&isVisibleIfVariable, &ctx, &isVisibleIfVariableGraph](
+          const std::vector<SparqlTripleSimpleWithGraph>& triples) {
         for (auto& triple : triples) {
           if (!(isVisibleIfVariable(triple.s_) &&
                 isVisibleIfVariable(triple.p_) &&
-                isVisibleIfVariable(triple.o_))) {
+                isVisibleIfVariable(triple.o_) &&
+                isVisibleIfVariableGraph(triple.g_))) {
             reportError(ctx,
                         absl::StrCat("A triple contains a variable that was "
                                      "not bound in the query body."));
diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp
index 06e8ae3a79..8983a27a3f 100644
--- a/test/SparqlAntlrParserTest.cpp
+++ b/test/SparqlAntlrParserTest.cpp
@@ -2047,7 +2047,14 @@ TEST(SparqlParser, UpdateQuery) {
           m::GraphUpdate({{Var("?a"), Iri("<b>"), Iri("<c>"), noGraph}}, {},
                          std::nullopt),
           m::GraphPattern(m::Triples({{Iri("<d>"), "<e>", Var{"?a"}}}))));
+  // Use variables that are not visible in the query body. Do this for all parts
+  // of the quad for coverage reasons.
   expectUpdateFails("DELETE { ?a <b> <c> } WHERE { <a> ?b ?c }");
+  expectUpdateFails("DELETE { <c> <d> <c> . <e> ?a <f> } WHERE { <a> ?b ?c }");
+  expectUpdateFails(
+      "DELETE { GRAPH <foo> { <c> <d> <c> . <e> <f> ?a } } WHERE { <a> ?b ?c "
+      "}");
+  expectUpdateFails("DELETE { GRAPH ?a { <c> <d> <c> } } WHERE { <a> ?b ?c }");
   expectUpdate(
       "DELETE { ?a <b> <c> } INSERT { <a> ?a <c> } WHERE { <d> <e> ?a }",
       m::UpdateClause(
@@ -2167,6 +2174,24 @@ TEST(SparqlParser, GraphRef) {
   expectGraphRefAll("GRAPH <foo>", m::GraphRefIri("<foo>"));
 }
 
+TEST(SparqlParser, QuadsNotTriples) {
+  auto expectQuadsNotTriples =
+      ExpectCompleteParse<&Parser::quadsNotTriples>{defaultPrefixMap};
+  auto expectQuadsNotTriplesFails =
+      ExpectParseFails<&Parser::quadsNotTriples>{};
+  const auto Iri = TripleComponent::Iri::fromIriref;
+
+  expectQuadsNotTriples(
+      "GRAPH <foo> { <a> <b> <c> }",
+      testing::ElementsAre(
+          m::Quad(Iri("<a>"), Iri("<b>"), Iri("<c>"), ::Iri("<foo>"))));
+  expectQuadsNotTriples(
+      "GRAPH ?f { <a> <b> <c> }",
+      ElementsAre(m::Quad(Iri("<a>"), Iri("<b>"), Iri("<c>"), Var{"?f"})));
+  expectQuadsNotTriplesFails("GRAPH \"foo\" { <a> <b> <c> }");
+  expectQuadsNotTriplesFails("GRAPH _:blankNode { <a> <b> <c> }");
+}
+
 TEST(SparqlParser, SourceSelector) {
   // This will be implemented soon, but for now we test the failure for the
   // coverage tool.

From 3fa3ff08dd6b84fd7eed6859bc23c032d7489770 Mon Sep 17 00:00:00 2001
From: Julian <14220769+Qup42@users.noreply.github.com>
Date: Thu, 7 Nov 2024 13:22:33 +0100
Subject: [PATCH 07/12] Prepare the actual execution of UPDATE requests (#1592)

Add some helper functions that can and will be used to convert a `QueryExecutionTree` and the parsed representation of an update clause into the format that the `DeltaTriples` class expects.
---
 src/engine/CMakeLists.txt              |   2 +-
 src/engine/ExecuteUpdate.cpp           | 101 +++++++++++
 src/engine/ExecuteUpdate.h             |  44 +++++
 src/engine/ExportQueryExecutionTrees.h |   2 +
 src/engine/Server.cpp                  |   3 +
 src/parser/data/Iri.h                  |   1 +
 test/CMakeLists.txt                    |   2 +
 test/CachingMemoryResourceTest.cpp     |   4 +
 test/DeltaTriplesTest.cpp              |  37 +----
 test/DeltaTriplesTestHelpers.h         |  51 ++++++
 test/ExecuteUpdateTest.cpp             | 221 +++++++++++++++++++++++++
 test/QueryPlannerTestHelpers.h         |   3 +-
 12 files changed, 434 insertions(+), 37 deletions(-)
 create mode 100644 src/engine/ExecuteUpdate.cpp
 create mode 100644 src/engine/ExecuteUpdate.h
 create mode 100644 test/DeltaTriplesTestHelpers.h
 create mode 100644 test/ExecuteUpdateTest.cpp

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index 41a9a33a68..cbfb3344c3 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -13,5 +13,5 @@ add_library(engine
         VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp
         CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
         TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
-        CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp)
+        CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp)
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
diff --git a/src/engine/ExecuteUpdate.cpp b/src/engine/ExecuteUpdate.cpp
new file mode 100644
index 0000000000..ef27c6a8d4
--- /dev/null
+++ b/src/engine/ExecuteUpdate.cpp
@@ -0,0 +1,101 @@
+//  Copyright 2024, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
+
+#include "engine/ExecuteUpdate.h"
+
+#include "engine/ExportQueryExecutionTrees.h"
+
+// _____________________________________________________________________________
+std::pair<std::vector<ExecuteUpdate::TransformedTriple>, LocalVocab>
+ExecuteUpdate::transformTriplesTemplate(
+    const Index::Vocab& vocab, const VariableToColumnMap& variableColumns,
+    std::vector<SparqlTripleSimpleWithGraph>&& triples) {
+  // This LocalVocab only contains IDs that are related to the
+  // template. Most of the IDs will be added to the DeltaTriples' LocalVocab. An
+  // ID will only not be added if it belongs to a Quad with a variable that has
+  // no solutions.
+  LocalVocab localVocab{};
+
+  auto transformSparqlTripleComponent =
+      [&vocab, &localVocab,
+       &variableColumns](TripleComponent component) -> IdOrVariableIndex {
+    if (component.isVariable()) {
+      AD_CORRECTNESS_CHECK(variableColumns.contains(component.getVariable()));
+      return variableColumns.at(component.getVariable()).columnIndex_;
+    } else {
+      return std::move(component).toValueId(vocab, localVocab);
+    }
+  };
+  Id defaultGraphIri = [&transformSparqlTripleComponent] {
+    IdOrVariableIndex defaultGraph = transformSparqlTripleComponent(
+        ad_utility::triple_component::Iri::fromIriref(DEFAULT_GRAPH_IRI));
+    AD_CORRECTNESS_CHECK(std::holds_alternative<Id>(defaultGraph));
+    return std::get<Id>(defaultGraph);
+  }();
+  auto transformGraph =
+      [&vocab, &localVocab, &defaultGraphIri,
+       &variableColumns](SparqlTripleSimpleWithGraph::Graph graph) {
+        return std::visit(
+            ad_utility::OverloadCallOperator{
+                [&defaultGraphIri](const std::monostate&) -> IdOrVariableIndex {
+                  return defaultGraphIri;
+                },
+                [&vocab, &localVocab](const Iri& iri) -> IdOrVariableIndex {
+                  ad_utility::triple_component::Iri i =
+                      ad_utility::triple_component::Iri::fromIriref(iri.iri());
+                  return TripleComponent(i).toValueId(vocab, localVocab);
+                },
+                [&variableColumns](const Variable& var) -> IdOrVariableIndex {
+                  AD_CORRECTNESS_CHECK(variableColumns.contains(var));
+                  return variableColumns.at(var).columnIndex_;
+                }},
+            graph);
+      };
+  auto transformSparqlTripleSimple =
+      [&transformSparqlTripleComponent,
+       &transformGraph](SparqlTripleSimpleWithGraph triple) {
+        return std::array{transformSparqlTripleComponent(std::move(triple.s_)),
+                          transformSparqlTripleComponent(std::move(triple.p_)),
+                          transformSparqlTripleComponent(std::move(triple.o_)),
+                          transformGraph(std::move(triple.g_))};
+      };
+  return {
+      ad_utility::transform(std::move(triples), transformSparqlTripleSimple),
+      std::move(localVocab)};
+}
+
+// _____________________________________________________________________________
+std::optional<Id> ExecuteUpdate::resolveVariable(const IdTable& idTable,
+                                                 const uint64_t& rowIdx,
+                                                 IdOrVariableIndex idOrVar) {
+  auto visitId = [](const Id& id) {
+    return id.isUndefined() ? std::optional<Id>{} : id;
+  };
+  return std::visit(
+      ad_utility::OverloadCallOperator{
+          [&idTable, &rowIdx, &visitId](const ColumnIndex& columnInfo) {
+            return visitId(idTable(rowIdx, columnInfo));
+          },
+          visitId},
+      idOrVar);
+}
+
+// _____________________________________________________________________________
+void ExecuteUpdate::computeAndAddQuadsForResultRow(
+    const std::vector<TransformedTriple>& templates,
+    std::vector<IdTriple<>>& result, const IdTable& idTable,
+    const uint64_t rowIdx) {
+  for (const auto& [s, p, o, g] : templates) {
+    auto subject = resolveVariable(idTable, rowIdx, s);
+    auto predicate = resolveVariable(idTable, rowIdx, p);
+    auto object = resolveVariable(idTable, rowIdx, o);
+    auto graph = resolveVariable(idTable, rowIdx, g);
+
+    if (!subject.has_value() || !predicate.has_value() || !object.has_value() ||
+        !graph.has_value()) {
+      continue;
+    }
+    result.emplace_back(std::array{*subject, *predicate, *object, *graph});
+  }
+}
diff --git a/src/engine/ExecuteUpdate.h b/src/engine/ExecuteUpdate.h
new file mode 100644
index 0000000000..729e65d51c
--- /dev/null
+++ b/src/engine/ExecuteUpdate.h
@@ -0,0 +1,44 @@
+//  Copyright 2024, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
+
+#pragma once
+
+#include <gtest/gtest_prod.h>
+
+#include "index/Index.h"
+#include "parser/ParsedQuery.h"
+#include "util/CancellationHandle.h"
+
+class ExecuteUpdate {
+ public:
+  using CancellationHandle = ad_utility::SharedCancellationHandle;
+  using IdOrVariableIndex = std::variant<Id, ColumnIndex>;
+  using TransformedTriple = std::array<IdOrVariableIndex, 4>;
+
+ private:
+  // Resolve all `TripleComponent`s and `Graph`s in a vector of
+  // `SparqlTripleSimpleWithGraph` into `Id`s or `ColumnIndex`es (variables).
+  static std::pair<std::vector<TransformedTriple>, LocalVocab>
+  transformTriplesTemplate(const Index::Vocab& vocab,
+                           const VariableToColumnMap& variableColumns,
+                           std::vector<SparqlTripleSimpleWithGraph>&& triples);
+  FRIEND_TEST(ExecuteUpdate, transformTriplesTemplate);
+
+  // Resolve a single `IdOrVariableIndex` to an `Id` by looking up the value in
+  // the result row. The returned `Id` is never undefined: if (and only if) the
+  // input `Id` or the `Id` looked up in the `IdTable` is undefined, then
+  // `std::nullopt` is returned.
+  static std::optional<Id> resolveVariable(const IdTable& idTable,
+                                           const uint64_t& rowIdx,
+                                           IdOrVariableIndex idOrVar);
+  FRIEND_TEST(ExecuteUpdate, resolveVariable);
+
+  // Calculate and add the set of quads for the update that results from
+  // interpolating one result row into the template. The resulting `IdTriple`s
+  // consist of only `Id`s.
+  static void computeAndAddQuadsForResultRow(
+      const std::vector<TransformedTriple>& templates,
+      std::vector<IdTriple<>>& result, const IdTable& idTable, uint64_t rowIdx);
+  FRIEND_TEST(ExecuteUpdate, computeAndAddQuadsForResultRow);
+};
diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h
index d8a42b4d48..91a37b6d40 100644
--- a/src/engine/ExportQueryExecutionTrees.h
+++ b/src/engine/ExportQueryExecutionTrees.h
@@ -192,9 +192,11 @@ class ExportQueryExecutionTrees {
   // Return a range that contains the indices of the rows that have to be
   // exported from the `idTable` given the `LimitOffsetClause`. It takes into
   // account the LIMIT, the OFFSET, and the actual size of the `idTable`
+ public:
   static cppcoro::generator<TableWithRange> getRowIndices(
       LimitOffsetClause limitOffset, const Result& result);
 
+ private:
   FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator);
   FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator);
   FRIEND_TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable);
diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
index cb9bf96a34..5a5085a23c 100644
--- a/src/engine/Server.cpp
+++ b/src/engine/Server.cpp
@@ -797,6 +797,9 @@ Awaitable<void> Server::processQuery(
   auto qet = plannedQuery.queryExecutionTree_;
 
   if (plannedQuery.parsedQuery_.hasUpdateClause()) {
+    // This may be caused by a bug (the code is not yet tested well) or by an
+    // attack which tries to circumvent (not yet existing) access controls for
+    // Update.
     throw std::runtime_error("Expected Query but received Update.");
   }
 
diff --git a/src/parser/data/Iri.h b/src/parser/data/Iri.h
index 7b4cfea167..10685f4bb4 100644
--- a/src/parser/data/Iri.h
+++ b/src/parser/data/Iri.h
@@ -8,6 +8,7 @@
 
 #include "parser/data/ConstructQueryExportContext.h"
 
+// TODO: replace usages of this class with `ad_utility::triple_component::Iri`
 class Iri {
   std::string _string;
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index cd64d5c0b6..e9b2cf8347 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -416,3 +416,5 @@ addLinkAndDiscoverTest(SparqlExpressionGeneratorsTest engine)
 addLinkAndDiscoverTest(UrlParserTest)
 
 addLinkAndDiscoverTest(ServerTest engine)
+
+addLinkAndDiscoverTest(ExecuteUpdateTest engine)
diff --git a/test/CachingMemoryResourceTest.cpp b/test/CachingMemoryResourceTest.cpp
index 5d3a977a5a..d376e35516 100644
--- a/test/CachingMemoryResourceTest.cpp
+++ b/test/CachingMemoryResourceTest.cpp
@@ -35,6 +35,10 @@ TEST(CachingMemoryResource, allocateAndDeallocate) {
 
   ptr->deallocate(p12a, 1, 2);
   ptr->deallocate(p12b, 1, 2);
+
+  // Reset the default memory resource to its initial value, such that
+  // subsequent unit tests running in the same binary won't run into trouble.
+  std::pmr::set_default_resource(nullptr);
 }
 
 TEST(CachingMemoryResource, equality) {
diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp
index 8871a43c0e..e9e858d727 100644
--- a/test/DeltaTriplesTest.cpp
+++ b/test/DeltaTriplesTest.cpp
@@ -6,6 +6,7 @@
 
 #include <gtest/gtest.h>
 
+#include "./DeltaTriplesTestHelpers.h"
 #include "./util/GTestHelpers.h"
 #include "./util/IndexTestHelpers.h"
 #include "absl/strings/str_split.h"
@@ -15,41 +16,7 @@
 #include "index/Permutation.h"
 #include "parser/RdfParser.h"
 
-namespace {
-// A matcher that applies `InnerMatcher` to all `LocatedTriplesPerBlock` of a
-// `DeltaTriples`.
-auto InAllPermutations =
-    [](testing::Matcher<const LocatedTriplesPerBlock&> InnerMatcher)
-    -> testing::Matcher<const DeltaTriples&> {
-  return testing::AllOfArray(ad_utility::transform(
-      Permutation::ALL, [&InnerMatcher](const Permutation::Enum& perm) {
-        return testing::ResultOf(
-            absl::StrCat(".getLocatedTriplesPerBlock(",
-                         Permutation::toString(perm), ")"),
-            [perm](const DeltaTriples& deltaTriples) {
-              return deltaTriples.getLocatedTriplesPerBlock(perm);
-            },
-            InnerMatcher);
-      }));
-};
-// A matcher that checks `numTriples()` for all `LocatedTriplesPerBlock` of a
-// `DeltaTriples`.
-auto NumTriplesInAllPermutations =
-    [](size_t expectedNumTriples) -> testing::Matcher<const DeltaTriples&> {
-  return InAllPermutations(AD_PROPERTY(LocatedTriplesPerBlock, numTriples,
-                                       testing::Eq(expectedNumTriples)));
-};
-// A matcher that checks `numInserted()` and `numDeleted()` of a `DeltaTriples`
-// and `numTriples()` for all `LocatedTriplesPerBlock` of the `DeltaTriples`.
-auto NumTriples =
-    [](size_t inserted, size_t deleted,
-       size_t inAllPermutations) -> testing::Matcher<const DeltaTriples&> {
-  return testing::AllOf(
-      AD_PROPERTY(DeltaTriples, numInserted, testing::Eq(inserted)),
-      AD_PROPERTY(DeltaTriples, numDeleted, testing::Eq(deleted)),
-      NumTriplesInAllPermutations(inAllPermutations));
-};
-}  // namespace
+using namespace deltaTriplesTestHelpers;
 
 // Fixture that sets up a test index.
 class DeltaTriplesTest : public ::testing::Test {
diff --git a/test/DeltaTriplesTestHelpers.h b/test/DeltaTriplesTestHelpers.h
new file mode 100644
index 0000000000..586a54196a
--- /dev/null
+++ b/test/DeltaTriplesTestHelpers.h
@@ -0,0 +1,51 @@
+// Copyright 2024, University of Freiburg
+//  Chair of Algorithms and Data Structures.
+//  Authors:
+//    2024 Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "index/DeltaTriples.h"
+#include "index/LocatedTriples.h"
+#include "util/GTestHelpers.h"
+
+#pragma once
+
+namespace deltaTriplesTestHelpers {
+
+// A matcher that applies `InnerMatcher` to all `LocatedTriplesPerBlock` of a
+// `DeltaTriples`.
+inline auto InAllPermutations =
+    [](testing::Matcher<const LocatedTriplesPerBlock&> InnerMatcher)
+    -> testing::Matcher<const DeltaTriples&> {
+  return testing::AllOfArray(ad_utility::transform(
+      Permutation::ALL, [&InnerMatcher](const Permutation::Enum& perm) {
+        return testing::ResultOf(
+            absl::StrCat(".getLocatedTriplesPerBlock(",
+                         Permutation::toString(perm), ")"),
+            [perm](const DeltaTriples& deltaTriples) {
+              return deltaTriples.getLocatedTriplesPerBlock(perm);
+            },
+            InnerMatcher);
+      }));
+};
+// A matcher that checks `numTriples()` for all `LocatedTriplesPerBlock` of a
+// `DeltaTriples`.
+inline auto NumTriplesInAllPermutations =
+    [](size_t expectedNumTriples) -> testing::Matcher<const DeltaTriples&> {
+  return InAllPermutations(AD_PROPERTY(LocatedTriplesPerBlock, numTriples,
+                                       testing::Eq(expectedNumTriples)));
+};
+// A matcher that checks `numInserted()` and `numDeleted()` of a `DeltaTriples`
+// and `numTriples()` for all `LocatedTriplesPerBlock` of the `DeltaTriples`.
+inline auto NumTriples =
+    [](size_t inserted, size_t deleted,
+       size_t inAllPermutations) -> testing::Matcher<const DeltaTriples&> {
+  return testing::AllOf(
+      AD_PROPERTY(DeltaTriples, numInserted, testing::Eq(inserted)),
+      AD_PROPERTY(DeltaTriples, numDeleted, testing::Eq(deleted)),
+      NumTriplesInAllPermutations(inAllPermutations));
+};
+
+}  // namespace deltaTriplesTestHelpers
diff --git a/test/ExecuteUpdateTest.cpp b/test/ExecuteUpdateTest.cpp
new file mode 100644
index 0000000000..08c4ec284e
--- /dev/null
+++ b/test/ExecuteUpdateTest.cpp
@@ -0,0 +1,221 @@
+// Copyright 2024, University of Freiburg,
+// Chair of Algorithms and Data Structures.
+// Author: Julian Mundhahs (mundhahj@tf.uni-freiburg.de)
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "DeltaTriplesTestHelpers.h"
+#include "QueryPlannerTestHelpers.h"
+#include "engine/ExecuteUpdate.h"
+#include "index/IndexImpl.h"
+#include "parser/sparqlParser/SparqlQleverVisitor.h"
+#include "util/GTestHelpers.h"
+#include "util/IdTableHelpers.h"
+#include "util/IndexTestHelpers.h"
+
+using namespace deltaTriplesTestHelpers;
+
+auto V = [](const uint64_t index) {
+  return Id::makeFromVocabIndex(VocabIndex::make(index));
+};
+
+// `ExecuteUpdate::IdOrVariableIndex` extended by `LiteralOrIri` which denotes
+// an entry from the local vocab.
+using TripleComponentT =
+    std::variant<Id, ColumnIndex, ad_utility::triple_component::LiteralOrIri>;
+
+// A matcher that never matches and outputs the given message.
+MATCHER_P(AlwaysFalse, msg, "") {
+  (void)arg;  // avoid compiler warning for unused value.
+  *result_listener << msg;
+  return false;
+}
+
+// _____________________________________________________________________________
+TEST(ExecuteUpdate, transformTriplesTemplate) {
+  // Create an index for testing.
+  const auto qec = ad_utility::testing::getQec("<bar> <bar> \"foo\"");
+  const Index& index = qec->getIndex();
+  // We need a non-const vocab for the test.
+  auto& vocab = const_cast<Index::Vocab&>(index.getVocab());
+
+  // Helpers
+  const auto Id = ad_utility::testing::makeGetId(index);
+  using Graph = SparqlTripleSimpleWithGraph::Graph;
+  using LocalVocab = ad_utility::triple_component::LiteralOrIri;
+  auto defaultGraphId = Id(std::string{DEFAULT_GRAPH_IRI});
+  auto Iri = [](const std::string& iri) {
+    return ad_utility::triple_component::Iri::fromIriref(iri);
+  };
+  auto Literal = [](const std::string& literal) {
+    return ad_utility::triple_component::Literal::fromStringRepresentation(
+        literal);
+  };
+  // Matchers
+  using MatcherType = testing::Matcher<const ExecuteUpdate::IdOrVariableIndex&>;
+  auto TripleComponentMatcher = [](const ::LocalVocab& localVocab,
+                                   TripleComponentT component) -> MatcherType {
+    return std::visit(
+        ad_utility::OverloadCallOperator{
+            [](const ::Id& id) -> MatcherType {
+              return testing::VariantWith<::Id>(testing::Eq(id));
+            },
+            [](const ColumnIndex& index) -> MatcherType {
+              return testing::VariantWith<ColumnIndex>(testing::Eq(index));
+            },
+            [&localVocab](
+                const ad_utility::triple_component::LiteralOrIri& literalOrIri)
+                -> MatcherType {
+              const auto lviOpt = localVocab.getIndexOrNullopt(literalOrIri);
+              if (!lviOpt) {
+                return AlwaysFalse(
+                    absl::StrCat(literalOrIri.toStringRepresentation(),
+                                 " not in local vocab"));
+              }
+              const auto id = Id::makeFromLocalVocabIndex(lviOpt.value());
+              return testing::VariantWith<::Id>(
+                  AD_PROPERTY(Id, getBits, testing::Eq(id.getBits())));
+            }},
+        component);
+  };
+  auto expectTransformTriplesTemplate =
+      [&vocab, &TripleComponentMatcher](
+          const VariableToColumnMap& variableColumns,
+          std::vector<SparqlTripleSimpleWithGraph>&& triples,
+          const std::vector<std::array<TripleComponentT, 4>>&
+              expectedTransformedTriples) {
+        auto [transformedTriples, localVocab] =
+            ExecuteUpdate::transformTriplesTemplate(vocab, variableColumns,
+                                                    std::move(triples));
+        const auto transformedTriplesMatchers = ad_utility::transform(
+            expectedTransformedTriples,
+            [&localVocab, &TripleComponentMatcher](const auto& expectedTriple) {
+              return ElementsAre(
+                  TripleComponentMatcher(localVocab, expectedTriple.at(0)),
+                  TripleComponentMatcher(localVocab, expectedTriple.at(1)),
+                  TripleComponentMatcher(localVocab, expectedTriple.at(2)),
+                  TripleComponentMatcher(localVocab, expectedTriple.at(3)));
+            });
+        EXPECT_THAT(transformedTriples,
+                    testing::ElementsAreArray(transformedTriplesMatchers));
+      };
+  auto expectTransformTriplesTemplateFails =
+      [&vocab](const VariableToColumnMap& variableColumns,
+               std::vector<SparqlTripleSimpleWithGraph>&& triples,
+               const testing::Matcher<const std::string&>& messageMatcher) {
+        AD_EXPECT_THROW_WITH_MESSAGE(
+            ExecuteUpdate::transformTriplesTemplate(vocab, variableColumns,
+                                                    std::move(triples)),
+            messageMatcher);
+      };
+  // Transforming an empty vector of templates yields no `TransformedTriple`s
+  // and leaves the `LocalVocab` empty.
+  expectTransformTriplesTemplate({}, {}, {});
+  // Resolve a `SparqlTripleSimpleWithGraph` without variables.
+  expectTransformTriplesTemplate(
+      {},
+      {SparqlTripleSimpleWithGraph{Literal("\"foo\""), Iri("<bar>"),
+                                   Literal("\"foo\""), Graph{}}},
+      {{Id("\"foo\""), Id("<bar>"), Id("\"foo\""), defaultGraphId}});
+  // An IRI in the template (`<baz>`) that is not in the index is added to the
+  // `LocalVocab`.
+  expectTransformTriplesTemplate(
+      {},
+      {SparqlTripleSimpleWithGraph{Literal("\"foo\""), Iri("<bar>"),
+                                   Literal("\"foo\""), Graph{::Iri("<baz>")}}},
+      {{Id("\"foo\""), Id("<bar>"), Id("\"foo\""), LocalVocab(Iri("<baz>"))}});
+  // A variable in the template (`?f`) is not mapped in the
+  // `VariableToColumnMap`.
+  expectTransformTriplesTemplateFails(
+      {},
+      {SparqlTripleSimpleWithGraph{Literal("\"foo\""), Iri("<bar>"),
+                                   Variable("?f"), Graph{}}},
+      testing::HasSubstr(
+          "Assertion `variableColumns.contains(component.getVariable())` "
+          "failed."));
+  expectTransformTriplesTemplateFails(
+      {},
+      {SparqlTripleSimpleWithGraph{Literal("\"foo\""), Iri("<bar>"),
+                                   Literal("\"foo\""), Graph{Variable("?f")}}},
+      testing::HasSubstr("Assertion `variableColumns.contains(var)` failed."));
+  // Variables in the template are mapped to their column index.
+  expectTransformTriplesTemplate(
+      {{Variable("?f"), {0, ColumnIndexAndTypeInfo::PossiblyUndefined}}},
+      {SparqlTripleSimpleWithGraph{Literal("\"foo\""), Iri("<bar>"),
+                                   Variable("?f"), Graph{}}},
+      {{Id("\"foo\""), Id("<bar>"), 0UL, defaultGraphId}});
+  expectTransformTriplesTemplate(
+      {{Variable("?f"), {0, ColumnIndexAndTypeInfo::PossiblyUndefined}}},
+      {SparqlTripleSimpleWithGraph{Literal("\"foo\""), Iri("<bar>"),
+                                   Literal("\"foo\""), Graph{Variable("?f")}}},
+      {{Id("\"foo\""), Id("<bar>"), Id("\"foo\""), 0UL}});
+}
+
+// _____________________________________________________________________________
+TEST(ExecuteUpdate, resolveVariable) {
+  const auto idTable =
+      makeIdTableFromVector({{V(0), V(1), V(2)},
+                             {V(3), V(4), V(5)},
+                             {V(6), Id::makeUndefined(), V(8)}});
+  auto resolveVariable =
+      std::bind_front(&ExecuteUpdate::resolveVariable, std::cref(idTable));
+  EXPECT_THAT(resolveVariable(0, V(10)), testing::Eq(V(10)));
+  EXPECT_THAT(resolveVariable(0, 1UL), testing::Eq(V(1)));
+  EXPECT_THAT(resolveVariable(1, 1UL), testing::Eq(V(4)));
+  EXPECT_THAT(resolveVariable(2, 1UL), testing::Eq(std::nullopt));
+  EXPECT_THAT(resolveVariable(2, Id::makeUndefined()),
+              testing::Eq(std::nullopt));
+}
+
+// _____________________________________________________________________________
+TEST(ExecuteUpdate, computeAndAddQuadsForResultRow) {
+  const auto idTable =
+      makeIdTableFromVector({{V(0), V(1), V(2)},
+                             {V(3), V(4), V(5)},
+                             {V(6), Id::makeUndefined(), V(8)}});
+  auto expectComputeQuads =
+      [](const std::vector<ExecuteUpdate::TransformedTriple>& templates,
+         const IdTable& idTable, uint64_t rowIdx,
+         const testing::Matcher<const std::vector<IdTriple<>>&>&
+             expectedQuads) {
+        std::vector<IdTriple<>> result;
+        ExecuteUpdate::computeAndAddQuadsForResultRow(templates, result,
+                                                      idTable, rowIdx);
+        EXPECT_THAT(result, expectedQuads);
+      };
+  // Computing the quads for an empty template set yields no quads.
+  expectComputeQuads({}, idTable, 0, testing::IsEmpty());
+  // Computing the quads for templates without variables yields the templates
+  // unmodified.
+  expectComputeQuads(
+      {{V(0), V(1), V(2), V(3)}}, idTable, 0,
+      testing::ElementsAreArray({IdTriple{{V(0), V(1), V(2), V(3)}}}));
+  expectComputeQuads(
+      {{V(0), V(1), V(2), V(3)}}, idTable, 1,
+      testing::ElementsAreArray({IdTriple{{V(0), V(1), V(2), V(3)}}}));
+  // The variables in templates are resolved to the value of the variable in the
+  // specified row of the result.
+  expectComputeQuads(
+      {{0UL, V(1), 1UL, V(3)}}, idTable, 0,
+      testing::ElementsAreArray({IdTriple{{V(0), V(1), V(1), V(3)}}}));
+  expectComputeQuads(
+      {{0UL, V(1), 1UL, V(3)}}, idTable, 1,
+      testing::ElementsAreArray({IdTriple{{V(3), V(1), V(4), V(3)}}}));
+  // Quads with undefined IDs cannot be stored and are not returned.
+  expectComputeQuads({{0UL, V(1), 1UL, V(3)}}, idTable, 2, testing::IsEmpty());
+  expectComputeQuads({{V(0), V(1), Id::makeUndefined(), V(3)}}, idTable, 0,
+                     testing::IsEmpty());
+  // Some extra cases to cover all branches.
+  expectComputeQuads({{Id::makeUndefined(), V(1), V(2), V(3)}}, idTable, 0,
+                     testing::IsEmpty());
+  expectComputeQuads({{V(0), Id::makeUndefined(), V(2), V(3)}}, idTable, 0,
+                     testing::IsEmpty());
+  expectComputeQuads({{V(0), V(1), V(2), Id::makeUndefined()}}, idTable, 0,
+                     testing::IsEmpty());
+  // All the templates are evaluated for the specified row of the result.
+  expectComputeQuads(
+      {{0UL, V(1), 1UL, V(3)}, {V(0), 1UL, 2UL, V(3)}}, idTable, 0,
+      testing::ElementsAreArray({IdTriple{{V(0), V(1), V(1), V(3)}},
+                                 IdTriple{{V(0), V(1), V(2), V(3)}}}));
+}
diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h
index 48abc6d342..5791627d9e 100644
--- a/test/QueryPlannerTestHelpers.h
+++ b/test/QueryPlannerTestHelpers.h
@@ -384,7 +384,8 @@ constexpr auto Union = MatchTypeAndOrderedChildren<::Union>;
 
 /// Parse the given SPARQL `query`, pass it to a `QueryPlanner` with empty
 /// execution context, and return the resulting `QueryExecutionTree`
-QueryExecutionTree parseAndPlan(std::string query, QueryExecutionContext* qec) {
+inline QueryExecutionTree parseAndPlan(std::string query,
+                                       QueryExecutionContext* qec) {
   ParsedQuery pq = SparqlParser::parseQuery(std::move(query));
   // TODO<joka921> make it impossible to pass `nullptr` here, properly mock a
   // queryExecutionContext.

From bb70c4a8bd9bb9505ba809ba845b0bd7df0dc9b6 Mon Sep 17 00:00:00 2001
From: Julian <14220769+Qup42@users.noreply.github.com>
Date: Thu, 7 Nov 2024 15:40:08 +0100
Subject: [PATCH 08/12] Enable parsing of Updates (#1604)

The SPARQL parser now parses all UPDATE requests. In practice, this means that the `SPARQL UPDATE is not yet supported by QLever` message is now emitted by the Server and no longer by the Parser, which brings us one step closer to supporting SPARQL UPDATE.
---
 .../sparqlParser/SparqlQleverVisitor.cpp      | 22 +++----
 test/SparqlAntlrParserTest.cpp                | 61 ++++++++++---------
 2 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index db5002eea6..c51d1cb1c9 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -211,21 +211,17 @@ ParsedQuery Visitor::visit(Parser::QueryContext* ctx) {
 
 // ____________________________________________________________________________________
 ParsedQuery Visitor::visit(Parser::QueryOrUpdateContext* ctx) {
-  if (ctx->update()) {
+  if (ctx->update() && !ctx->update()->update1()) {
     // An empty query currently matches the `update()` rule. We handle this
     // case manually to get a better error message. If an update query doesn't
     // have an `update1()`, then it consists of a (possibly empty) prologue, but
     // has not actual content, see the grammar in `SparqlAutomatic.g4` for
     // details.
-    if (!ctx->update()->update1()) {
-      reportError(ctx->update(),
-                  "Empty query (this includes queries that only consist "
-                  "of comments or prefix declarations).");
-    }
-    reportNotSupported(ctx->update(), "SPARQL 1.1 Update is");
-  } else {
-    return visit(ctx->query());
+    reportError(ctx->update(),
+                "Empty query (this includes queries that only consist "
+                "of comments or prefix declarations).");
   }
+  return visitAlternative<ParsedQuery>(ctx->query(), ctx->update());
 }
 
 // ____________________________________________________________________________________
@@ -392,16 +388,20 @@ std::optional<Values> Visitor::visit(Parser::ValuesClauseContext* ctx) {
 
 // ____________________________________________________________________________________
 ParsedQuery Visitor::visit(Parser::UpdateContext* ctx) {
+  // The prologue (BASE and PREFIX declarations) only affects the internal
+  // state of the visitor.
   visit(ctx->prologue());
 
-  auto query = visit(ctx->update1());
+  auto update = visit(ctx->update1());
 
   if (ctx->update()) {
     parsedQuery_ = ParsedQuery{};
     reportNotSupported(ctx->update(), "Multiple updates in one query are");
   }
 
-  return query;
+  update._originalString = ctx->getStart()->getInputStream()->toString();
+
+  return update;
 }
 
 // ____________________________________________________________________________________
diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp
index 8983a27a3f..4b93153522 100644
--- a/test/SparqlAntlrParserTest.cpp
+++ b/test/SparqlAntlrParserTest.cpp
@@ -1926,32 +1926,6 @@ TEST(SparqlParser, aggregateExpressions) {
       matchAggregate<GroupConcatExpression>(true, V{"?x"}, separator(";")));
 }
 
-// Update queries are WIP. The individual parts to parse some update queries
-// are in place the code to process them is still unfinished. Therefore we
-// don't accept update queries.
-TEST(SparqlParser, updateQueryUnsupported) {
-  auto expectUpdateFails = ExpectParseFails<&Parser::queryOrUpdate>{};
-  auto contains = [](const std::string& s) { return ::testing::HasSubstr(s); };
-  auto updateUnsupported =
-      contains("SPARQL 1.1 Update is currently not supported by QLever.");
-
-  // Test all the cases because some functionality will be enabled shortly.
-  expectUpdateFails("INSERT DATA { <a> <b> <c> }", updateUnsupported);
-  expectUpdateFails("DELETE DATA { <a> <b> <c> }", updateUnsupported);
-  expectUpdateFails("DELETE { <a> <b> <c> } WHERE { ?s ?p ?o }",
-                    updateUnsupported);
-  expectUpdateFails("INSERT { <a> <b> <c> } WHERE { ?s ?p ?o }",
-                    updateUnsupported);
-  expectUpdateFails("DELETE WHERE { <a> <b> <c> }", updateUnsupported);
-  expectUpdateFails("LOAD <a>", updateUnsupported);
-  expectUpdateFails("CLEAR GRAPH <a>", updateUnsupported);
-  expectUpdateFails("DROP GRAPH <a>", updateUnsupported);
-  expectUpdateFails("CREATE GRAPH <a>", updateUnsupported);
-  expectUpdateFails("ADD GRAPH <a> TO DEFAULT", updateUnsupported);
-  expectUpdateFails("MOVE DEFAULT TO GRAPH <a>", updateUnsupported);
-  expectUpdateFails("COPY GRAPH <a> TO GRAPH <a>", updateUnsupported);
-}
-
 TEST(SparqlParser, Quads) {
   auto expectQuads = ExpectCompleteParse<&Parser::quads>{defaultPrefixMap};
   auto expectQuadsFails = ExpectParseFails<&Parser::quads>{};
@@ -2011,8 +1985,14 @@ TEST(SparqlParser, QuadData) {
   expectQuadDataFails("{ GRAPH ?foo { <a> <b> <c> } }");
 }
 
-TEST(SparqlParser, UpdateQuery) {
-  auto expectUpdate = ExpectCompleteParse<&Parser::update>{defaultPrefixMap};
+TEST(SparqlParser, Update) {
+  auto expectUpdate_ = ExpectCompleteParse<&Parser::update>{defaultPrefixMap};
+  // Automatically test all updates for their `_originalString`.
+  auto expectUpdate = [&expectUpdate_](const std::string& query,
+                                       auto&& expected) {
+    expectUpdate_(query,
+                  testing::AllOf(expected, m::pq::OriginalString(query)));
+  };
   auto expectUpdateFails = ExpectParseFails<&Parser::update>{};
   auto Iri = [](std::string_view stringWithBrackets) {
     return TripleComponent::Iri::fromIriref(stringWithBrackets);
@@ -2022,6 +2002,7 @@ TEST(SparqlParser, UpdateQuery) {
   };
   auto noGraph = std::monostate{};
 
+  // Test the parsing of the update clause in the ParsedQuery.
   expectUpdate(
       "INSERT DATA { <a> <b> <c> }",
       m::UpdateClause(
@@ -2144,13 +2125,35 @@ TEST(SparqlParser, UpdateQuery) {
                                m::GraphPattern()));
 }
 
-TEST(SparqlParser, EmptyQuery) {
+TEST(SparqlParser, QueryOrUpdate) {
+  auto expectQuery =
+      ExpectCompleteParse<&Parser::queryOrUpdate>{defaultPrefixMap};
   auto expectQueryFails = ExpectParseFails<&Parser::queryOrUpdate>{};
+  auto Iri = [](std::string_view stringWithBrackets) {
+    return TripleComponent::Iri::fromIriref(stringWithBrackets);
+  };
+  // Empty queries (queries without any query or update operation) are
+  // forbidden.
   auto emptyMatcher = ::testing::HasSubstr("Empty quer");
   expectQueryFails("", emptyMatcher);
   expectQueryFails(" ", emptyMatcher);
   expectQueryFails("PREFIX ex: <http://example.org>", emptyMatcher);
   expectQueryFails("### Some comment \n \n #someMoreComments", emptyMatcher);
+  // Hit all paths for coverage.
+  expectQuery("SELECT ?a WHERE { ?a <is-a> <b> }",
+              AllOf(m::SelectQuery(m::Select({Var{"?a"}}),
+                                   m::GraphPattern(m::Triples(
+                                       {{Var{"?a"}, "<is-a>", Iri("<b>")}}))),
+                    m::pq::OriginalString("SELECT ?a WHERE { ?a <is-a> <b> }"),
+                    m::VisibleVariables({Var{"?a"}})));
+  expectQuery(
+      "INSERT DATA { <a> <b> <c> }",
+      AllOf(m::UpdateClause(m::GraphUpdate({},
+                                           {{Iri("<a>"), Iri("<b>"), Iri("<c>"),
+                                             std::monostate{}}},
+                                           std::nullopt),
+                            m::GraphPattern()),
+            m::pq::OriginalString("INSERT DATA { <a> <b> <c> }")));
 }
 
 TEST(SparqlParser, GraphOrDefault) {

From 1ddf5e09cc1a7819385e35cb09276c808c464301 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <joka921@users.noreply.github.com>
Date: Fri, 8 Nov 2024 14:21:21 +0100
Subject: [PATCH 09/12] Add `DeltaTriplesManager` (#1603)

The already existing `DeltaTriples` class maintains a dynamically changing set of insertions and deletions relative to the original input data, together with a (single) local vocab. The class is not thread-safe and has to be used with care. In particular, concurrent update queries have to be serialized, and while a query makes use of the "delta triples", it must be ensured that they do not change while that query is being processed.

Both of these problems are solved by the new `DeltaTriplesManager` class. The index has a single object of this class. It maintains a single `DeltaTriples` object, write access to which is strictly serialized. Each new query gets a so-called *snapshot* of the current delta triples. This is a full copy (of the delta triples located in each of the permutations and of the local vocab). These snapshots are read-only, and multiple queries can share the same snapshot. A snapshot lives as long as at least one query using it is still being processed.
---
 src/engine/CountAvailablePredicates.cpp |   7 +-
 src/engine/GroupBy.cpp                  |  10 +--
 src/engine/HasPredicateScan.cpp         |  13 +--
 src/engine/IndexScan.cpp                |  12 +--
 src/engine/Operation.h                  |   4 +-
 src/engine/QueryExecutionContext.h      |  25 +++---
 src/engine/QueryExecutionTree.cpp       |   9 +--
 src/engine/QueryExecutionTree.h         |   8 +-
 src/index/DeltaTriples.cpp              |  62 +++++++++++---
 src/index/DeltaTriples.h                |  94 +++++++++++++++++-----
 src/index/Index.cpp                     |  49 +++++++-----
 src/index/Index.h                       |  36 +++++----
 src/index/IndexImpl.cpp                 |  35 ++++----
 src/index/IndexImpl.h                   |  33 +++++---
 src/index/LocatedTriples.cpp            |  16 ++++
 src/index/LocatedTriples.h              |  10 ++-
 src/index/Permutation.cpp               |  58 ++++++++------
 src/index/Permutation.h                 |  41 +++++-----
 test/DeltaTriplesTest.cpp               | 102 ++++++++++++++++++++++++
 test/DeltaTriplesTestHelpers.h          |   2 +-
 test/IndexTest.cpp                      |  66 +++++++++------
 test/util/IndexTestHelpers.cpp          |  16 ++--
 22 files changed, 497 insertions(+), 211 deletions(-)

diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp
index 095015d6a2..e78fcca694 100644
--- a/src/engine/CountAvailablePredicates.cpp
+++ b/src/engine/CountAvailablePredicates.cpp
@@ -165,9 +165,10 @@ void CountAvailablePredicates::computePatternTrickAllEntities(
           TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), std::nullopt,
           std::nullopt}
           .toScanSpecification(index);
-  auto fullHasPattern = index.getPermutation(Permutation::Enum::PSO)
-                            .lazyScan(scanSpec, std::nullopt, {},
-                                      cancellationHandle_, deltaTriples());
+  auto fullHasPattern =
+      index.getPermutation(Permutation::Enum::PSO)
+          .lazyScan(scanSpec, std::nullopt, {}, cancellationHandle_,
+                    locatedTriplesSnapshot());
   for (const auto& idTable : fullHasPattern) {
     for (const auto& patternId : idTable.getColumn(1)) {
       AD_CORRECTNESS_CHECK(patternId.getDatatype() == Datatype::Int);
diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp
index cc00887845..e6ff853c48 100644
--- a/src/engine/GroupBy.cpp
+++ b/src/engine/GroupBy.cpp
@@ -665,7 +665,7 @@ std::optional<IdTable> GroupBy::computeGroupByObjectWithCount() const {
       getExecutionContext()->getIndex().getPimpl().getPermutation(
           indexScan->permutation());
   auto result = permutation.getDistinctCol1IdsAndCounts(
-      col0Id.value(), cancellationHandle_, deltaTriples());
+      col0Id.value(), cancellationHandle_, locatedTriplesSnapshot());
   indexScan->updateRuntimeInformationWhenOptimizedOut(
       {}, RuntimeInformation::Status::optimizedOut);
 
@@ -717,8 +717,8 @@ std::optional<IdTable> GroupBy::computeGroupByForFullIndexScan() const {
   const auto& permutation =
       getExecutionContext()->getIndex().getPimpl().getPermutation(
           permutationEnum.value());
-  auto table = permutation.getDistinctCol0IdsAndCounts(cancellationHandle_,
-                                                       deltaTriples());
+  auto table = permutation.getDistinctCol0IdsAndCounts(
+      cancellationHandle_, locatedTriplesSnapshot());
   if (numCounts == 0) {
     table.setColumnSubset({{0}});
   }
@@ -840,7 +840,7 @@ std::optional<IdTable> GroupBy::computeGroupByForJoinWithFullScan() const {
   Id currentId = subresult->idTable()(0, columnIndex);
   size_t currentCount = 0;
   size_t currentCardinality =
-      index.getCardinality(currentId, permutation, deltaTriples());
+      index.getCardinality(currentId, permutation, locatedTriplesSnapshot());
 
   auto pushRow = [&]() {
     // If the count is 0 this means that the element with the `currentId`
@@ -863,7 +863,7 @@ std::optional<IdTable> GroupBy::computeGroupByForJoinWithFullScan() const {
       // without the internally added triples, but that is not easy to
       // retrieve right now.
       currentCardinality =
-          index.getCardinality(id, permutation, deltaTriples());
+          index.getCardinality(id, permutation, locatedTriplesSnapshot());
     }
     currentCount += currentCardinality;
   }
diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp
index 5c494ab13c..b01ede635b 100644
--- a/src/engine/HasPredicateScan.cpp
+++ b/src/engine/HasPredicateScan.cpp
@@ -267,9 +267,10 @@ ProtoResult HasPredicateScan::computeResult(
           TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), std::nullopt,
           std::nullopt}
           .toScanSpecification(index);
-  auto hasPattern = index.getPermutation(Permutation::Enum::PSO)
-                        .lazyScan(scanSpec, std::nullopt, {},
-                                  cancellationHandle_, deltaTriples());
+  auto hasPattern =
+      index.getPermutation(Permutation::Enum::PSO)
+          .lazyScan(scanSpec, std::nullopt, {}, cancellationHandle_,
+                    locatedTriplesSnapshot());
 
   auto getId = [this](const TripleComponent tc) {
     std::optional<Id> id = tc.toValueId(getIndex().getVocab());
@@ -339,9 +340,9 @@ void HasPredicateScan::computeFreeO(
           TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), subjectAsId,
           std::nullopt}
           .toScanSpecification(index);
-  auto hasPattern =
-      index.getPermutation(Permutation::Enum::PSO)
-          .scan(std::move(scanSpec), {}, cancellationHandle_, deltaTriples());
+  auto hasPattern = index.getPermutation(Permutation::Enum::PSO)
+                        .scan(std::move(scanSpec), {}, cancellationHandle_,
+                              locatedTriplesSnapshot());
   AD_CORRECTNESS_CHECK(hasPattern.numRows() <= 1);
   for (Id patternId : hasPattern.getColumn(0)) {
     const auto& pattern = patterns[patternId.getInt()];
diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp
index 9217e91ac0..dc6781caef 100644
--- a/src/engine/IndexScan.cpp
+++ b/src/engine/IndexScan.cpp
@@ -162,7 +162,7 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) {
   const auto& index = _executionContext->getIndex();
   idTable =
       index.scan(getScanSpecification(), permutation_, additionalColumns(),
-                 cancellationHandle_, deltaTriples(), getLimit());
+                 cancellationHandle_, locatedTriplesSnapshot(), getLimit());
   AD_CORRECTNESS_CHECK(idTable.numColumns() == getResultWidth());
   LOG(DEBUG) << "IndexScan result computation done.\n";
   checkCancellation();
@@ -174,7 +174,7 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) {
 size_t IndexScan::computeSizeEstimate() const {
   AD_CORRECTNESS_CHECK(_executionContext);
   return getIndex().getResultSizeOfScan(getScanSpecification(), permutation_,
-                                        deltaTriples());
+                                        locatedTriplesSnapshot());
 }
 
 // _____________________________________________________________________________
@@ -195,7 +195,7 @@ void IndexScan::determineMultiplicities() {
       return {1.0f};
     } else if (numVariables_ == 2) {
       return idx.getMultiplicities(*getPermutedTriple()[0], permutation_,
-                                   deltaTriples());
+                                   locatedTriplesSnapshot());
     } else {
       AD_CORRECTNESS_CHECK(numVariables_ == 3);
       return idx.getMultiplicities(permutation_);
@@ -245,8 +245,8 @@ Permutation::IdTableGenerator IndexScan::getLazyScan(
       .getImpl()
       .getPermutation(permutation())
       .lazyScan(getScanSpecification(), std::move(actualBlocks),
-                additionalColumns(), cancellationHandle_, deltaTriples(),
-                getLimit());
+                additionalColumns(), cancellationHandle_,
+                locatedTriplesSnapshot(), getLimit());
 };
 
 // ________________________________________________________________
@@ -254,7 +254,7 @@ std::optional<Permutation::MetadataAndBlocks> IndexScan::getMetadataForScan()
     const {
   const auto& index = getExecutionContext()->getIndex().getImpl();
   return index.getPermutation(permutation())
-      .getMetadataAndBlocks(getScanSpecification(), deltaTriples());
+      .getMetadataAndBlocks(getScanSpecification(), locatedTriplesSnapshot());
 };
 
 // ________________________________________________________________
diff --git a/src/engine/Operation.h b/src/engine/Operation.h
index 61702c7766..6f95633b33 100644
--- a/src/engine/Operation.h
+++ b/src/engine/Operation.h
@@ -69,8 +69,8 @@ class Operation {
 
   const Index& getIndex() const { return _executionContext->getIndex(); }
 
-  const DeltaTriples& deltaTriples() const {
-    return _executionContext->deltaTriples();
+  const auto& locatedTriplesSnapshot() const {
+    return _executionContext->locatedTriplesSnapshot();
   }
 
   // Get a unique, not ambiguous string representation for a subtree.
diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h
index 70657da59b..de7b5a4f6e 100644
--- a/src/engine/QueryExecutionContext.h
+++ b/src/engine/QueryExecutionContext.h
@@ -1,8 +1,7 @@
-// Copyright 2011, University of Freiburg,
-// Chair of Algorithms and Data Structures.
-// Author:
-//   2011-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de)
-//   2018-     Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de)
+// Copyright 2011 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Björn Buchhold <buchhold@cs.uni-freiburg.de> [2011 - 2017]
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de> [2017 - 2024]
 
 #pragma once
 
@@ -92,7 +91,10 @@ class QueryExecutionContext {
 
   [[nodiscard]] const Index& getIndex() const { return _index; }
 
-  const DeltaTriples& deltaTriples() const { return *deltaTriples_; }
+  const LocatedTriplesSnapshot& locatedTriplesSnapshot() const {
+    AD_CORRECTNESS_CHECK(sharedLocatedTriplesSnapshot_ != nullptr);
+    return *sharedLocatedTriplesSnapshot_;
+  }
 
   void clearCacheUnpinnedOnly() { getQueryTreeCache().clearUnpinnedOnly(); }
 
@@ -123,10 +125,13 @@ class QueryExecutionContext {
 
  private:
   const Index& _index;
-  // TODO<joka921> This has to be stored externally once we properly support
-  // SPARQL UPDATE, currently it is just a stub to make the interface work.
-  std::shared_ptr<DeltaTriples> deltaTriples_{
-      std::make_shared<DeltaTriples>(_index)};
+
+  // When the `QueryExecutionContext` is constructed, get a stable read-only
+  // snapshot of the current (located) delta triples. These can then be used
+  // by the respective query without interfering with further incoming
+  // update operations.
+  SharedLocatedTriplesSnapshot sharedLocatedTriplesSnapshot_{
+      _index.deltaTriplesManager().getCurrentSnapshot()};
   QueryResultCache* const _subtreeCache;
   // allocators are copied but hold shared state
   ad_utility::AllocatorWithLimit<Id> _allocator;
diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp
index aed58d4fde..2b2c393928 100644
--- a/src/engine/QueryExecutionTree.cpp
+++ b/src/engine/QueryExecutionTree.cpp
@@ -1,8 +1,7 @@
-// Copyright 2015, University of Freiburg,
-// Chair of Algorithms and Data Structures.
-// Author:
-//   2015-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de)
-//   2018-     Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de)
+// Copyright 2015 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Björn Buchhold <buchhold@cs.uni-freiburg.de> [2015 - 2017]
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de> [2017 - 2024]
 
 #include "./QueryExecutionTree.h"
 
diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h
index 0519082b78..6a4b63c712 100644
--- a/src/engine/QueryExecutionTree.h
+++ b/src/engine/QueryExecutionTree.h
@@ -1,6 +1,8 @@
-// Copyright 2015, University of Freiburg,
-// Chair of Algorithms and Data Structures.
-// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)
+// Copyright 2015 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Björn Buchhold <buchhold@cs.uni-freiburg.de>
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
 #pragma once
 
 #include <memory>
diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp
index a07d342ec6..0d2ac3bac9 100644
--- a/src/index/DeltaTriples.cpp
+++ b/src/index/DeltaTriples.cpp
@@ -1,8 +1,8 @@
 // Copyright 2023 - 2024, University of Freiburg
-//  Chair of Algorithms and Data Structures.
-//  Authors:
-//    2023 Hannah Bast <bast@cs.uni-freiburg.de>
-//    2024 Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
+// Chair of Algorithms and Data Structures
+// Authors: Hannah Bast <bast@cs.uni-freiburg.de>
+//          Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #include "index/DeltaTriples.h"
 
@@ -21,8 +21,7 @@ LocatedTriples::iterator& DeltaTriples::LocatedTripleHandles::forPermutation(
 void DeltaTriples::clear() {
   triplesInserted_.clear();
   triplesDeleted_.clear();
-  std::ranges::for_each(locatedTriplesPerBlock_,
-                        &LocatedTriplesPerBlock::clear);
+  std::ranges::for_each(locatedTriples(), &LocatedTriplesPerBlock::clear);
 }
 
 // ____________________________________________________________________________
@@ -33,7 +32,7 @@ DeltaTriples::locateAndAddTriples(CancellationHandle cancellationHandle,
   std::array<std::vector<LocatedTriples::iterator>, Permutation::ALL.size()>
       intermediateHandles;
   for (auto permutation : Permutation::ALL) {
-    auto& perm = index_.getImpl().getPermutation(permutation);
+    auto& perm = index_.getPermutation(permutation);
     auto locatedTriples = LocatedTriple::locateTriplesInPermutation(
         // TODO<qup42>: replace with `getAugmentedMetadata` once integration
         //  is done
@@ -41,7 +40,7 @@ DeltaTriples::locateAndAddTriples(CancellationHandle cancellationHandle,
         cancellationHandle);
     cancellationHandle->throwIfCancelled();
     intermediateHandles[static_cast<size_t>(permutation)] =
-        locatedTriplesPerBlock_[static_cast<size_t>(permutation)].add(
+        this->locatedTriples()[static_cast<size_t>(permutation)].add(
             locatedTriples);
     cancellationHandle->throwIfCancelled();
   }
@@ -60,8 +59,8 @@ void DeltaTriples::eraseTripleInAllPermutations(LocatedTripleHandles& handles) {
   // Erase for all permutations.
   for (auto permutation : Permutation::ALL) {
     auto ltIter = handles.forPermutation(permutation);
-    locatedTriplesPerBlock_[static_cast<int>(permutation)].erase(
-        ltIter->blockIndex_, ltIter);
+    locatedTriples()[static_cast<int>(permutation)].erase(ltIter->blockIndex_,
+                                                          ltIter);
   }
 }
 
@@ -172,7 +171,48 @@ void DeltaTriples::modifyTriplesImpl(CancellationHandle cancellationHandle,
 }
 
 // ____________________________________________________________________________
-const LocatedTriplesPerBlock& DeltaTriples::getLocatedTriplesPerBlock(
+const LocatedTriplesPerBlock&
+LocatedTriplesSnapshot::getLocatedTriplesForPermutation(
     Permutation::Enum permutation) const {
   return locatedTriplesPerBlock_[static_cast<int>(permutation)];
 }
+
+// ____________________________________________________________________________
+SharedLocatedTriplesSnapshot DeltaTriples::getSnapshot() const {
+  // NOTE: Both members of the `LocatedTriplesSnapshot` are copied, but the
+  // `localVocab_` has no copy constructor (in order to avoid accidental
+  // copies), hence the explicit `clone`.
+  return SharedLocatedTriplesSnapshot{std::make_shared<LocatedTriplesSnapshot>(
+      locatedTriples(), localVocab_.clone())};
+}
+
+// ____________________________________________________________________________
+DeltaTriples::DeltaTriples(const Index& index)
+    : DeltaTriples(index.getImpl()) {}
+
+// ____________________________________________________________________________
+DeltaTriplesManager::DeltaTriplesManager(const IndexImpl& index)
+    : deltaTriples_{index},
+      currentLocatedTriplesSnapshot_{deltaTriples_.rlock()->getSnapshot()} {}
+
+// _____________________________________________________________________________
+void DeltaTriplesManager::modify(
+    const std::function<void(DeltaTriples&)>& function) {
+  // While holding the lock for the underlying `DeltaTriples`, perform the
+  // actual `function` (typically some combination of insert and delete
+  // operations) and (while still holding the lock) update the
+  // `currentLocatedTriplesSnapshot_`.
+  deltaTriples_.withWriteLock([this, &function](DeltaTriples& deltaTriples) {
+    function(deltaTriples);
+    auto newSnapshot = deltaTriples.getSnapshot();
+    currentLocatedTriplesSnapshot_.withWriteLock(
+        [&newSnapshot](auto& currentSnapshot) {
+          currentSnapshot = std::move(newSnapshot);
+        });
+  });
+}
+
+// _____________________________________________________________________________
+SharedLocatedTriplesSnapshot DeltaTriplesManager::getCurrentSnapshot() const {
+  return *currentLocatedTriplesSnapshot_.rlock();
+}
diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h
index 05342a845b..afe13c7c07 100644
--- a/src/index/DeltaTriples.h
+++ b/src/index/DeltaTriples.h
@@ -1,8 +1,8 @@
 // Copyright 2023 - 2024, University of Freiburg
-//  Chair of Algorithms and Data Structures.
-//  Authors:
-//    2023 Hannah Bast <bast@cs.uni-freiburg.de>
-//    2024 Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
+// Chair of Algorithms and Data Structures
+// Authors: Hannah Bast <bast@cs.uni-freiburg.de>
+//          Julian Mundhahs <mundhahj@tf.uni-freiburg.de>
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
 #pragma once
 
@@ -12,6 +12,29 @@
 #include "index/IndexBuilderTypes.h"
 #include "index/LocatedTriples.h"
 #include "index/Permutation.h"
+#include "util/Synchronized.h"
+
+// Typedef for one `LocatedTriplesPerBlock` object for each of the six
+// permutations.
+using LocatedTriplesPerBlockAllPermutations =
+    std::array<LocatedTriplesPerBlock, Permutation::ALL.size()>;
+
+// The locations of a set of delta triples (triples that were inserted or
+// deleted since the index was built) in each of the six permutations, and a
+// local vocab. This is all the information that is required to perform a query
+// that correctly respects these delta triples, hence the name.
+struct LocatedTriplesSnapshot {
+  LocatedTriplesPerBlockAllPermutations locatedTriplesPerBlock_;
+  LocalVocab localVocab_;
+  // Get the `LocatedTriplesPerBlock` object for the given permutation.
+  const LocatedTriplesPerBlock& getLocatedTriplesForPermutation(
+      Permutation::Enum permutation) const;
+};
+
+// A shared pointer to a constant `LocatedTriplesSnapshot`, but as an explicit
+// class, such that it can be forward-declared.
+class SharedLocatedTriplesSnapshot
+    : public std::shared_ptr<const LocatedTriplesSnapshot> {};
 
 // A class for maintaining triples that are inserted or deleted after index
 // building, we call these delta triples. How it works in principle:
@@ -33,9 +56,16 @@ class DeltaTriples {
   FRIEND_TEST(DeltaTriplesTest, clear);
   FRIEND_TEST(DeltaTriplesTest, addTriplesToLocalVocab);
 
+ public:
+  using Triples = std::vector<IdTriple<0>>;
+  using CancellationHandle = ad_utility::SharedCancellationHandle;
+
  private:
   // The index to which these triples are added.
-  const Index& index_;
+  const IndexImpl& index_;
+
+  // The located triples for all the 6 permutations.
+  LocatedTriplesPerBlockAllPermutations locatedTriples_;
 
   // The local vocabulary of the delta triples (they may have components,
   // which are not contained in the vocabulary of the original index).
@@ -52,10 +82,6 @@ class DeltaTriples {
   static_assert(static_cast<int>(Permutation::Enum::OSP) == 5);
   static_assert(Permutation::ALL.size() == 6);
 
-  // The positions of the delta triples in each of the six permutations.
-  std::array<LocatedTriplesPerBlock, Permutation::ALL.size()>
-      locatedTriplesPerBlock_;
-
   // Each delta triple needs to know where it is stored in each of the six
   // `LocatedTriplesPerBlock` above.
   struct LocatedTripleHandles {
@@ -66,8 +92,6 @@ class DeltaTriples {
   };
   using TriplesToHandlesMap =
       ad_utility::HashMap<IdTriple<0>, LocatedTripleHandles>;
-  using Triples = std::vector<IdTriple<0>>;
-  using CancellationHandle = ad_utility::SharedCancellationHandle;
 
   // The sets of triples added to and subtracted from the original index. Any
   // triple can be at most in one of the sets. The information whether a triple
@@ -78,15 +102,26 @@ class DeltaTriples {
 
  public:
   // Construct for given index.
-  explicit DeltaTriples(const Index& index) : index_(index) {}
+  explicit DeltaTriples(const Index& index);
+  explicit DeltaTriples(const IndexImpl& index) : index_{index} {};
+
+  DeltaTriples(const DeltaTriples&) = delete;
+  DeltaTriples& operator=(const DeltaTriples&) = delete;
 
   // Get the common `LocalVocab` of the delta triples.
  private:
   LocalVocab& localVocab() { return localVocab_; }
+  auto& locatedTriples() { return locatedTriples_; }
+  const auto& locatedTriples() const { return locatedTriples_; }
 
  public:
   const LocalVocab& localVocab() const { return localVocab_; }
 
+  const LocatedTriplesPerBlock& getLocatedTriplesForPermutation(
+      Permutation::Enum permutation) const {
+    return locatedTriples_.at(static_cast<size_t>(permutation));
+  }
+
   // Clear `triplesAdded_` and `triplesSubtracted_` and all associated data
   // structures.
   void clear();
@@ -101,9 +136,10 @@ class DeltaTriples {
   // Delete triples.
   void deleteTriples(CancellationHandle cancellationHandle, Triples triples);
 
-  // Get `TripleWithPosition` objects for given permutation.
-  const LocatedTriplesPerBlock& getLocatedTriplesPerBlock(
-      Permutation::Enum permutation) const;
+  // Return a deep copy of the `LocatedTriples` and the corresponding
+  // `LocalVocab` which form a snapshot of the current status of this
+  // `DeltaTriples` object.
+  SharedLocatedTriplesSnapshot getSnapshot() const;
 
  private:
   // Find the position of the given triple in the given permutation and add it
@@ -144,7 +180,27 @@ class DeltaTriples {
   void eraseTripleInAllPermutations(LocatedTripleHandles& handles);
 };
 
-// DELTA TRIPLES AND THE CACHE
-//
-// Changes to the DeltaTriples invalidate all cache results that have an index
-// scan in their subtree, which is almost all entries in practice.
+// This class synchronizes the access to a `DeltaTriples` object, thus avoiding
+// race conditions between concurrent updates and queries.
+class DeltaTriplesManager {
+  ad_utility::Synchronized<DeltaTriples> deltaTriples_;
+  ad_utility::Synchronized<SharedLocatedTriplesSnapshot, std::shared_mutex>
+      currentLocatedTriplesSnapshot_;
+
+ public:
+  using CancellationHandle = DeltaTriples::CancellationHandle;
+  using Triples = DeltaTriples::Triples;
+
+  explicit DeltaTriplesManager(const IndexImpl& index);
+  FRIEND_TEST(DeltaTriplesTest, DeltaTriplesManager);
+
+  // Modify the underlying `DeltaTriples` by applying `function` and then update
+  // the current snapshot. Concurrent calls to `modify` will be serialized, and
+  // each call to `getCurrentSnapshot` will either return the snapshot before or
+  // after a modification, but never one of an ongoing modification.
+  void modify(const std::function<void(DeltaTriples&)>& function);
+
+  // Return a shared pointer to the current, immutable snapshot. This can
+  // be safely used to execute a query without interfering with future updates.
+  SharedLocatedTriplesSnapshot getCurrentSnapshot() const;
+};
diff --git a/src/index/Index.cpp b/src/index/Index.cpp
index 86af95a798..47fcad9c82 100644
--- a/src/index/Index.cpp
+++ b/src/index/Index.cpp
@@ -57,15 +57,17 @@ ad_utility::BlankNodeManager* Index::getBlankNodeManager() const {
 }
 
 // ____________________________________________________________________________
-size_t Index::getCardinality(const TripleComponent& comp, Permutation::Enum p,
-                             const DeltaTriples& deltaTriples) const {
-  return pimpl_->getCardinality(comp, p, deltaTriples);
+size_t Index::getCardinality(
+    const TripleComponent& comp, Permutation::Enum p,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
+  return pimpl_->getCardinality(comp, p, locatedTriplesSnapshot);
 }
 
 // ____________________________________________________________________________
-size_t Index::getCardinality(Id id, Permutation::Enum p,
-                             const DeltaTriples& deltaTriples) const {
-  return pimpl_->getCardinality(id, p, deltaTriples);
+size_t Index::getCardinality(
+    Id id, Permutation::Enum p,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
+  return pimpl_->getCardinality(id, p, locatedTriplesSnapshot);
 }
 
 // ____________________________________________________________________________
@@ -254,10 +256,10 @@ vector<float> Index::getMultiplicities(Permutation::Enum p) const {
 }
 
 // ____________________________________________________________________________
-vector<float> Index::getMultiplicities(const TripleComponent& key,
-                                       Permutation::Enum p,
-                                       const DeltaTriples& deltaTriples) const {
-  return pimpl_->getMultiplicities(key, p, deltaTriples);
+vector<float> Index::getMultiplicities(
+    const TripleComponent& key, Permutation::Enum p,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
+  return pimpl_->getMultiplicities(key, p, locatedTriplesSnapshot);
 }
 
 // ____________________________________________________________________________
@@ -265,10 +267,10 @@ IdTable Index::scan(
     const ScanSpecificationAsTripleComponent& scanSpecification,
     Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
-    const DeltaTriples& deltaTriples,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot,
     const LimitOffsetClause& limitOffset) const {
   return pimpl_->scan(scanSpecification, p, additionalColumns,
-                      cancellationHandle, deltaTriples, limitOffset);
+                      cancellationHandle, locatedTriplesSnapshot, limitOffset);
 }
 
 // ____________________________________________________________________________
@@ -276,21 +278,32 @@ IdTable Index::scan(
     const ScanSpecification& scanSpecification, Permutation::Enum p,
     Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
-    const DeltaTriples& deltaTriples,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot,
     const LimitOffsetClause& limitOffset) const {
   return pimpl_->scan(scanSpecification, p, additionalColumns,
-                      cancellationHandle, deltaTriples, limitOffset);
+                      cancellationHandle, locatedTriplesSnapshot, limitOffset);
 }
 
 // ____________________________________________________________________________
-size_t Index::getResultSizeOfScan(const ScanSpecification& scanSpecification,
-                                  const Permutation::Enum& permutation,
-                                  const DeltaTriples& deltaTriples) const {
+size_t Index::getResultSizeOfScan(
+    const ScanSpecification& scanSpecification,
+    const Permutation::Enum& permutation,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   return pimpl_->getResultSizeOfScan(scanSpecification, permutation,
-                                     deltaTriples);
+                                     locatedTriplesSnapshot);
 }
 
 // ____________________________________________________________________________
 void Index::createFromFiles(const std::vector<InputFileSpecification>& files) {
   return pimpl_->createFromFiles(files);
 }
+
+// ____________________________________________________________________________
+const DeltaTriplesManager& Index::deltaTriplesManager() const {
+  return pimpl_->deltaTriplesManager();
+}
+
+// ____________________________________________________________________________
+DeltaTriplesManager& Index::deltaTriplesManager() {
+  return pimpl_->deltaTriplesManager();
+}
diff --git a/src/index/Index.h b/src/index/Index.h
index d42c5ced42..ec408f15df 100644
--- a/src/index/Index.h
+++ b/src/index/Index.h
@@ -1,4 +1,3 @@
-// Copyright 2015, University of Freiburg,
 // Chair of Algorithms and Data Structures.
 // Author:
 //   2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de)
@@ -23,7 +22,8 @@
 class IdTable;
 class TextBlockMetaData;
 class IndexImpl;
-class DeltaTriples;
+struct LocatedTriplesSnapshot;
+class DeltaTriplesManager;
 
 class Index {
  private:
@@ -116,14 +116,19 @@ class Index {
   // Get a (non-owning) pointer to the BlankNodeManager of this Index.
   ad_utility::BlankNodeManager* getBlankNodeManager() const;
 
+  // Get a reference to the `DeltaTriplesManager` of this Index.
+  DeltaTriplesManager& deltaTriplesManager();
+  const DeltaTriplesManager& deltaTriplesManager() const;
+
   // --------------------------------------------------------------------------
   // RDF RETRIEVAL
   // --------------------------------------------------------------------------
-  [[nodiscard]] size_t getCardinality(const TripleComponent& comp,
-                                      Permutation::Enum permutation,
-                                      const DeltaTriples& deltaTriples) const;
-  [[nodiscard]] size_t getCardinality(Id id, Permutation::Enum permutation,
-                                      const DeltaTriples& deltaTriples) const;
+  [[nodiscard]] size_t getCardinality(
+      const TripleComponent& comp, Permutation::Enum permutation,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
+  [[nodiscard]] size_t getCardinality(
+      Id id, Permutation::Enum permutation,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // TODO<joka921> Once we have an overview over the folding this logic should
   // probably not be in the index class.
@@ -217,9 +222,9 @@ class Index {
   bool hasAllPermutations() const;
 
   // _____________________________________________________________________________
-  vector<float> getMultiplicities(const TripleComponent& key,
-                                  Permutation::Enum permutation,
-                                  const DeltaTriples& deltaTriples) const;
+  vector<float> getMultiplicities(
+      const TripleComponent& key, Permutation::Enum permutation,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // ___________________________________________________________________
   vector<float> getMultiplicities(Permutation::Enum p) const;
@@ -243,21 +248,22 @@ class Index {
                Permutation::Enum p,
                Permutation::ColumnIndicesRef additionalColumns,
                const ad_utility::SharedCancellationHandle& cancellationHandle,
-               const DeltaTriples& deltaTriples,
+               const LocatedTriplesSnapshot& locatedTriplesSnapshot,
                const LimitOffsetClause& limitOffset = {}) const;
 
   // Similar to the overload of `scan` above, but the keys are specified as IDs.
   IdTable scan(const ScanSpecification& scanSpecification, Permutation::Enum p,
                Permutation::ColumnIndicesRef additionalColumns,
                const ad_utility::SharedCancellationHandle& cancellationHandle,
-               const DeltaTriples& deltaTriples,
+               const LocatedTriplesSnapshot& locatedTriplesSnapshot,
                const LimitOffsetClause& limitOffset = {}) const;
 
   // Similar to the previous overload of `scan`, but only get the exact size of
   // the scan result.
-  size_t getResultSizeOfScan(const ScanSpecification& scanSpecification,
-                             const Permutation::Enum& permutation,
-                             const DeltaTriples& deltaTriples) const;
+  size_t getResultSizeOfScan(
+      const ScanSpecification& scanSpecification,
+      const Permutation::Enum& permutation,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // Get access to the implementation. This should be used rarely as it
   // requires including the rather expensive `IndexImpl.h` header
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 7f5e479f59..1e69d9676b 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -44,6 +44,7 @@ static constexpr size_t NUM_EXTERNAL_SORTERS_AT_SAME_TIME = 2u;
 IndexImpl::IndexImpl(ad_utility::AllocatorWithLimit<Id> allocator)
     : allocator_{std::move(allocator)} {
   globalSingletonIndex_ = this;
+  deltaTriples_.emplace(*this);
 };
 
 // _____________________________________________________________________________
@@ -1445,10 +1446,11 @@ Index::NumNormalAndInternal IndexImpl::numDistinctCol0(
 }
 
 // ___________________________________________________________________________
-size_t IndexImpl::getCardinality(Id id, Permutation::Enum permutation,
-                                 const DeltaTriples& deltaTriples) const {
+size_t IndexImpl::getCardinality(
+    Id id, Permutation::Enum permutation,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   if (const auto& meta =
-          getPermutation(permutation).getMetadata(id, deltaTriples);
+          getPermutation(permutation).getMetadata(id, locatedTriplesSnapshot);
       meta.has_value()) {
     return meta.value().numRows_;
   }
@@ -1456,9 +1458,9 @@ size_t IndexImpl::getCardinality(Id id, Permutation::Enum permutation,
 }
 
 // ___________________________________________________________________________
-size_t IndexImpl::getCardinality(const TripleComponent& comp,
-                                 Permutation::Enum permutation,
-                                 const DeltaTriples& deltaTriples) const {
+size_t IndexImpl::getCardinality(
+    const TripleComponent& comp, Permutation::Enum permutation,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   // TODO<joka921> This special case is only relevant for the `PSO` and `POS`
   // permutations, but this internal predicate should never appear in subjects
   // or objects anyway.
@@ -1468,7 +1470,7 @@ size_t IndexImpl::getCardinality(const TripleComponent& comp,
     return TEXT_PREDICATE_CARDINALITY_ESTIMATE;
   }
   if (std::optional<Id> relId = comp.toValueId(getVocab()); relId.has_value()) {
-    return getCardinality(relId.value(), permutation, deltaTriples);
+    return getCardinality(relId.value(), permutation, locatedTriplesSnapshot);
   }
   return 0;
 }
@@ -1491,10 +1493,10 @@ Index::Vocab::PrefixRanges IndexImpl::prefixRanges(
 // _____________________________________________________________________________
 vector<float> IndexImpl::getMultiplicities(
     const TripleComponent& key, Permutation::Enum permutation,
-    const DeltaTriples& deltaTriples) const {
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   if (auto keyId = key.toValueId(getVocab()); keyId.has_value()) {
-    auto meta =
-        getPermutation(permutation).getMetadata(keyId.value(), deltaTriples);
+    auto meta = getPermutation(permutation)
+                    .getMetadata(keyId.value(), locatedTriplesSnapshot);
     if (meta.has_value()) {
       return {meta.value().getCol1Multiplicity(),
               meta.value().getCol2Multiplicity()};
@@ -1520,30 +1522,31 @@ IdTable IndexImpl::scan(
     const Permutation::Enum& permutation,
     Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
-    const DeltaTriples& deltaTriples,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot,
     const LimitOffsetClause& limitOffset) const {
   auto scanSpecification = scanSpecificationAsTc.toScanSpecification(*this);
   return scan(scanSpecification, permutation, additionalColumns,
-              cancellationHandle, deltaTriples, limitOffset);
+              cancellationHandle, locatedTriplesSnapshot, limitOffset);
 }
 // _____________________________________________________________________________
 IdTable IndexImpl::scan(
     const ScanSpecification& scanSpecification, Permutation::Enum p,
     Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
-    const DeltaTriples& deltaTriples,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot,
     const LimitOffsetClause& limitOffset) const {
   return getPermutation(p).scan(scanSpecification, additionalColumns,
-                                cancellationHandle, deltaTriples, limitOffset);
+                                cancellationHandle, locatedTriplesSnapshot,
+                                limitOffset);
 }
 
 // _____________________________________________________________________________
 size_t IndexImpl::getResultSizeOfScan(
     const ScanSpecification& scanSpecification,
     const Permutation::Enum& permutation,
-    const DeltaTriples& deltaTriples) const {
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   return getPermutation(permutation)
-      .getResultSizeOfScan(scanSpecification, deltaTriples);
+      .getResultSizeOfScan(scanSpecification, locatedTriplesSnapshot);
 }
 
 // _____________________________________________________________________________
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index d62f4a7e13..0d5b396ccc 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -18,6 +18,7 @@
 #include "global/SpecialIds.h"
 #include "index/CompressedRelation.h"
 #include "index/ConstantsIndexBuilding.h"
+#include "index/DeltaTriples.h"
 #include "index/DocsDB.h"
 #include "index/Index.h"
 #include "index/IndexBuilderTypes.h"
@@ -188,6 +189,8 @@ class IndexImpl {
   // BlankNodeManager, initialized during `readConfiguration`
   std::unique_ptr<ad_utility::BlankNodeManager> blankNodeManager_{nullptr};
 
+  std::optional<DeltaTriplesManager> deltaTriples_;
+
  public:
   explicit IndexImpl(ad_utility::AllocatorWithLimit<Id> allocator);
 
@@ -261,6 +264,11 @@ class IndexImpl {
 
   ad_utility::BlankNodeManager* getBlankNodeManager() const;
 
+  DeltaTriplesManager& deltaTriplesManager() { return deltaTriples_.value(); }
+  const DeltaTriplesManager& deltaTriplesManager() const {
+    return deltaTriples_.value();
+  }
+
   // --------------------------------------------------------------------------
   //  -- RETRIEVAL ---
   // --------------------------------------------------------------------------
@@ -283,12 +291,12 @@ class IndexImpl {
 
   // ___________________________________________________________________________
   size_t getCardinality(Id id, Permutation::Enum permutation,
-                        const DeltaTriples&) const;
+                        const LocatedTriplesSnapshot&) const;
 
   // ___________________________________________________________________________
-  size_t getCardinality(const TripleComponent& comp,
-                        Permutation::Enum permutation,
-                        const DeltaTriples& deltaTriples) const;
+  size_t getCardinality(
+      const TripleComponent& comp, Permutation::Enum permutation,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // ___________________________________________________________________________
   std::string indexToString(VocabIndex id) const;
@@ -420,9 +428,9 @@ class IndexImpl {
   bool hasAllPermutations() const { return SPO().isLoaded(); }
 
   // _____________________________________________________________________________
-  vector<float> getMultiplicities(const TripleComponent& key,
-                                  Permutation::Enum permutation,
-                                  const DeltaTriples&) const;
+  vector<float> getMultiplicities(
+      const TripleComponent& key, Permutation::Enum permutation,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // ___________________________________________________________________
   vector<float> getMultiplicities(Permutation::Enum permutation) const;
@@ -432,20 +440,21 @@ class IndexImpl {
                const Permutation::Enum& permutation,
                Permutation::ColumnIndicesRef additionalColumns,
                const ad_utility::SharedCancellationHandle& cancellationHandle,
-               const DeltaTriples& deltaTriples,
+               const LocatedTriplesSnapshot& locatedTriplesSnapshot,
                const LimitOffsetClause& limitOffset = {}) const;
 
   // _____________________________________________________________________________
   IdTable scan(const ScanSpecification& scanSpecification, Permutation::Enum p,
                Permutation::ColumnIndicesRef additionalColumns,
                const ad_utility::SharedCancellationHandle& cancellationHandle,
-               const DeltaTriples& deltaTriples,
+               const LocatedTriplesSnapshot& locatedTriplesSnapshot,
                const LimitOffsetClause& limitOffset = {}) const;
 
   // _____________________________________________________________________________
-  size_t getResultSizeOfScan(const ScanSpecification& scanSpecification,
-                             const Permutation::Enum& permutation,
-                             const DeltaTriples& deltaTriples) const;
+  size_t getResultSizeOfScan(
+      const ScanSpecification& scanSpecification,
+      const Permutation::Enum& permutation,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
  private:
   // Private member functions
diff --git a/src/index/LocatedTriples.cpp b/src/index/LocatedTriples.cpp
index 9bcad6838b..e898327a9a 100644
--- a/src/index/LocatedTriples.cpp
+++ b/src/index/LocatedTriples.cpp
@@ -283,3 +283,19 @@ std::ostream& operator<<(std::ostream& os, const std::vector<IdTriple<0>>& v) {
   std::ranges::copy(v, std::ostream_iterator<IdTriple<0>>(os, ", "));
   return os;
 }
+
+// ____________________________________________________________________________
+bool LocatedTriplesPerBlock::containsTriple(const IdTriple<0>& triple,
+                                            bool shouldExist) const {
+  auto blockContains = [&triple, shouldExist](const LocatedTriples& lt,
+                                              size_t blockIndex) {
+    // Build the `LocatedTriple` exactly as it would be stored for this block.
+    LocatedTriple locatedTriple{blockIndex, triple, shouldExist};
+    return ad_utility::contains(lt, locatedTriple);
+  };
+
+  return std::ranges::any_of(map_, [&blockContains](auto& indexAndBlock) {
+    const auto& [index, block] = indexAndBlock;
+    return blockContains(block, index);
+  });
+}
diff --git a/src/index/LocatedTriples.h b/src/index/LocatedTriples.h
index c9d82d6745..c1b612a775 100644
--- a/src/index/LocatedTriples.h
+++ b/src/index/LocatedTriples.h
@@ -127,14 +127,14 @@ class LocatedTriplesPerBlock {
   IdTable mergeTriples(size_t blockIndex, const IdTable& block,
                        size_t numIndexColumns, bool includeGraphColumn) const;
 
-  // Add `locatedTriples` to the `LocatedTriplesPerBlock`.
+  // Add the given `locatedTriples` to the `LocatedTriplesPerBlock`.
   // Return handles to where they were added (`LocatedTriples` is a sorted set,
   // see above). We need the handles so that we can easily remove the
-  // `locatedTriples` from the set again in case we need to.
+  // `locatedTriples` from the set again in case we need to.
   //
   // PRECONDITIONS:
   //
+  // 1. The `locatedTriples` must not already exist in
+  // 1. The `getLocatedTriplesForPermutation` must not already exist in
   // `LocatedTriplesPerBlock`.
   std::vector<LocatedTriples::iterator> add(
       std::span<const LocatedTriple> locatedTriples);
@@ -167,6 +167,10 @@ class LocatedTriplesPerBlock {
     augmentedMetadata_ = originalMetadata_;
   }
 
+  // Only used for testing. Return `true` iff a `LocatedTriple` with the given
+  // value for `shouldExist` is contained in any block.
+  bool containsTriple(const IdTriple<0>& triple, bool shouldExist) const;
+
   // This operator is only for debugging and testing. It returns a
   // human-readable representation.
   friend std::ostream& operator<<(std::ostream& os,
diff --git a/src/index/Permutation.cpp b/src/index/Permutation.cpp
index 16f5113d68..cbe4b5dd1f 100644
--- a/src/index/Permutation.cpp
+++ b/src/index/Permutation.cpp
@@ -55,7 +55,7 @@ void Permutation::loadFromDisk(const std::string& onDiskBase,
 IdTable Permutation::scan(const ScanSpecification& scanSpec,
                           ColumnIndicesRef additionalColumns,
                           const CancellationHandle& cancellationHandle,
-                          const DeltaTriples& deltaTriples,
+                          const LocatedTriplesSnapshot& locatedTriplesSnapshot,
                           const LimitOffsetClause& limitOffset) const {
   if (!isLoaded_) {
     throw std::runtime_error("This query requires the permutation " +
@@ -64,35 +64,38 @@ IdTable Permutation::scan(const ScanSpecification& scanSpec,
 
   const auto& p = getActualPermutation(scanSpec);
 
-  return p.reader().scan(scanSpec, p.meta_.blockData(), additionalColumns,
-                         cancellationHandle, locatedTriples(deltaTriples),
-                         limitOffset);
+  return p.reader().scan(
+      scanSpec, p.meta_.blockData(), additionalColumns, cancellationHandle,
+      getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset);
 }
 
 // _____________________________________________________________________
 size_t Permutation::getResultSizeOfScan(
-    const ScanSpecification& scanSpec, const DeltaTriples& deltaTriples) const {
+    const ScanSpecification& scanSpec,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   const auto& p = getActualPermutation(scanSpec);
-  return p.reader().getResultSizeOfScan(scanSpec, p.meta_.blockData(),
-                                        locatedTriples(deltaTriples));
+  return p.reader().getResultSizeOfScan(
+      scanSpec, p.meta_.blockData(),
+      getLocatedTriplesForPermutation(locatedTriplesSnapshot));
 }
 
 // ____________________________________________________________________________
 IdTable Permutation::getDistinctCol1IdsAndCounts(
     Id col0Id, const CancellationHandle& cancellationHandle,
-    const DeltaTriples& deltaTriples) const {
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   const auto& p = getActualPermutation(col0Id);
-  return p.reader().getDistinctCol1IdsAndCounts(col0Id, p.meta_.blockData(),
-                                                cancellationHandle,
-                                                locatedTriples(deltaTriples));
+  return p.reader().getDistinctCol1IdsAndCounts(
+      col0Id, p.meta_.blockData(), cancellationHandle,
+      getLocatedTriplesForPermutation(locatedTriplesSnapshot));
 }
 
 // ____________________________________________________________________________
 IdTable Permutation::getDistinctCol0IdsAndCounts(
     const CancellationHandle& cancellationHandle,
-    const DeltaTriples& deltaTriples) const {
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   return reader().getDistinctCol0IdsAndCounts(
-      meta_.blockData(), cancellationHandle, locatedTriples(deltaTriples));
+      meta_.blockData(), cancellationHandle,
+      getLocatedTriplesForPermutation(locatedTriplesSnapshot));
 }
 
 // _____________________________________________________________________
@@ -137,25 +140,27 @@ std::string_view Permutation::toString(Permutation::Enum permutation) {
 
 // _____________________________________________________________________
 std::optional<CompressedRelationMetadata> Permutation::getMetadata(
-    Id col0Id, const DeltaTriples& deltaTriples) const {
+    Id col0Id, const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   const auto& p = getActualPermutation(col0Id);
   if (p.meta_.col0IdExists(col0Id)) {
     return p.meta_.getMetaData(col0Id);
   }
-  return p.reader().getMetadataForSmallRelation(p.meta_.blockData(), col0Id,
-                                                locatedTriples(deltaTriples));
+  return p.reader().getMetadataForSmallRelation(
+      p.meta_.blockData(), col0Id,
+      getLocatedTriplesForPermutation(locatedTriplesSnapshot));
 }
 
 // _____________________________________________________________________
 std::optional<Permutation::MetadataAndBlocks> Permutation::getMetadataAndBlocks(
-    const ScanSpecification& scanSpec, const DeltaTriples& deltaTriples) const {
+    const ScanSpecification& scanSpec,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
   const auto& p = getActualPermutation(scanSpec);
   CompressedRelationReader::ScanSpecAndBlocks mb{
       scanSpec, CompressedRelationReader::getRelevantBlocks(
                     scanSpec, p.meta_.blockData())};
 
-  auto firstAndLastTriple =
-      p.reader().getFirstAndLastTriple(mb, locatedTriples(deltaTriples));
+  auto firstAndLastTriple = p.reader().getFirstAndLastTriple(
+      mb, getLocatedTriplesForPermutation(locatedTriplesSnapshot));
   if (!firstAndLastTriple.has_value()) {
     return std::nullopt;
   }
@@ -169,7 +174,7 @@ Permutation::IdTableGenerator Permutation::lazyScan(
     std::optional<std::vector<CompressedBlockMetadata>> blocks,
     ColumnIndicesRef additionalColumns,
     ad_utility::SharedCancellationHandle cancellationHandle,
-    const DeltaTriples& deltaTriples,
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot,
     const LimitOffsetClause& limitOffset) const {
   const auto& p = getActualPermutation(scanSpec);
   if (!blocks.has_value()) {
@@ -178,9 +183,10 @@ Permutation::IdTableGenerator Permutation::lazyScan(
     blocks = std::vector(blockSpan.begin(), blockSpan.end());
   }
   ColumnIndices columns{additionalColumns.begin(), additionalColumns.end()};
-  return p.reader().lazyScan(scanSpec, std::move(blocks.value()),
-                             std::move(columns), std::move(cancellationHandle),
-                             locatedTriples(deltaTriples), limitOffset);
+  return p.reader().lazyScan(
+      scanSpec, std::move(blocks.value()), std::move(columns),
+      std::move(cancellationHandle),
+      getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset);
 }
 
 // ______________________________________________________________________
@@ -210,7 +216,7 @@ const Permutation& Permutation::getActualPermutation(Id id) const {
 }
 
 // ______________________________________________________________________
-const LocatedTriplesPerBlock& Permutation::locatedTriples(
-    const DeltaTriples& deltaTriples) const {
-  return deltaTriples.getLocatedTriplesPerBlock(permutation_);
+const LocatedTriplesPerBlock& Permutation::getLocatedTriplesForPermutation(
+    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
+  return locatedTriplesSnapshot.getLocatedTriplesForPermutation(permutation_);
 }
diff --git a/src/index/Permutation.h b/src/index/Permutation.h
index 93cad7e775..118153708b 100644
--- a/src/index/Permutation.h
+++ b/src/index/Permutation.h
@@ -1,6 +1,7 @@
-// Copyright 2018, University of Freiburg,
-// Chair of Algorithms and Data Structures.
-// Author: Johannes Kalmbach<joka921> (johannes.kalmbach@gmail.com)
+// Copyright 2018 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
 #pragma once
 
 #include <array>
@@ -18,7 +19,8 @@
 class IdTable;
 // Forward declaration of `LocatedTriplesPerBlock`
 class LocatedTriplesPerBlock;
-class DeltaTriples;
+class SharedLocatedTriplesSnapshot;
+struct LocatedTriplesSnapshot;
 
 // Helper class to store static properties of the different permutations to
 // avoid code duplication. The first template parameter is a search functor for
@@ -66,7 +68,7 @@ class Permutation {
   IdTable scan(const ScanSpecification& scanSpec,
                ColumnIndicesRef additionalColumns,
                const CancellationHandle& cancellationHandle,
-               const DeltaTriples& deltaTriples,
+               const LocatedTriplesSnapshot& locatedTriplesSnapshot,
                const LimitOffsetClause& limitOffset = {}) const;
 
   // For a given relation, determine the `col1Id`s and their counts. This is
@@ -74,11 +76,11 @@ class Permutation {
   // in `meta_`.
   IdTable getDistinctCol1IdsAndCounts(
       Id col0Id, const CancellationHandle& cancellationHandle,
-      const DeltaTriples& deltaTriples) const;
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   IdTable getDistinctCol0IdsAndCounts(
       const CancellationHandle& cancellationHandle,
-      const DeltaTriples& deltaTriples) const;
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // Typedef to propagate the `MetadataAndblocks` and `IdTableGenerator` type.
   using MetadataAndBlocks =
@@ -102,11 +104,11 @@ class Permutation {
       const ScanSpecification& scanSpec,
       std::optional<std::vector<CompressedBlockMetadata>> blocks,
       ColumnIndicesRef additionalColumns, CancellationHandle cancellationHandle,
-      const DeltaTriples& deltaTriples,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot,
       const LimitOffsetClause& limitOffset = {}) const;
 
   std::optional<CompressedRelationMetadata> getMetadata(
-      Id col0Id, const DeltaTriples& deltaTriples) const;
+      Id col0Id, const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // Return the metadata for the scan specified by the `scanSpecification`
   // along with the metadata for all the blocks that are relevant for this scan.
@@ -114,12 +116,13 @@ class Permutation {
   // empty) return `nullopt`.
   std::optional<MetadataAndBlocks> getMetadataAndBlocks(
       const ScanSpecification& scanSpec,
-      const DeltaTriples& deltaTriples) const;
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   /// Similar to the previous `scan` function, but only get the size of the
   /// result
-  size_t getResultSizeOfScan(const ScanSpecification& scanSpec,
-                             const DeltaTriples& deltaTriples) const;
+  size_t getResultSizeOfScan(
+      const ScanSpecification& scanSpec,
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // _______________________________________________________
   void setKbName(const string& name) { meta_.setName(name); }
@@ -146,19 +149,21 @@ class Permutation {
   const Permutation& getActualPermutation(const ScanSpecification& spec) const;
   const Permutation& getActualPermutation(Id id) const;
 
-  const LocatedTriplesPerBlock& locatedTriples(const DeltaTriples&) const;
+  // From the given snapshot, get the located triples for this permutation.
+  const LocatedTriplesPerBlock& getLocatedTriplesForPermutation(
+      const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   const CompressedRelationReader& reader() const { return reader_.value(); }
 
  private:
-  // for Log output, e.g. "POS"
+  // Readable name for this permutation, e.g., `POS`.
   std::string readableName_;
-  // e.g. ".pos"
+  // File name suffix for this permutation, e.g., `.pos`.
   std::string fileSuffix_;
-  // order of the 3 keys S(0), P(1), and O(2) for which this permutation is
-  // sorted, for example {1, 0, 2} for PSO.
+  // The order of the three components (S=0, P=1, O=2) in this permutation,
+  // e.g., `{1, 0, 2}` for `PSO`.
   array<size_t, 3> keyOrder_;
-
+  // The metadata for this permutation.
   MetaData meta_;
 
   // This member is `optional` because we initialize it in a deferred way in the
diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp
index e9e858d727..cb481b43b8 100644
--- a/test/DeltaTriplesTest.cpp
+++ b/test/DeltaTriplesTest.cpp
@@ -319,3 +319,105 @@ TEST_F(DeltaTriplesTest, rewriteLocalVocabEntriesAndBlankNodes) {
   auto s4 = triples[0].ids_[0];
   EXPECT_EQ(s4.getBits(), blank0.getBits());
 }
+
+// _____________________________________________________________________________
+TEST_F(DeltaTriplesTest, DeltaTriplesManager) {
+  // Preparation.
+  DeltaTriplesManager deltaTriplesManager(testQec->getIndex().getImpl());
+  auto& vocab = testQec->getIndex().getVocab();
+  auto cancellationHandle =
+      std::make_shared<ad_utility::CancellationHandle<>>();
+  std::vector<ad_utility::JThread> threads;
+  static constexpr size_t numThreads = 18;
+  static constexpr size_t numIterations = 21;
+
+  // Insert and delete a well-defined set of triples, some independent and some
+  // dependent on the thread index. Check that the snapshots before and in the
+  // middle of these updates are as expected.
+  auto insertAndDelete = [&](size_t threadIdx) {
+    LocalVocab localVocab;
+    SharedLocatedTriplesSnapshot beforeUpdate =
+        deltaTriplesManager.getCurrentSnapshot();
+    for (size_t i = 0; i < numIterations; ++i) {
+      // The first triple in both vectors is the same for all threads, the
+      // others are exclusive to this thread via the `threadIdx`.
+      auto triplesToInsert = makeIdTriples(
+          vocab, localVocab,
+          {"<A> <B> <C>", absl::StrCat("<A> <B> <D", threadIdx, ">"),
+           absl::StrCat("<A> <B> <E", threadIdx, ">")});
+      auto triplesToDelete = makeIdTriples(
+          vocab, localVocab,
+          {"<A> <C> <E>", absl::StrCat("<A> <B> <E", threadIdx, ">"),
+           absl::StrCat("<A> <B> <F", threadIdx, ">")});
+      // Insert the `triplesToInsert`.
+      deltaTriplesManager.modify([&](DeltaTriples& deltaTriples) {
+        deltaTriples.insertTriples(cancellationHandle, triplesToInsert);
+      });
+      // We should have successfully completed an update, so the snapshot
+      // pointer should have changed.
+      EXPECT_NE(beforeUpdate, deltaTriplesManager.getCurrentSnapshot());
+      // Delete the `triplesToDelete`.
+      deltaTriplesManager.modify([&](DeltaTriples& deltaTriples) {
+        deltaTriples.deleteTriples(cancellationHandle, triplesToDelete);
+      });
+
+      // Make some checks in the middle of these updates (while the other
+      // threads are likely to be in the middle of their updates as well).
+      if (i == numIterations / 2) {
+        {
+          // None of the thread-exclusive triples should be contained in the
+          // original snapshot and this should not change over time. The
+          // Boolean argument specifies whether the triple was inserted (`true`)
+          // or deleted (`false`).
+          const auto& locatedSPO =
+              beforeUpdate->getLocatedTriplesForPermutation(Permutation::SPO);
+          EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(1), true));
+          EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(1), false));
+          EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(2), true));
+          EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(2), false));
+          EXPECT_FALSE(locatedSPO.containsTriple(triplesToDelete.at(2), true));
+          EXPECT_FALSE(locatedSPO.containsTriple(triplesToDelete.at(2), false));
+        }
+        {
+          // Check for several of the thread-exclusive triples that they are
+          // properly contained in the current snapshot.
+          //
+          auto p = deltaTriplesManager.getCurrentSnapshot();
+          const auto& locatedSPO =
+              p->getLocatedTriplesForPermutation(Permutation::SPO);
+          EXPECT_TRUE(locatedSPO.containsTriple(triplesToInsert.at(1), true));
+          // This triple is exclusive to the thread and is inserted and then
+          // immediately deleted again. The `DeltaTriples` thus only store it as
+          // deleted. It might be contained in the original input, hence we
+          // cannot simply drop it.
+          EXPECT_TRUE(locatedSPO.containsTriple(triplesToInsert.at(2), false));
+          EXPECT_TRUE(locatedSPO.containsTriple(triplesToDelete.at(2), false));
+        }
+      }
+    }
+  };
+
+  // Run the above for each of `numThreads` threads, where each thread knows
+  // its index (used to create the thread-exclusive triples).
+  for (size_t i = 0; i < numThreads; ++i) {
+    threads.emplace_back(insertAndDelete, i);
+  }
+  threads.clear();
+
+  // Check that without updates, the snapshot pointer does not change.
+  auto p1 = deltaTriplesManager.getCurrentSnapshot();
+  auto p2 = deltaTriplesManager.getCurrentSnapshot();
+  EXPECT_EQ(p1, p2);
+
+  // Each of the threads above inserts one thread-exclusive triple, deletes one
+  // thread-exclusive triple and inserts one thread-exclusive triple that is
+  // deleted right after (This triple is stored as deleted in the `DeltaTriples`
+  // because it might be contained in the original input). Additionally, there
+  // is one common triple inserted by all the threads and one common triple
+  // that is deleted by all the threads.
+  //
+
+  auto deltaImpl = deltaTriplesManager.deltaTriples_.rlock();
+  EXPECT_THAT(*deltaImpl, NumTriples(numThreads + 1, 2 * numThreads + 1,
+                                     3 * numThreads + 2));
+}
diff --git a/test/DeltaTriplesTestHelpers.h b/test/DeltaTriplesTestHelpers.h
index 586a54196a..bf64175a17 100644
--- a/test/DeltaTriplesTestHelpers.h
+++ b/test/DeltaTriplesTestHelpers.h
@@ -25,7 +25,7 @@ inline auto InAllPermutations =
             absl::StrCat(".getLocatedTriplesPerBlock(",
                          Permutation::toString(perm), ")"),
             [perm](const DeltaTriples& deltaTriples) {
-              return deltaTriples.getLocatedTriplesPerBlock(perm);
+              return deltaTriples.getLocatedTriplesForPermutation(perm);
             },
             InnerMatcher);
       }));
diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp
index b707b11111..6ce7f6f732 100644
--- a/test/IndexTest.cpp
+++ b/test/IndexTest.cpp
@@ -41,7 +41,7 @@ auto makeTestScanWidthOne = [](const IndexImpl& index,
         IdTable result =
             index.scan({c0, c1, std::nullopt}, permutation, additionalColumns,
                        std::make_shared<ad_utility::CancellationHandle<>>(),
-                       qec.deltaTriples());
+                       qec.locatedTriplesSnapshot());
         ASSERT_EQ(result.numColumns(), 1 + additionalColumns.size());
         ASSERT_EQ(result, makeIdTableFromVector(expected));
       };
@@ -62,7 +62,7 @@ auto makeTestScanWidthTwo = [](const IndexImpl& index,
             index.scan({c0, std::nullopt, std::nullopt}, permutation,
                        Permutation::ColumnIndicesRef{},
                        std::make_shared<ad_utility::CancellationHandle<>>(),
-                       qec.deltaTriples());
+                       qec.locatedTriplesSnapshot());
         ASSERT_EQ(wol, makeIdTableFromVector(expected));
       };
 };
@@ -92,7 +92,7 @@ TEST(IndexTest, createFromTurtleTest) {
         return;
       }
       const auto& [index, qec] = getIndex();
-      const auto& deltaTriples = qec.deltaTriples();
+      const auto& locatedTriplesSnapshot = qec.locatedTriplesSnapshot();
 
       auto getId = makeGetId(getQec(kb)->getIndex());
       Id a = getId("<a>");
@@ -103,33 +103,49 @@ TEST(IndexTest, createFromTurtleTest) {
       Id c2 = getId("<c2>");
 
       // TODO<joka921> We could also test the multiplicities here.
-      ASSERT_TRUE(index.PSO().getMetadata(b, deltaTriples).has_value());
-      ASSERT_TRUE(index.PSO().getMetadata(b2, deltaTriples).has_value());
-      ASSERT_FALSE(index.PSO().getMetadata(a2, deltaTriples).has_value());
-      ASSERT_FALSE(index.PSO().getMetadata(c, deltaTriples).has_value());
+      ASSERT_TRUE(
+          index.PSO().getMetadata(b, locatedTriplesSnapshot).has_value());
+      ASSERT_TRUE(
+          index.PSO().getMetadata(b2, locatedTriplesSnapshot).has_value());
+      ASSERT_FALSE(
+          index.PSO().getMetadata(a2, locatedTriplesSnapshot).has_value());
+      ASSERT_FALSE(
+          index.PSO().getMetadata(c, locatedTriplesSnapshot).has_value());
       ASSERT_FALSE(
           index.PSO()
               .getMetadata(Id::makeFromVocabIndex(VocabIndex::make(735)),
-                           deltaTriples)
+                           locatedTriplesSnapshot)
               .has_value());
-      ASSERT_FALSE(
-          index.PSO().getMetadata(b, deltaTriples).value().isFunctional());
-      ASSERT_TRUE(
-          index.PSO().getMetadata(b2, deltaTriples).value().isFunctional());
+      ASSERT_FALSE(index.PSO()
+                       .getMetadata(b, locatedTriplesSnapshot)
+                       .value()
+                       .isFunctional());
+      ASSERT_TRUE(index.PSO()
+                      .getMetadata(b2, locatedTriplesSnapshot)
+                      .value()
+                      .isFunctional());
 
-      ASSERT_TRUE(index.POS().getMetadata(b, deltaTriples).has_value());
-      ASSERT_TRUE(index.POS().getMetadata(b2, deltaTriples).has_value());
-      ASSERT_FALSE(index.POS().getMetadata(a2, deltaTriples).has_value());
-      ASSERT_FALSE(index.POS().getMetadata(c, deltaTriples).has_value());
+      ASSERT_TRUE(
+          index.POS().getMetadata(b, locatedTriplesSnapshot).has_value());
+      ASSERT_TRUE(
+          index.POS().getMetadata(b2, locatedTriplesSnapshot).has_value());
+      ASSERT_FALSE(
+          index.POS().getMetadata(a2, locatedTriplesSnapshot).has_value());
+      ASSERT_FALSE(
+          index.POS().getMetadata(c, locatedTriplesSnapshot).has_value());
       ASSERT_FALSE(
           index.POS()
               .getMetadata(Id::makeFromVocabIndex(VocabIndex::make(735)),
-                           deltaTriples)
+                           locatedTriplesSnapshot)
               .has_value());
-      ASSERT_TRUE(
-          index.POS().getMetadata(b, deltaTriples).value().isFunctional());
-      ASSERT_TRUE(
-          index.POS().getMetadata(b2, deltaTriples).value().isFunctional());
+      ASSERT_TRUE(index.POS()
+                      .getMetadata(b, locatedTriplesSnapshot)
+                      .value()
+                      .isFunctional());
+      ASSERT_TRUE(index.POS()
+                      .getMetadata(b2, locatedTriplesSnapshot)
+                      .value()
+                      .isFunctional());
 
       // Relation b
       // Pair index
@@ -167,7 +183,7 @@ TEST(IndexTest, createFromTurtleTest) {
 
       const auto& qec = *getQec(kb);
       const IndexImpl& index = qec.getIndex().getImpl();
-      const auto& deltaTriples = qec.deltaTriples();
+      const auto& deltaTriples = qec.locatedTriplesSnapshot();
 
       auto getId = makeGetId(getQec(kb)->getIndex());
       Id zero = getId("<0>");
@@ -224,7 +240,7 @@ TEST(IndexTest, createFromOnDiskIndexTest) {
       "<a2> <b2> <c2> .";
   const auto& qec = *getQec(kb);
   const IndexImpl& index = qec.getIndex().getImpl();
-  const auto& deltaTriples = qec.deltaTriples();
+  const auto& deltaTriples = qec.locatedTriplesSnapshot();
 
   auto getId = makeGetId(getQec(kb)->getIndex());
   Id b = getId("<b>");
@@ -465,8 +481,8 @@ TEST(IndexTest, NumDistinctEntities) {
   EXPECT_FLOAT_EQ(multiplicities[1], 7.0 / 2.0);
   EXPECT_FLOAT_EQ(multiplicities[2], 7.0 / 7.0);
 
-  multiplicities =
-      index.getMultiplicities(iri("<x>"), Permutation::SPO, qec.deltaTriples());
+  multiplicities = index.getMultiplicities(iri("<x>"), Permutation::SPO,
+                                           qec.locatedTriplesSnapshot());
   EXPECT_FLOAT_EQ(multiplicities[0], 2.5);
   EXPECT_FLOAT_EQ(multiplicities[1], 1);
 }
diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp
index ca8d1f09ee..79eb77b0d5 100644
--- a/test/util/IndexTestHelpers.cpp
+++ b/test/util/IndexTestHelpers.cpp
@@ -55,17 +55,19 @@ namespace {
 // folded into the permutations as additional columns.
 void checkConsistencyBetweenPatternPredicateAndAdditionalColumn(
     const Index& index) {
-  DeltaTriples deltaTriples(index);
+  DeltaTriplesManager deltaTriplesManager(index.getImpl());
+  auto sharedLocatedTriplesSnapshot = deltaTriplesManager.getCurrentSnapshot();
+  const auto& locatedTriplesSnapshot = *sharedLocatedTriplesSnapshot;
   static constexpr size_t col0IdTag = 43;
   auto cancellationDummy = std::make_shared<ad_utility::CancellationHandle<>>();
   auto iriOfHasPattern =
       TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE);
   auto checkSingleElement = [&cancellationDummy, &iriOfHasPattern,
-                             &deltaTriples](const Index& index,
-                                            size_t patternIdx, Id id) {
+                             &locatedTriplesSnapshot](
+                                const Index& index, size_t patternIdx, Id id) {
     auto scanResultHasPattern = index.scan(
         ScanSpecificationAsTripleComponent{iriOfHasPattern, id, std::nullopt},
-        Permutation::Enum::PSO, {}, cancellationDummy, deltaTriples);
+        Permutation::Enum::PSO, {}, cancellationDummy, locatedTriplesSnapshot);
     // Each ID has at most one pattern, it can have none if it doesn't
     // appear as a subject in the knowledge graph.
     AD_CORRECTNESS_CHECK(scanResultHasPattern.numRows() <= 1);
@@ -86,7 +88,7 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn(
             ScanSpecification{col0Id, std::nullopt, std::nullopt}, permutation,
             std::array{ColumnIndex{ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN},
                        ColumnIndex{ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN}},
-            cancellationDummy, deltaTriples);
+            cancellationDummy, locatedTriplesSnapshot);
         ASSERT_EQ(scanResult.numColumns(), 4u);
         for (const auto& row : scanResult) {
           auto patternIdx = row[2].getInt();
@@ -112,12 +114,12 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn(
   auto cancellationHandle =
       std::make_shared<ad_utility::CancellationHandle<>>();
   auto predicates = index.getImpl().PSO().getDistinctCol0IdsAndCounts(
-      cancellationHandle, deltaTriples);
+      cancellationHandle, locatedTriplesSnapshot);
   for (const auto& predicate : predicates.getColumn(0)) {
     checkConsistencyForPredicate(predicate);
   }
   auto objects = index.getImpl().OSP().getDistinctCol0IdsAndCounts(
-      cancellationHandle, deltaTriples);
+      cancellationHandle, locatedTriplesSnapshot);
   for (const auto& object : objects.getColumn(0)) {
     checkConsistencyForObject(object);
   }

From 50eda6235af3b3025689af12f13df851800e00f5 Mon Sep 17 00:00:00 2001
From: Julian <14220769+Qup42@users.noreply.github.com>
Date: Fri, 8 Nov 2024 14:22:14 +0100
Subject: [PATCH 10/12] Implement a function that executes an UPDATE request
 (#1607)

This is another step towards support for `SPARQL UPDATE`. The function takes a `ParsedQuery` that contains a graph update (an UPDATE request that inserts and/or deletes triples that are computed from a WHERE clause to/from a given graph), executes the query, and passes the result to a `DeltaTriples` object.
---
 src/engine/ExecuteUpdate.cpp |  73 ++++++++++++
 src/engine/ExecuteUpdate.h   |  20 ++++
 test/ExecuteUpdateTest.cpp   | 225 ++++++++++++++++++++++++++++-------
 3 files changed, 277 insertions(+), 41 deletions(-)

diff --git a/src/engine/ExecuteUpdate.cpp b/src/engine/ExecuteUpdate.cpp
index ef27c6a8d4..55564de978 100644
--- a/src/engine/ExecuteUpdate.cpp
+++ b/src/engine/ExecuteUpdate.cpp
@@ -6,6 +6,21 @@
 
 #include "engine/ExportQueryExecutionTrees.h"
 
+// _____________________________________________________________________________
+void ExecuteUpdate::executeUpdate(
+    const Index& index, const ParsedQuery& query, const QueryExecutionTree& qet,
+    DeltaTriples& deltaTriples, const CancellationHandle& cancellationHandle) {
+  auto [toInsert, toDelete] =
+      computeGraphUpdateQuads(index, query, qet, cancellationHandle);
+
+  // "The deletion of the triples happens before the insertion." (SPARQL 1.1
+  // Update 3.1.3)
+  deltaTriples.deleteTriples(cancellationHandle,
+                             std::move(toDelete.idTriples_));
+  deltaTriples.insertTriples(cancellationHandle,
+                             std::move(toInsert.idTriples_));
+}
+
 // _____________________________________________________________________________
 std::pair<std::vector<ExecuteUpdate::TransformedTriple>, LocalVocab>
 ExecuteUpdate::transformTriplesTemplate(
@@ -99,3 +114,61 @@ void ExecuteUpdate::computeAndAddQuadsForResultRow(
     result.emplace_back(std::array{*subject, *predicate, *object, *graph});
   }
 }
+
+// _____________________________________________________________________________
+std::pair<ExecuteUpdate::IdTriplesAndLocalVocab,
+          ExecuteUpdate::IdTriplesAndLocalVocab>
+ExecuteUpdate::computeGraphUpdateQuads(
+    const Index& index, const ParsedQuery& query, const QueryExecutionTree& qet,
+    const CancellationHandle& cancellationHandle) {
+  AD_CONTRACT_CHECK(query.hasUpdateClause());
+  auto updateClause = query.updateClause();
+  if (!std::holds_alternative<updateClause::GraphUpdate>(updateClause.op_)) {
+    throw std::runtime_error(
+        "Only INSERT/DELETE update operations are currently supported.");
+  }
+  auto graphUpdate = std::get<updateClause::GraphUpdate>(updateClause.op_);
+  // Fully materialize the result for now. This makes it easier to execute the
+  // update.
+  auto res = qet.getResult(false);
+
+  const auto& vocab = index.getVocab();
+
+  auto prepareTemplateAndResultContainer =
+      [&vocab, &qet,
+       &res](std::vector<SparqlTripleSimpleWithGraph>&& tripleTemplates) {
+        auto [transformedTripleTemplates, localVocab] =
+            transformTriplesTemplate(vocab, qet.getVariableColumns(),
+                                     std::move(tripleTemplates));
+        std::vector<IdTriple<>> updateTriples;
+        // The maximum result size is size(query result) x num template rows.
+        // The actual result can be smaller if there are template rows with
+        // variables for which a result row does not have a value.
+        updateTriples.reserve(res->idTable().size() *
+                              transformedTripleTemplates.size());
+
+        return std::tuple{std::move(transformedTripleTemplates),
+                          std::move(updateTriples), std::move(localVocab)};
+      };
+
+  auto [toInsertTemplates, toInsert, localVocabInsert] =
+      prepareTemplateAndResultContainer(std::move(graphUpdate.toInsert_));
+  auto [toDeleteTemplates, toDelete, localVocabDelete] =
+      prepareTemplateAndResultContainer(std::move(graphUpdate.toDelete_));
+
+  for (const auto& [pair, range] :
+       ExportQueryExecutionTrees::getRowIndices(query._limitOffset, *res)) {
+    auto& idTable = pair.idTable_;
+    for (const uint64_t i : range) {
+      computeAndAddQuadsForResultRow(toInsertTemplates, toInsert, idTable, i);
+      cancellationHandle->throwIfCancelled();
+
+      computeAndAddQuadsForResultRow(toDeleteTemplates, toDelete, idTable, i);
+      cancellationHandle->throwIfCancelled();
+    }
+  }
+
+  return {
+      IdTriplesAndLocalVocab{std::move(toInsert), std::move(localVocabInsert)},
+      IdTriplesAndLocalVocab{std::move(toDelete), std::move(localVocabDelete)}};
+}
diff --git a/src/engine/ExecuteUpdate.h b/src/engine/ExecuteUpdate.h
index 729e65d51c..3cf686ed14 100644
--- a/src/engine/ExecuteUpdate.h
+++ b/src/engine/ExecuteUpdate.h
@@ -16,6 +16,13 @@ class ExecuteUpdate {
   using IdOrVariableIndex = std::variant<Id, ColumnIndex>;
   using TransformedTriple = std::array<IdOrVariableIndex, 4>;
 
+  // Execute an update. This function is comparable to
+  // `ExportQueryExecutionTrees::computeResult` for queries.
+  static void executeUpdate(const Index& index, const ParsedQuery& query,
+                            const QueryExecutionTree& qet,
+                            DeltaTriples& deltaTriples,
+                            const CancellationHandle& cancellationHandle);
+
  private:
   // Resolve all `TripleComponent`s and `Graph`s in a vector of
   // `SparqlTripleSimpleWithGraph` into `Variable`s or `Id`s.
@@ -41,4 +48,17 @@ class ExecuteUpdate {
       const std::vector<TransformedTriple>& templates,
       std::vector<IdTriple<>>& result, const IdTable& idTable, uint64_t rowIdx);
   FRIEND_TEST(ExecuteUpdate, computeAndAddQuadsForResultRow);
+
+  struct IdTriplesAndLocalVocab {
+    std::vector<IdTriple<>> idTriples_;
+    LocalVocab localVocab_;
+  };
+  // Compute the set of quads to insert and delete for the given update. The
+  // ParsedQuery's clause must be an UpdateClause. The UpdateClause's operation
+  // must be a GraphUpdate.
+  static std::pair<IdTriplesAndLocalVocab, IdTriplesAndLocalVocab>
+  computeGraphUpdateQuads(const Index& index, const ParsedQuery& query,
+                          const QueryExecutionTree& qet,
+                          const CancellationHandle& cancellationHandle);
+  FRIEND_TEST(ExecuteUpdate, computeGraphUpdateQuads);
 };
diff --git a/test/ExecuteUpdateTest.cpp b/test/ExecuteUpdateTest.cpp
index 08c4ec284e..5367d8ec7d 100644
--- a/test/ExecuteUpdateTest.cpp
+++ b/test/ExecuteUpdateTest.cpp
@@ -32,6 +32,153 @@ MATCHER_P(AlwaysFalse, msg, "") {
   return false;
 }
 
+// _____________________________________________________________________________
+TEST(ExecuteUpdate, executeUpdate) {
+  auto executeUpdate = [](const std::string& update) {
+    // These tests run on the default dataset defined in
+    // `IndexTestHelpers::makeTestIndex`.
+    QueryExecutionContext* qec = ad_utility::testing::getQec(std::nullopt);
+    const Index& index = qec->getIndex();
+    DeltaTriples deltaTriples{index};
+    const auto sharedHandle =
+        std::make_shared<ad_utility::CancellationHandle<>>();
+    const std::vector<DatasetClause> datasets = {};
+    auto pq = SparqlParser::parseQuery(update);
+    QueryPlanner qp{qec, sharedHandle};
+    const auto qet = qp.createExecutionTree(pq);
+    ExecuteUpdate::executeUpdate(index, pq, qet, deltaTriples, sharedHandle);
+    return deltaTriples;
+  };
+  auto expectExecuteUpdate =
+      [&executeUpdate](
+          const std::string& update,
+          const testing::Matcher<const DeltaTriples&>& deltaTriplesMatcher) {
+        EXPECT_THAT(executeUpdate(update), deltaTriplesMatcher);
+      };
+  auto expectExecuteUpdateFails =
+      [&executeUpdate](
+          const std::string& update,
+          const testing::Matcher<const std::string&>& messageMatcher) {
+        AD_EXPECT_THROW_WITH_MESSAGE(executeUpdate(update), messageMatcher);
+      };
+  expectExecuteUpdate("INSERT DATA { <s> <p> <o> . }", NumTriples(1, 0, 1));
+  expectExecuteUpdate("DELETE DATA { <z> <label> \"zz\"@en }",
+                      NumTriples(0, 1, 1));
+  expectExecuteUpdate(
+      "DELETE { ?s <is-a> ?o } INSERT { <a> <b> <c> } WHERE { ?s <is-a> ?o }",
+      NumTriples(1, 2, 3));
+  expectExecuteUpdate(
+      "DELETE { <a> <b> <c> } INSERT { <a> <b> <c> } WHERE { ?s <is-a> ?o }",
+      NumTriples(1, 0, 1));
+  expectExecuteUpdate(
+      "DELETE { ?s <is-a> ?o } INSERT { ?s <is-a> ?o } WHERE { ?s <is-a> ?o }",
+      NumTriples(2, 0, 2));
+  expectExecuteUpdate("DELETE WHERE { ?s ?p ?o }", NumTriples(0, 8, 8));
+  expectExecuteUpdateFails(
+      "SELECT * WHERE { ?s ?p ?o }",
+      testing::HasSubstr("Assertion `query.hasUpdateClause()` failed."));
+  expectExecuteUpdateFails(
+      "CLEAR DEFAULT",
+      testing::HasSubstr(
+          "Only INSERT/DELETE update operations are currently supported."));
+}
+
+// _____________________________________________________________________________
+TEST(ExecuteUpdate, computeGraphUpdateQuads) {
+  // These tests run on the default dataset defined in
+  // `IndexTestHelpers::makeTestIndex`.
+  QueryExecutionContext* qec = ad_utility::testing::getQec(std::nullopt);
+  const Index& index = qec->getIndex();
+  const auto Id = ad_utility::testing::makeGetId(index);
+  auto defaultGraphId = Id(std::string{DEFAULT_GRAPH_IRI});
+
+  using namespace ::testing;
+  LocalVocab localVocab;
+  auto LVI = [&localVocab](const std::string& iri) {
+    return Id::makeFromLocalVocabIndex(localVocab.getIndexAndAddIfNotContained(
+        LocalVocabEntry(ad_utility::triple_component::Iri::fromIriref(iri))));
+  };
+
+  auto IdTriple = [defaultGraphId](const ::Id s, const ::Id p, const ::Id o,
+                                   const std::optional<::Id> graph =
+                                       std::nullopt) -> ::IdTriple<> {
+    return ::IdTriple({s, p, o, graph.value_or(defaultGraphId)});
+  };
+
+  auto executeComputeGraphUpdateQuads = [&qec,
+                                         &index](const std::string& update) {
+    const auto sharedHandle =
+        std::make_shared<ad_utility::CancellationHandle<>>();
+    const std::vector<DatasetClause> datasets = {};
+    auto pq = SparqlParser::parseQuery(update);
+    QueryPlanner qp{qec, sharedHandle};
+    const auto qet = qp.createExecutionTree(pq);
+    return ExecuteUpdate::computeGraphUpdateQuads(index, pq, qet, sharedHandle);
+  };
+  auto expectComputeGraphUpdateQuads =
+      [&executeComputeGraphUpdateQuads](
+          const std::string& update,
+          const Matcher<const std::vector<::IdTriple<>>&>& toInsertMatcher,
+          const Matcher<const std::vector<::IdTriple<>>&>& toDeleteMatcher) {
+        EXPECT_THAT(executeComputeGraphUpdateQuads(update),
+                    Pair(AD_FIELD(ExecuteUpdate::IdTriplesAndLocalVocab,
+                                  idTriples_, toInsertMatcher),
+                         AD_FIELD(ExecuteUpdate::IdTriplesAndLocalVocab,
+                                  idTriples_, toDeleteMatcher)));
+      };
+  auto expectComputeGraphUpdateQuadsFails =
+      [&executeComputeGraphUpdateQuads](
+          const std::string& update,
+          const Matcher<const std::string&>& messageMatcher) {
+        AD_EXPECT_THROW_WITH_MESSAGE(executeComputeGraphUpdateQuads(update),
+                                     messageMatcher);
+      };
+
+  expectComputeGraphUpdateQuads(
+      "INSERT DATA { <s> <p> <o> . }",
+      ElementsAreArray({IdTriple(LVI("<s>"), LVI("<p>"), LVI("<o>"))}),
+      IsEmpty());
+  expectComputeGraphUpdateQuads(
+      "DELETE DATA { <z> <label> \"zz\"@en }", IsEmpty(),
+      ElementsAreArray({IdTriple(Id("<z>"), Id("<label>"), Id("\"zz\"@en"))}));
+  expectComputeGraphUpdateQuads(
+      "DELETE { ?s <is-a> ?o } INSERT { <s> <p> <o> } WHERE { ?s <is-a> ?o }",
+      ElementsAreArray({IdTriple(LVI("<s>"), LVI("<p>"), LVI("<o>")),
+                        IdTriple(LVI("<s>"), LVI("<p>"), LVI("<o>"))}),
+      ElementsAreArray({IdTriple(Id("<x>"), Id("<is-a>"), Id("<y>")),
+                        IdTriple(Id("<y>"), Id("<is-a>"), Id("<x>"))}));
+  expectComputeGraphUpdateQuads(
+      "DELETE { <s> <p> <o> } INSERT { <s> <p> <o> } WHERE { ?s <is-a> ?o }",
+      ElementsAreArray({IdTriple(LVI("<s>"), LVI("<p>"), LVI("<o>")),
+                        IdTriple(LVI("<s>"), LVI("<p>"), LVI("<o>"))}),
+      ElementsAreArray({IdTriple(LVI("<s>"), LVI("<p>"), LVI("<o>")),
+                        IdTriple(LVI("<s>"), LVI("<p>"), LVI("<o>"))}));
+  expectComputeGraphUpdateQuads(
+      "DELETE { ?s <is-a> ?o } INSERT { ?s <is-a> ?o } WHERE { ?s <is-a> ?o }",
+      ElementsAreArray({IdTriple(Id("<x>"), Id("<is-a>"), Id("<y>")),
+                        IdTriple(Id("<y>"), Id("<is-a>"), Id("<x>"))}),
+      ElementsAreArray({IdTriple(Id("<x>"), Id("<is-a>"), Id("<y>")),
+                        IdTriple(Id("<y>"), Id("<is-a>"), Id("<x>"))}));
+  expectComputeGraphUpdateQuads(
+      "DELETE WHERE { ?s ?p ?o }", IsEmpty(),
+      UnorderedElementsAreArray(
+          {IdTriple(Id("<x>"), Id("<label>"), Id("\"alpha\"")),
+           IdTriple(Id("<x>"), Id("<label>"), Id("\"älpha\"")),
+           IdTriple(Id("<x>"), Id("<label>"), Id("\"A\"")),
+           IdTriple(Id("<x>"), Id("<label>"), Id("\"Beta\"")),
+           IdTriple(Id("<x>"), Id("<is-a>"), Id("<y>")),
+           IdTriple(Id("<y>"), Id("<is-a>"), Id("<x>")),
+           IdTriple(Id("<z>"), Id("<label>"), Id("\"zz\"@en")),
+           IdTriple(Id("<zz>"), Id("<label>"), Id("<zz>"))}));
+  expectComputeGraphUpdateQuadsFails(
+      "SELECT * WHERE { ?s ?p ?o }",
+      HasSubstr("Assertion `query.hasUpdateClause()` failed."));
+  expectComputeGraphUpdateQuadsFails(
+      "CLEAR DEFAULT",
+      HasSubstr(
+          "Only INSERT/DELETE update operations are currently supported."));
+}
+
 // _____________________________________________________________________________
 TEST(ExecuteUpdate, transformTriplesTemplate) {
   // Create an index for testing.
@@ -41,6 +188,7 @@ TEST(ExecuteUpdate, transformTriplesTemplate) {
   auto& vocab = const_cast<Index::Vocab&>(index.getVocab());
 
   // Helpers
+  using namespace ::testing;
   const auto Id = ad_utility::testing::makeGetId(index);
   using Graph = SparqlTripleSimpleWithGraph::Graph;
   using LocalVocab = ad_utility::triple_component::LiteralOrIri;
@@ -53,16 +201,16 @@ TEST(ExecuteUpdate, transformTriplesTemplate) {
         literal);
   };
   // Matchers
-  using MatcherType = testing::Matcher<const ExecuteUpdate::IdOrVariableIndex&>;
+  using MatcherType = Matcher<const ExecuteUpdate::IdOrVariableIndex&>;
   auto TripleComponentMatcher = [](const ::LocalVocab& localVocab,
                                    TripleComponentT component) -> MatcherType {
     return std::visit(
         ad_utility::OverloadCallOperator{
             [](const ::Id& id) -> MatcherType {
-              return testing::VariantWith<::Id>(testing::Eq(id));
+              return VariantWith<::Id>(Eq(id));
             },
             [](const ColumnIndex& index) -> MatcherType {
-              return testing::VariantWith<ColumnIndex>(testing::Eq(index));
+              return VariantWith<ColumnIndex>(Eq(index));
             },
             [&localVocab](
                 const ad_utility::triple_component::LiteralOrIri& literalOrIri)
@@ -74,8 +222,8 @@ TEST(ExecuteUpdate, transformTriplesTemplate) {
                                  " not in local vocab"));
               }
               const auto id = Id::makeFromLocalVocabIndex(lviOpt.value());
-              return testing::VariantWith<::Id>(
-                  AD_PROPERTY(Id, getBits, testing::Eq(id.getBits())));
+              return VariantWith<::Id>(
+                  AD_PROPERTY(Id, getBits, Eq(id.getBits())));
             }},
         component);
   };
@@ -98,12 +246,12 @@ TEST(ExecuteUpdate, transformTriplesTemplate) {
                   TripleComponentMatcher(localVocab, expectedTriple.at(3)));
             });
         EXPECT_THAT(transformedTriples,
-                    testing::ElementsAreArray(transformedTriplesMatchers));
+                    ElementsAreArray(transformedTriplesMatchers));
       };
   auto expectTransformTriplesTemplateFails =
       [&vocab](const VariableToColumnMap& variableColumns,
                std::vector<SparqlTripleSimpleWithGraph>&& triples,
-               const testing::Matcher<const std::string&>& messageMatcher) {
+               const Matcher<const std::string&>& messageMatcher) {
         AD_EXPECT_THROW_WITH_MESSAGE(
             ExecuteUpdate::transformTriplesTemplate(vocab, variableColumns,
                                                     std::move(triples)),
@@ -131,14 +279,13 @@ TEST(ExecuteUpdate, transformTriplesTemplate) {
       {},
       {SparqlTripleSimpleWithGraph{Literal("\"foo\""), Iri("<bar>"),
                                    Variable("?f"), Graph{}}},
-      testing::HasSubstr(
-          "Assertion `variableColumns.contains(component.getVariable())` "
-          "failed."));
+      HasSubstr("Assertion `variableColumns.contains(component.getVariable())` "
+                "failed."));
   expectTransformTriplesTemplateFails(
       {},
       {SparqlTripleSimpleWithGraph{Literal("\"foo\""), Iri("<bar>"),
                                    Literal("\"foo\""), Graph{Variable("?f")}}},
-      testing::HasSubstr("Assertion `variableColumns.contains(var)` failed."));
+      HasSubstr("Assertion `variableColumns.contains(var)` failed."));
   // Variables in the template are mapped to their column index.
   expectTransformTriplesTemplate(
       {{Variable("?f"), {0, ColumnIndexAndTypeInfo::PossiblyUndefined}}},
@@ -154,22 +301,23 @@ TEST(ExecuteUpdate, transformTriplesTemplate) {
 
 // _____________________________________________________________________________
 TEST(ExecuteUpdate, resolveVariable) {
+  using namespace ::testing;
   const auto idTable =
       makeIdTableFromVector({{V(0), V(1), V(2)},
                              {V(3), V(4), V(5)},
                              {V(6), Id::makeUndefined(), V(8)}});
   auto resolveVariable =
       std::bind_front(&ExecuteUpdate::resolveVariable, std::cref(idTable));
-  EXPECT_THAT(resolveVariable(0, V(10)), testing::Eq(V(10)));
-  EXPECT_THAT(resolveVariable(0, 1UL), testing::Eq(V(1)));
-  EXPECT_THAT(resolveVariable(1, 1UL), testing::Eq(V(4)));
-  EXPECT_THAT(resolveVariable(2, 1UL), testing::Eq(std::nullopt));
-  EXPECT_THAT(resolveVariable(2, Id::makeUndefined()),
-              testing::Eq(std::nullopt));
+  EXPECT_THAT(resolveVariable(0, V(10)), Eq(V(10)));
+  EXPECT_THAT(resolveVariable(0, 1UL), Eq(V(1)));
+  EXPECT_THAT(resolveVariable(1, 1UL), Eq(V(4)));
+  EXPECT_THAT(resolveVariable(2, 1UL), Eq(std::nullopt));
+  EXPECT_THAT(resolveVariable(2, Id::makeUndefined()), Eq(std::nullopt));
 }
 
 // _____________________________________________________________________________
 TEST(ExecuteUpdate, computeAndAddQuadsForResultRow) {
+  using namespace ::testing;
   const auto idTable =
       makeIdTableFromVector({{V(0), V(1), V(2)},
                              {V(3), V(4), V(5)},
@@ -177,45 +325,40 @@ TEST(ExecuteUpdate, computeAndAddQuadsForResultRow) {
   auto expectComputeQuads =
       [](const std::vector<ExecuteUpdate::TransformedTriple>& templates,
          const IdTable& idTable, uint64_t rowIdx,
-         const testing::Matcher<const std::vector<IdTriple<>>&>&
-             expectedQuads) {
+         const Matcher<const std::vector<IdTriple<>>&>& expectedQuads) {
         std::vector<IdTriple<>> result;
         ExecuteUpdate::computeAndAddQuadsForResultRow(templates, result,
                                                       idTable, rowIdx);
         EXPECT_THAT(result, expectedQuads);
       };
   // Compute the quads for an empty template set yields no quads.
-  expectComputeQuads({}, idTable, 0, testing::IsEmpty());
+  expectComputeQuads({}, idTable, 0, IsEmpty());
   // Compute the quads for template without variables yields the templates
   // unmodified.
-  expectComputeQuads(
-      {{V(0), V(1), V(2), V(3)}}, idTable, 0,
-      testing::ElementsAreArray({IdTriple{{V(0), V(1), V(2), V(3)}}}));
-  expectComputeQuads(
-      {{V(0), V(1), V(2), V(3)}}, idTable, 1,
-      testing::ElementsAreArray({IdTriple{{V(0), V(1), V(2), V(3)}}}));
+  expectComputeQuads({{V(0), V(1), V(2), V(3)}}, idTable, 0,
+                     ElementsAreArray({IdTriple{{V(0), V(1), V(2), V(3)}}}));
+  expectComputeQuads({{V(0), V(1), V(2), V(3)}}, idTable, 1,
+                     ElementsAreArray({IdTriple{{V(0), V(1), V(2), V(3)}}}));
   // The variables in templates are resolved to the value of the variable in the
   // specified row of the result.
-  expectComputeQuads(
-      {{0UL, V(1), 1UL, V(3)}}, idTable, 0,
-      testing::ElementsAreArray({IdTriple{{V(0), V(1), V(1), V(3)}}}));
-  expectComputeQuads(
-      {{0UL, V(1), 1UL, V(3)}}, idTable, 1,
-      testing::ElementsAreArray({IdTriple{{V(3), V(1), V(4), V(3)}}}));
+  expectComputeQuads({{0UL, V(1), 1UL, V(3)}}, idTable, 0,
+                     ElementsAreArray({IdTriple{{V(0), V(1), V(1), V(3)}}}));
+  expectComputeQuads({{0UL, V(1), 1UL, V(3)}}, idTable, 1,
+                     ElementsAreArray({IdTriple{{V(3), V(1), V(4), V(3)}}}));
   // Quads with undefined IDs cannot be stored and are not returned.
-  expectComputeQuads({{0UL, V(1), 1UL, V(3)}}, idTable, 2, testing::IsEmpty());
+  expectComputeQuads({{0UL, V(1), 1UL, V(3)}}, idTable, 2, IsEmpty());
   expectComputeQuads({{V(0), V(1), Id::makeUndefined(), V(3)}}, idTable, 0,
-                     testing::IsEmpty());
+                     IsEmpty());
   // Some extra cases to cover all branches.
   expectComputeQuads({{Id::makeUndefined(), V(1), V(2), V(3)}}, idTable, 0,
-                     testing::IsEmpty());
+                     IsEmpty());
   expectComputeQuads({{V(0), Id::makeUndefined(), V(2), V(3)}}, idTable, 0,
-                     testing::IsEmpty());
+                     IsEmpty());
   expectComputeQuads({{V(0), V(1), V(2), Id::makeUndefined()}}, idTable, 0,
-                     testing::IsEmpty());
+                     IsEmpty());
   // All the templates are evaluated for the specified row of the result.
-  expectComputeQuads(
-      {{0UL, V(1), 1UL, V(3)}, {V(0), 1UL, 2UL, V(3)}}, idTable, 0,
-      testing::ElementsAreArray({IdTriple{{V(0), V(1), V(1), V(3)}},
-                                 IdTriple{{V(0), V(1), V(2), V(3)}}}));
+  expectComputeQuads({{0UL, V(1), 1UL, V(3)}, {V(0), 1UL, 2UL, V(3)}}, idTable,
+                     0,
+                     ElementsAreArray({IdTriple{{V(0), V(1), V(1), V(3)}},
+                                       IdTriple{{V(0), V(1), V(2), V(3)}}}));
 }

From 0fadfc18405b7045e0f8c2ba6790ccad2b1572ac Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@informatik.uni-freiburg.de>
Date: Fri, 8 Nov 2024 14:39:24 +0100
Subject: [PATCH 11/12] Fix bug for `FILTER` with negated expressions (#1587)

For expression results represented as a set of intervals, negation can lead to an "infinitely" large interval end. This was not adequately handled in the implementation of `FILTER`, which led to a `vector::reserve` exception. This is now fixed. Use the occasion to improve the documentation of the code.
---
 src/engine/Filter.cpp | 55 ++++++++++++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp
index 79ef7572ad..db21cf7c43 100644
--- a/src/engine/Filter.cpp
+++ b/src/engine/Filter.cpp
@@ -122,41 +122,63 @@ void Filter::computeFilterImpl(IdTable& dynamicResultTable,
   sparqlExpression::ExpressionResult expressionResult =
       _expression.getPimpl()->evaluate(&evaluationContext);
 
-  // Note: the explicit (seemingly redundant) capture of `resultTable` is
+  // Filter `input` by `expressionResult` and store the result in `resultTable`.
+  // This is a lambda because `expressionResult` is a `std::variant`.
+  //
+  // NOTE: the explicit (seemingly redundant) capture of `resultTable` is
   // required to work around a bug in Clang 17, see
   // https://github.com/llvm/llvm-project/issues/61267
-  auto visitor =
+  auto computeResult =
       [this, &resultTable = resultTable, &input,
        &evaluationContext]<sparqlExpression::SingleExpressionResult T>(
           T&& singleResult) {
         if constexpr (std::is_same_v<T, ad_utility::SetOfIntervals>) {
+          AD_CONTRACT_CHECK(input.size() == evaluationContext.size());
+          // If the expression result is given as a set of intervals, we copy
+          // the corresponding parts of `input` to `resultTable`.
+          //
+          // NOTE: One of the interval ends may be larger than `input.size()`
+          // (as the result of a negation).
           auto totalSize = std::accumulate(
               singleResult._intervals.begin(), singleResult._intervals.end(),
-              resultTable.size(), [](const auto& sum, const auto& interval) {
-                return sum + (interval.second - interval.first);
+              resultTable.size(),
+              [&input](const auto& sum, const auto& interval) {
+                size_t intervalBegin = interval.first;
+                size_t intervalEnd = std::min(interval.second, input.size());
+                return sum + (intervalEnd - intervalBegin);
               });
           resultTable.reserve(totalSize);
           checkCancellation();
-          for (auto [beg, end] : singleResult._intervals) {
-            AD_CONTRACT_CHECK(end <= input.size());
-            resultTable.insertAtEnd(input.cbegin() + beg, input.cbegin() + end);
+          for (auto [intervalBegin, intervalEnd] : singleResult._intervals) {
+            intervalEnd = std::min(intervalEnd, input.size());
+            resultTable.insertAtEnd(input.cbegin() + intervalBegin,
+                                    input.cbegin() + intervalEnd);
             checkCancellation();
           }
           AD_CONTRACT_CHECK(resultTable.size() == totalSize);
         } else {
-          // All other results are converted to boolean values via the
-          // `EffectiveBooleanValueGetter`. This means for example, that zero,
-          // UNDEF, and empty strings are filtered out.
-          // TODO<joka921> Check whether it's feasible to precompute and reserve
-          // the total size. This depends on the expensiveness of the
-          // `EffectiveBooleanValueGetter`.
+          // In the general case, we generate all expression results and apply
+          // the `EffectiveBooleanValueGetter` to each.
+          //
+          // NOTE: According to the standard, this means that values like zero,
+          // UNDEF, and empty strings are converted to `false` and hence the
+          // corresponding rows from `input` are filtered out.
+          //
+          // TODO<joka921> Check whether it is feasible to precompute the
+          // number of `true` values and use that to reserve the right
+          // amount of space for `resultTable`, like we do it for the set of
+          // intervals above. This depends on how expensive the evaluation with
+          // the `EffectiveBooleanValueGetter` is.
           auto resultGenerator = sparqlExpression::detail::makeGenerator(
               std::forward<T>(singleResult), input.size(), &evaluationContext);
           size_t i = 0;
 
-          using EBV = sparqlExpression::detail::EffectiveBooleanValueGetter;
+          using ValueGetter =
+              sparqlExpression::detail::EffectiveBooleanValueGetter;
+          ValueGetter valueGetter{};
           for (auto&& resultValue : resultGenerator) {
-            if (EBV{}(resultValue, &evaluationContext) == EBV::Result::True) {
+            if (valueGetter(resultValue, &evaluationContext) ==
+                ValueGetter::Result::True) {
               resultTable.push_back(input[i]);
             }
             checkCancellation();
@@ -164,8 +186,7 @@ void Filter::computeFilterImpl(IdTable& dynamicResultTable,
           }
         }
       };
-
-  std::visit(visitor, std::move(expressionResult));
+  std::visit(computeResult, std::move(expressionResult));
 
   dynamicResultTable = std::move(resultTable).toDynamic();
   checkCancellation();

From 1bcfeeb67e09e9c50a877e989045f3868f84e76a Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@informatik.uni-freiburg.de>
Date: Sun, 10 Nov 2024 19:37:58 +0100
Subject: [PATCH 12/12] Separate the effects of `send` and `LIMIT` again
 (#1488)

Since https://github.com/ad-freiburg/qlever/pull/1355, the query `LIMIT` is clamped to the value of the QLever-specific `send` parameter. In particular, this broke the display of the result size in the QLever UI, which sets `send=100` when showing the first page of results for a query.

This change restores the old behavior for the `send` parameter, yet makes good use of the new lazy query processing. Specifically, lazily computed result blocks are now processed as follows: (1) the first result blocks before OFFSET are computed but skipped; (2) then results are computed and materialized until the value of the `send` parameter is reached; (3) then results are computed and counted but *not* materialized until the LIMIT is reached; (4) all remaining blocks are not even computed.

The QLever JSON now has two new fields `resultSizeTotal` and `resultSizeExported`, with the corresponding values. For the sake of backwards-compatibility, the old `resultsize` field is still there and has the same value as the `resultSizeTotal` field. For the same reason, the `send` parameter keeps its name for now, but should be renamed to `exportLimit` eventually.

On the side, dropped the hard limit of `MAX_NOF_ROWS_IN_RESULT = 1'000'000` for JSON results. Also fixed the compilation error introduced by the interplay of the merges of #1603 and #1607. Fixes #1605 and #1455.
---
 src/engine/ExecuteUpdate.cpp             |  11 +-
 src/engine/ExportQueryExecutionTrees.cpp | 183 +++++++++++++++++------
 src/engine/ExportQueryExecutionTrees.h   | 115 +++++++-------
 src/engine/Operation.cpp                 |   3 +-
 src/engine/Server.cpp                    |  50 +++----
 src/engine/Server.h                      |   2 +-
 src/index/DeltaTriples.h                 |   1 +
 src/parser/data/LimitOffsetClause.h      |  22 ++-
 test/ExecuteUpdateTest.cpp               |  30 ++--
 test/ExportQueryExecutionTreesTest.cpp   | 169 ++++++++++++++-------
 10 files changed, 370 insertions(+), 216 deletions(-)

diff --git a/src/engine/ExecuteUpdate.cpp b/src/engine/ExecuteUpdate.cpp
index 55564de978..f6c7ea43a6 100644
--- a/src/engine/ExecuteUpdate.cpp
+++ b/src/engine/ExecuteUpdate.cpp
@@ -130,13 +130,13 @@ ExecuteUpdate::computeGraphUpdateQuads(
   auto graphUpdate = std::get<updateClause::GraphUpdate>(updateClause.op_);
   // Fully materialize the result for now. This makes it easier to execute the
   // update.
-  auto res = qet.getResult(false);
+  auto result = qet.getResult(false);
 
   const auto& vocab = index.getVocab();
 
   auto prepareTemplateAndResultContainer =
       [&vocab, &qet,
-       &res](std::vector<SparqlTripleSimpleWithGraph>&& tripleTemplates) {
+       &result](std::vector<SparqlTripleSimpleWithGraph>&& tripleTemplates) {
         auto [transformedTripleTemplates, localVocab] =
             transformTriplesTemplate(vocab, qet.getVariableColumns(),
                                      std::move(tripleTemplates));
@@ -144,7 +144,7 @@ ExecuteUpdate::computeGraphUpdateQuads(
         // The maximum result size is size(query result) x num template rows.
         // The actual result can be smaller if there are template rows with
         // variables for which a result row does not have a value.
-        updateTriples.reserve(res->idTable().size() *
+        updateTriples.reserve(result->idTable().size() *
                               transformedTripleTemplates.size());
 
         return std::tuple{std::move(transformedTripleTemplates),
@@ -156,8 +156,9 @@ ExecuteUpdate::computeGraphUpdateQuads(
   auto [toDeleteTemplates, toDelete, localVocabDelete] =
       prepareTemplateAndResultContainer(std::move(graphUpdate.toDelete_));
 
-  for (const auto& [pair, range] :
-       ExportQueryExecutionTrees::getRowIndices(query._limitOffset, *res)) {
+  uint64_t resultSize = 0;
+  for (const auto& [pair, range] : ExportQueryExecutionTrees::getRowIndices(
+           query._limitOffset, *result, resultSize)) {
     auto& idTable = pair.idTable_;
     for (const uint64_t i : range) {
       computeAndAddQuadsForResultRow(toInsertTemplates, toInsert, idTable, i);
diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp
index 811ed47059..6cea915b55 100644
--- a/src/engine/ExportQueryExecutionTrees.cpp
+++ b/src/engine/ExportQueryExecutionTrees.cpp
@@ -1,6 +1,8 @@
 // Copyright 2022 - 2024, University of Freiburg
 // Chair of Algorithms and Data Structures
-// Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+// Authors: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+//          Robin Textor-Falconi <textorr@cs.uni-freiburg.de>
+//          Hannah Bast <bast@cs.uni-freiburg.de>
 
 #include "ExportQueryExecutionTrees.h"
 
@@ -83,31 +85,82 @@ ExportQueryExecutionTrees::getIdTables(const Result& result) {
   }
 }
 
-// Return a range that contains the indices of the rows that have to be exported
-// from the `idTable` given the `LimitOffsetClause`. It takes into account the
-// LIMIT, the OFFSET, and the actual size of the `idTable`
+// _____________________________________________________________________________
 cppcoro::generator<ExportQueryExecutionTrees::TableWithRange>
 ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset,
-                                         const Result& result) {
+                                         const Result& result,
+                                         uint64_t& resultSize) {
+  // The first call initializes the `resultSize` to zero (no need to
+  // initialize it outside of the function).
+  resultSize = 0;
+
+  // If the LIMIT is zero, there are no blocks to yield and the total result
+  // size is zero.
   if (limitOffset._limit.value_or(1) == 0) {
     co_return;
   }
+
+  // The effective offset, limit, and export limit. These will be updated after
+  // each block, see `updateEffectiveOffsetAndLimits` below. If they were not
+  // specified, they are initialized to their default values (0 for the offset
+  // and `std::numeric_limits<uint64_t>::max()` for the two limits).
+  uint64_t effectiveOffset = limitOffset._offset;
+  uint64_t effectiveLimit = limitOffset.limitOrDefault();
+  uint64_t effectiveExportLimit = limitOffset.exportLimitOrDefault();
+
+  // Make sure that the export limit is at most the limit (increasing the
+  // export limit beyond the limit has no effect).
+  effectiveExportLimit = std::min(effectiveExportLimit, effectiveLimit);
+
+  // Iterate over the result in blocks.
   for (TableConstRefWithVocab& tableWithVocab : getIdTables(result)) {
-    uint64_t currentOffset =
-        limitOffset.actualOffset(tableWithVocab.idTable_.numRows());
-    uint64_t upperBound =
-        limitOffset.upperBound(tableWithVocab.idTable_.numRows());
-    if (currentOffset != upperBound) {
-      co_yield {std::move(tableWithVocab),
-                std::views::iota(currentOffset, upperBound)};
+    // If all rows in the current block are before the effective offset, we can
+    // skip the block entirely. If not, there is at least something to count
+    // and maybe also something to yield.
+    uint64_t currentBlockSize = tableWithVocab.idTable_.numRows();
+    if (effectiveOffset >= currentBlockSize) {
+      effectiveOffset -= currentBlockSize;
+      continue;
     }
-    limitOffset._offset -= currentOffset;
-    if (limitOffset._limit.has_value()) {
-      limitOffset._limit =
-          limitOffset._limit.value() - (upperBound - currentOffset);
+    AD_CORRECTNESS_CHECK(effectiveOffset < currentBlockSize);
+    AD_CORRECTNESS_CHECK(effectiveLimit > 0);
+
+    // Compute the range of rows to be exported (can be zero) and to be counted
+    // (always non-zero at this point).
+    uint64_t rangeBegin = effectiveOffset;
+    uint64_t numRowsToBeExported =
+        std::min(effectiveExportLimit, currentBlockSize - rangeBegin);
+    uint64_t numRowsToBeCounted =
+        std::min(effectiveLimit, currentBlockSize - rangeBegin);
+    AD_CORRECTNESS_CHECK(rangeBegin + numRowsToBeExported <= currentBlockSize);
+    AD_CORRECTNESS_CHECK(rangeBegin + numRowsToBeCounted <= currentBlockSize);
+    AD_CORRECTNESS_CHECK(numRowsToBeCounted > 0);
+
+    // If there is something to be exported, yield it.
+    if (numRowsToBeExported > 0) {
+      co_yield {std::move(tableWithVocab),
+                std::views::iota(rangeBegin, rangeBegin + numRowsToBeExported)};
     }
-    if (limitOffset._limit.value_or(1) == 0) {
-      break;
+
+    // Add to `resultSize` and update the effective offset (which becomes zero
+    // after the first non-skipped block) and limits (make sure to never go
+    // below zero and `std::numeric_limits<uint64_t>::max()` stays there).
+    resultSize += numRowsToBeCounted;
+    effectiveOffset = 0;
+    auto reduceLimit = [&](uint64_t& limit, uint64_t subtrahend) {
+      if (limit != std::numeric_limits<uint64_t>::max()) {
+        limit = limit > subtrahend ? limit - subtrahend : 0;
+      }
+    };
+    reduceLimit(effectiveLimit, numRowsToBeCounted);
+    reduceLimit(effectiveExportLimit, numRowsToBeCounted);
+
+    // If the effective limit is zero, there is nothing to yield and nothing
+    // to count anymore. This should come at the end of this loop and not at
+    // the beginning, to avoid unnecessarily fetching another block from
+    // `result`.
+    if (effectiveLimit == 0) {
+      co_return;
     }
   }
 }
@@ -118,8 +171,9 @@ ExportQueryExecutionTrees::constructQueryResultToTriples(
     const QueryExecutionTree& qet,
     const ad_utility::sparql_types::Triples& constructTriples,
     LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> result,
-    CancellationHandle cancellationHandle) {
-  for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) {
+    uint64_t& resultSize, CancellationHandle cancellationHandle) {
+  for (const auto& [pair, range] :
+       getRowIndices(limitAndOffset, *result, resultSize)) {
     auto& idTable = pair.idTable_;
     for (uint64_t i : range) {
       ConstructQueryExportContext context{i, idTable, pair.localVocab_,
@@ -140,6 +194,11 @@ ExportQueryExecutionTrees::constructQueryResultToTriples(
       }
     }
   }
+  // For each result from the WHERE clause, we produce up to
+  // `constructTriples.size()` triples. We do not account for triples that are
+  // filtered out because one of the components is UNDEF (it would require
+  // materializing the whole result).
+  resultSize *= constructTriples.size();
 }
 
 // _____________________________________________________________________________
@@ -151,9 +210,10 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::
         LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> result,
         CancellationHandle cancellationHandle) {
   result->logResultSize();
-  auto generator =
-      constructQueryResultToTriples(qet, constructTriples, limitAndOffset,
-                                    result, std::move(cancellationHandle));
+  [[maybe_unused]] uint64_t resultSize = 0;
+  auto generator = constructQueryResultToTriples(
+      qet, constructTriples, limitAndOffset, result, resultSize,
+      std::move(cancellationHandle));
   for (const auto& triple : generator) {
     co_yield triple.subject_;
     co_yield ' ';
@@ -182,11 +242,12 @@ cppcoro::generator<std::string>
 ExportQueryExecutionTrees::constructQueryResultBindingsToQLeverJSON(
     const QueryExecutionTree& qet,
     const ad_utility::sparql_types::Triples& constructTriples,
-    const LimitOffsetClause& limitAndOffset, std::shared_ptr<const Result> res,
+    const LimitOffsetClause& limitAndOffset,
+    std::shared_ptr<const Result> result, uint64_t& resultSize,
     CancellationHandle cancellationHandle) {
-  auto generator = constructQueryResultToTriples(qet, constructTriples,
-                                                 limitAndOffset, std::move(res),
-                                                 std::move(cancellationHandle));
+  auto generator = constructQueryResultToTriples(
+      qet, constructTriples, limitAndOffset, std::move(result), resultSize,
+      std::move(cancellationHandle));
   for (auto& triple : generator) {
     auto binding = nlohmann::json::array({std::move(triple.subject_),
                                           std::move(triple.predicate_),
@@ -230,12 +291,13 @@ nlohmann::json idTableToQLeverJSONRow(
 // _____________________________________________________________________________
 cppcoro::generator<std::string>
 ExportQueryExecutionTrees::idTableToQLeverJSONBindings(
-    const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset,
+    const QueryExecutionTree& qet, LimitOffsetClause limitAndOffset,
     const QueryExecutionTree::ColumnIndicesAndTypes columns,
-    std::shared_ptr<const Result> result,
+    std::shared_ptr<const Result> result, uint64_t& resultSize,
     CancellationHandle cancellationHandle) {
   AD_CORRECTNESS_CHECK(result != nullptr);
-  for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) {
+  for (const auto& [pair, range] :
+       getRowIndices(limitAndOffset, *result, resultSize)) {
     for (uint64_t rowIndex : range) {
       co_yield idTableToQLeverJSONRow(qet, columns, pair.localVocab_, rowIndex,
                                       pair.idTable_)
@@ -440,7 +502,7 @@ ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON(
     const QueryExecutionTree& qet,
     const parsedQuery::SelectClause& selectClause,
     const LimitOffsetClause& limitAndOffset,
-    std::shared_ptr<const Result> result,
+    std::shared_ptr<const Result> result, uint64_t& resultSize,
     CancellationHandle cancellationHandle) {
   AD_CORRECTNESS_CHECK(result != nullptr);
   LOG(DEBUG) << "Resolving strings for finished binary result...\n";
@@ -448,7 +510,7 @@ ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON(
       qet.selectedVariablesToColumnIndices(selectClause, true);
 
   return idTableToQLeverJSONBindings(qet, limitAndOffset, selectedColumnIndices,
-                                     std::move(result),
+                                     std::move(result), resultSize,
                                      std::move(cancellationHandle));
 }
 
@@ -479,7 +541,9 @@ ExportQueryExecutionTrees::selectQueryResultToStream(
 
   // special case : binary export of IdTable
   if constexpr (format == MediaType::octetStream) {
-    for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) {
+    uint64_t resultSize = 0;
+    for (const auto& [pair, range] :
+         getRowIndices(limitAndOffset, *result, resultSize)) {
       for (uint64_t i : range) {
         for (const auto& columnIndex : selectedColumnIndices) {
           if (columnIndex.has_value()) {
@@ -510,7 +574,9 @@ ExportQueryExecutionTrees::selectQueryResultToStream(
   constexpr auto& escapeFunction = format == MediaType::tsv
                                        ? RdfEscaping::escapeForTsv
                                        : RdfEscaping::escapeForCsv;
-  for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) {
+  uint64_t resultSize = 0;
+  for (const auto& [pair, range] :
+       getRowIndices(limitAndOffset, *result, resultSize)) {
     for (uint64_t i : range) {
       for (size_t j = 0; j < selectedColumnIndices.size(); ++j) {
         if (selectedColumnIndices[j].has_value()) {
@@ -635,7 +701,9 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::
   auto selectedColumnIndices =
       qet.selectedVariablesToColumnIndices(selectClause, false);
   // TODO<joka921> we could prefilter for the nonexisting variables.
-  for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) {
+  uint64_t resultSize = 0;
+  for (const auto& [pair, range] :
+       getRowIndices(limitAndOffset, *result, resultSize)) {
     for (uint64_t i : range) {
       co_yield "\n  <result>";
       for (size_t j = 0; j < selectedColumnIndices.size(); ++j) {
@@ -702,7 +770,9 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees::
   };
 
   bool isFirstRow = true;
-  for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) {
+  uint64_t resultSize = 0;
+  for (const auto& [pair, range] :
+       getRowIndices(limitAndOffset, *result, resultSize)) {
     for (uint64_t i : range) {
       if (!isFirstRow) [[likely]] {
         co_yield ",";
@@ -743,9 +813,10 @@ ExportQueryExecutionTrees::constructQueryResultToStream(
                                        ? RdfEscaping::escapeForTsv
                                        : RdfEscaping::escapeForCsv;
   constexpr char sep = format == MediaType::tsv ? '\t' : ',';
-  auto generator =
-      constructQueryResultToTriples(qet, constructTriples, limitAndOffset,
-                                    result, std::move(cancellationHandle));
+  [[maybe_unused]] uint64_t resultSize = 0;
+  auto generator = constructQueryResultToTriples(
+      qet, constructTriples, limitAndOffset, result, resultSize,
+      std::move(cancellationHandle));
   for (auto& triple : generator) {
     co_yield escapeFunction(std::move(triple.subject_));
     co_yield sep;
@@ -857,28 +928,34 @@ ExportQueryExecutionTrees::computeResultAsQLeverJSON(
   co_yield absl::StrCat(prefixStr.substr(0, prefixStr.size() - 1),
                         R"(,"res":[)");
 
+  // Yield the bindings and compute the result size.
+  uint64_t resultSize = 0;
   auto bindings = [&]() {
     if (query.hasSelectClause()) {
       return selectQueryResultBindingsToQLeverJSON(
           qet, query.selectClause(), query._limitOffset, std::move(result),
-          std::move(cancellationHandle));
+          resultSize, std::move(cancellationHandle));
     } else if (query.hasConstructClause()) {
       return constructQueryResultBindingsToQLeverJSON(
           qet, query.constructClause().triples_, query._limitOffset,
-          std::move(result), std::move(cancellationHandle));
+          std::move(result), resultSize, std::move(cancellationHandle));
     } else {
       // TODO<joka921>: Refactor this to use std::visit.
       return askQueryResultToQLeverJSON(std::move(result));
     }
   }();
 
-  size_t resultSize = 0;
+  size_t numBindingsExported = 0;
   for (const std::string& b : bindings) {
-    if (resultSize > 0) [[likely]] {
+    if (numBindingsExported > 0) [[likely]] {
       co_yield ",";
     }
     co_yield b;
-    ++resultSize;
+    ++numBindingsExported;
+  }
+  if (numBindingsExported < resultSize) {
+    LOG(INFO) << "Number of bindings exported: " << numBindingsExported
+              << " of " << resultSize << std::endl;
   }
 
   RuntimeInformation runtimeInformation = qet.getRootOperation()->runtimeInfo();
@@ -888,11 +965,29 @@ ExportQueryExecutionTrees::computeResultAsQLeverJSON(
       std::chrono::duration_cast<std::chrono::milliseconds>(
           timeUntilFunctionCall + runtimeInformation.totalTime_);
 
+  // NOTE: We report three "result sizes" in the QLever JSON output, for the
+  // following reasons:
+  //
+  // The `resultSizeExported` is the number of bindings exported. This is
+  // redundant information (we could simply count the number of entries in the
+  // `res` array), but it is useful for testing and emphasizes the conceptual
+  // difference to `resultSizeTotal`.
+  //
+  // The `resultSizeTotal` is the number of results of the WHOLE query. For
+  // CONSTRUCT queries, it can be an overestimate because it also includes
+  // triples, where one of the components is UNDEF, which are not included
+  // in the final result of a CONSTRUCT query.
+  //
+  // The `resultsize` is equal to `resultSizeTotal`. It is included for
+  // backwards compatibility, in particular, because the QLever UI uses it
+  // at many places.
   nlohmann::json jsonSuffix;
   jsonSuffix["runtimeInformation"]["meta"] = nlohmann::ordered_json(
       qet.getRootOperation()->getRuntimeInfoWholeQuery());
   jsonSuffix["runtimeInformation"]["query_execution_tree"] =
       nlohmann::ordered_json(runtimeInformation);
+  jsonSuffix["resultSizeExported"] = numBindingsExported;
+  jsonSuffix["resultSizeTotal"] = resultSize;
   jsonSuffix["resultsize"] = resultSize;
   jsonSuffix["time"]["total"] =
       absl::StrCat(requestTimer.msecs().count(), "ms");
diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h
index 91a37b6d40..a1443e802d 100644
--- a/src/engine/ExportQueryExecutionTrees.h
+++ b/src/engine/ExportQueryExecutionTrees.h
@@ -1,6 +1,8 @@
-//  Copyright 2022, University of Freiburg,
-//                  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+// Copyright 2022 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+//          Robin Textor-Falconi <textorr@cs.uni-freiburg.de>
+//          Hannah Bast <bast@cs.uni-freiburg.de>
 
 #pragma once
 
@@ -10,14 +12,9 @@
 #include "util/http/MediaTypes.h"
 #include "util/json.h"
 
-// This class contains all the functionality to convert a query that has already
-// been parsed (by the SPARQL parser) and planned (by the query planner) into
-// a serialized result. In particular, it creates TSV, CSV, Turtle, JSON (SPARQL
-// conforming and QLever's flavor) and binary results).
-// This class has only two static public functions, one for the JSON
-// output format (which returns a `nlohmann::json` object) and one for the other
-// result formats (tsv, csv, turtle, binary) which returns a
-// `streamable_generator`.
+// Class for computing the result of an already parsed and planned query and
+// exporting it in different formats (TSV, CSV, Turtle, JSON, Binary).
+//
 // TODO<joka921> Also implement a streaming JSON serializer to reduce the RAM
 // consumption of large JSON exports and to make this interface even simpler.
 class ExportQueryExecutionTrees {
@@ -41,18 +38,6 @@ class ExportQueryExecutionTrees {
       MediaType mediaType, const ad_utility::Timer& requestTimer,
       CancellationHandle cancellationHandle);
 
-  // Compute the result of the given `parsedQuery` (created by the
-  // `SparqlParser`) for which the `QueryExecutionTree` has been previously
-  // created by the `QueryPlanner`. The result is converted to the format
-  // specified by the `mediaType`. Supported formats for this function are
-  // `SparqlJSON` and `QLeverJSON`. Note that the SparqlJSON format can only be
-  // used with SELECT queries. Invalid `mediaType`s and invalid combinations of
-  // `mediaType` and the query type will throw. The result is returned as a
-  // single JSON object that is fully materialized before the function returns.
-  // The `requestTimer` is used to report timing statistics on the query. It
-  // must have already run during the query planning to produce the expected
-  // results.
-
   // Convert the `id` to a human-readable string. The `index` is used to resolve
   // `Id`s with datatype `VocabIndex` or `TextRecordIndex`. The `localVocab` is
   // used to resolve `Id`s with datatype `LocalVocabIndex`. The `escapeFunction`
@@ -105,66 +90,64 @@ class ExportQueryExecutionTrees {
       ad_utility::streams::stream_generator streamGenerator);
 
  private:
-  // Similar to `computeResult` but returns a stream in
-  // QLeverJSON-format.
+  // Generate the bindings of the result of a SELECT or CONSTRUCT query in the
+  // `application/qlever-results+json` format.
+  //
+  // NOTE: This calls `selectQueryResultBindingsToQLeverJSON` or
+  // `constructQueryResultBindingsToQLeverJSON` for the bindings and adds the
+  // remaining (meta) fields needed for the `application/qlever-results+json`
+  // format.
   static ad_utility::streams::stream_generator computeResultAsQLeverJSON(
       const ParsedQuery& query, const QueryExecutionTree& qet,
       const ad_utility::Timer& requestTimer,
       CancellationHandle cancellationHandle);
 
-  // ___________________________________________________________________________
+  // Generate the bindings of the result of a SELECT query in the
+  // `application/qlever-results+json` format.
   static cppcoro::generator<std::string> selectQueryResultBindingsToQLeverJSON(
       const QueryExecutionTree& qet,
       const parsedQuery::SelectClause& selectClause,
       const LimitOffsetClause& limitAndOffset,
-      std::shared_ptr<const Result> result,
-      CancellationHandle cancellationHandle);
-  /**
-   * @brief Convert an `IdTable` (typically from a query result) to a JSON
-   * array In the `QLeverJSON` format. This function is called by
-   *  `computeQueryResultAsQLeverJSON` to obtain the "actual" query results
-   * (without the meta data)
-   * @param qet The `QueryExecutionTree` of the query.
-   * @param limitAndOffset at most <limit> entries are written, starting at
-   * <from>
-   * @param columns each pair of <columnInIdTable, correspondingType> tells
-   * us which columns are to be serialized in which order
-   * @param resultTable The query result in the ID space. If it is `nullptr`,
-   *        then the query result will be obtained via `qet->getResult()`.
-   * @return a 2D-Json array corresponding to the IdTable given the arguments
-   */
-  static cppcoro::generator<std::string> idTableToQLeverJSONBindings(
-      const QueryExecutionTree& qet, const LimitOffsetClause& limitAndOffset,
-      const QueryExecutionTree::ColumnIndicesAndTypes columns,
-      std::shared_ptr<const Result> result,
+      std::shared_ptr<const Result> result, uint64_t& resultSize,
       CancellationHandle cancellationHandle);
 
-  // ___________________________________________________________________________
+  // Generate the bindings of the result of a CONSTRUCT query in the
+  // `application/qlever-results+json` format.
   static cppcoro::generator<std::string>
   constructQueryResultBindingsToQLeverJSON(
       const QueryExecutionTree& qet,
       const ad_utility::sparql_types::Triples& constructTriples,
       const LimitOffsetClause& limitAndOffset,
-      std::shared_ptr<const Result> res, CancellationHandle cancellationHandle);
+      std::shared_ptr<const Result> result, uint64_t& resultSize,
+      CancellationHandle cancellationHandle);
 
-  // Generate an RDF graph for a CONSTRUCT query.
+  // Helper function that generates the individual bindings for the
+  // `application/qlever-results+json` format.
+  static cppcoro::generator<std::string> idTableToQLeverJSONBindings(
+      const QueryExecutionTree& qet, LimitOffsetClause limitAndOffset,
+      const QueryExecutionTree::ColumnIndicesAndTypes columns,
+      std::shared_ptr<const Result> result, uint64_t& resultSize,
+      CancellationHandle cancellationHandle);
+
+  // Helper function that generates the result of a CONSTRUCT query as
+  // `StringTriple`s.
   static cppcoro::generator<QueryExecutionTree::StringTriple>
   constructQueryResultToTriples(
       const QueryExecutionTree& qet,
       const ad_utility::sparql_types::Triples& constructTriples,
-      LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> res,
-      CancellationHandle cancellationHandle);
+      LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> result,
+      uint64_t& resultSize, CancellationHandle cancellationHandle);
 
-  // ___________________________________________________________________________
+  // Helper function that generates the result of a CONSTRUCT query as a
+  // CSV or TSV stream.
   template <MediaType format>
   static ad_utility::streams::stream_generator constructQueryResultToStream(
       const QueryExecutionTree& qet,
       const ad_utility::sparql_types::Triples& constructTriples,
-      LimitOffsetClause limitAndOffset,
-      std::shared_ptr<const Result> resultTable,
+      LimitOffsetClause limitAndOffset, std::shared_ptr<const Result> result,
       CancellationHandle cancellationHandle);
 
-  // _____________________________________________________________________________
+  // Generate the result of a SELECT query as a CSV or TSV or binary stream.
   template <MediaType format>
   static ad_utility::streams::stream_generator selectQueryResultToStream(
       const QueryExecutionTree& qet,
@@ -189,12 +172,26 @@ class ExportQueryExecutionTrees {
   static cppcoro::generator<ExportQueryExecutionTrees::TableConstRefWithVocab>
   getIdTables(const Result& result);
 
-  // Return a range that contains the indices of the rows that have to be
-  // exported from the `idTable` given the `LimitOffsetClause`. It takes into
-  // account the LIMIT, the OFFSET, and the actual size of the `idTable`
+  // Generate the result in "blocks" and, when iterating over the generator
+  // from beginning to end, return the total number of rows in the result
+  // via the `uint64_t&` output parameter.
+  //
+  // Blocks, where all rows are before OFFSET, are requested (and hence
+  // computed), but skipped.
+  //
+  // Blocks, where at least one row is after OFFSET but before the effective
+  // export limit (minimum of the LIMIT and the value of the `send` parameter),
+  // are requested and yielded (together with the corresponding `LocalVocab`
+  // and the range from that `IdTable` that belongs to the result).
+  //
+  // Blocks after the effective export limit until the LIMIT are requested, and
+  // counted towards the total result size, but not yielded.
+  //
+  // Blocks after the LIMIT are not even requested.
  public:
   static cppcoro::generator<TableWithRange> getRowIndices(
-      LimitOffsetClause limitOffset, const Result& result);
+      LimitOffsetClause limitOffset, const Result& result,
+      uint64_t& resultSizeTotal);
 
  private:
   FRIEND_TEST(ExportQueryExecutionTrees, getIdTablesReturnsSingletonIterator);
diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp
index bb7d8fb0af..9c6ae814aa 100644
--- a/src/engine/Operation.cpp
+++ b/src/engine/Operation.cpp
@@ -433,7 +433,8 @@ void Operation::createRuntimeInfoFromEstimates(
 
   _runtimeInfo->costEstimate_ = getCostEstimate();
   _runtimeInfo->sizeEstimate_ = getSizeEstimateBeforeLimit();
-  const auto& [limit, offset, _] = getLimit();
+  // We are interested only in the first two elements of the limit tuple.
+  const auto& [limit, offset, _1, _2] = getLimit();
   if (limit.has_value()) {
     _runtimeInfo->addDetail("limit", limit.value());
   }
diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
index 5a5085a23c..87ca3897fc 100644
--- a/src/engine/Server.cpp
+++ b/src/engine/Server.cpp
@@ -1,4 +1,4 @@
-// Copyright 2011 - 2022, University of Freiburg
+// Copyright 2011 - 2024, University of Freiburg
 // Chair of Algorithms and Data Structures
 // Authors: Björn Buchhold <b.buchhold@gmail.com>
 //          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
@@ -761,19 +761,6 @@ Awaitable<void> Server::processQuery(
   LOG(INFO) << "Requested media type of result is \""
             << ad_utility::toString(mediaType) << "\"" << std::endl;
 
-  // TODO<c++23> use std::optional::transform
-  std::optional<uint64_t> maxSend = std::nullopt;
-  auto parameterValue =
-      ad_utility::url_parser::getParameterCheckAtMostOnce(params, "send");
-  if (parameterValue.has_value()) {
-    maxSend = std::stoul(parameterValue.value());
-  }
-  // Limit JSON requests by default
-  if (!maxSend.has_value() && (mediaType == MediaType::sparqlJson ||
-                               mediaType == MediaType::qleverJson)) {
-    maxSend = MAX_NOF_ROWS_IN_RESULT;
-  }
-
   auto queryHub = queryHub_.lock();
   AD_CORRECTNESS_CHECK(queryHub);
   ad_utility::websocket::MessageSender messageSender{getQueryId(request, query),
@@ -800,22 +787,27 @@ Awaitable<void> Server::processQuery(
     // This may be caused by a bug (the code is not yet tested well) or by an
     // attack which tries to circumvent (not yet existing) access controls for
     // Update.
-    throw std::runtime_error("Expected Query but received Update.");
+    throw std::runtime_error("Expected normal query but received update query");
   }
 
-  // Apply stricter limit for export if present
-  if (maxSend.has_value()) {
-    auto& pq = plannedQuery.parsedQuery_;
-    pq._limitOffset._limit =
-        std::min(maxSend.value(), pq._limitOffset.limitOrDefault());
+  // Read the export limit from the `send` parameter (historical name). This
+  // limits the number of bindings exported in `ExportQueryExecutionTrees`.
+  // It should only have an effect for the QLever JSON export.
+  auto& limitOffset = plannedQuery.parsedQuery_._limitOffset;
+  auto& exportLimit = limitOffset.exportLimit_;
+  auto sendParameter =
+      ad_utility::url_parser::getParameterCheckAtMostOnce(params, "send");
+  if (sendParameter.has_value() && mediaType == MediaType::qleverJson) {
+    exportLimit = std::stoul(sendParameter.value());
   }
-  // Make sure we don't underflow here
-  AD_CORRECTNESS_CHECK(plannedQuery.parsedQuery_._limitOffset._offset >=
+
+  // Make sure that the offset is not applied again when exporting the result
+  // (it is already applied by the root operation in the query execution
+  // tree). Note that we don't need this for the limit because applying a
+  // fixed limit is idempotent.
+  AD_CORRECTNESS_CHECK(limitOffset._offset >=
                        qet.getRootOperation()->getLimit()._offset);
-  // Don't apply offset twice, if the offset was not applied to the operation
-  // then the exporter can safely apply it during export.
-  plannedQuery.parsedQuery_._limitOffset._offset -=
-      qet.getRootOperation()->getLimit()._offset;
+  limitOffset._offset -= qet.getRootOperation()->getLimit()._offset;
 
   // This actually processes the query and sends the result in the requested
   // format.
@@ -824,6 +816,9 @@ Awaitable<void> Server::processQuery(
 
   // Print the runtime info. This needs to be done after the query
   // was computed.
+  LOG(INFO) << "Done processing query and sending result"
+            << ", total time was " << requestTimer.msecs().count() << " ms"
+            << std::endl;
 
   // Log that we are done with the query and how long it took.
   //
@@ -833,9 +828,6 @@ Awaitable<void> Server::processQuery(
   // contain timing information).
   //
   // TODO<joka921> Also log an identifier of the query.
-  LOG(INFO) << "Done processing query and sending result"
-            << ", total time was " << requestTimer.msecs().count() << " ms"
-            << std::endl;
   LOG(DEBUG) << "Runtime Info:\n"
              << qet.getRootOperation()->runtimeInfo().toString() << std::endl;
   co_return;
diff --git a/src/engine/Server.h b/src/engine/Server.h
index 7abaf3eaf5..e7e514adf1 100644
--- a/src/engine/Server.h
+++ b/src/engine/Server.h
@@ -1,4 +1,4 @@
-// Copyright 2021 - 2022, University of Freiburg
+// Copyright 2021 - 2024, University of Freiburg
 // Chair of Algorithms and Data Structures
 // Authors: Johannes Kalmbach<kalmbach@cs.uni-freiburg.de>
 //          Hannah Bast <bast@cs.uni-freiburg.de>
diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h
index afe13c7c07..329bd1333b 100644
--- a/src/index/DeltaTriples.h
+++ b/src/index/DeltaTriples.h
@@ -105,6 +105,7 @@ class DeltaTriples {
   explicit DeltaTriples(const Index& index);
   explicit DeltaTriples(const IndexImpl& index) : index_{index} {};
 
+  // Disable accidental copying.
   DeltaTriples(const DeltaTriples&) = delete;
   DeltaTriples& operator=(const DeltaTriples&) = delete;
 
diff --git a/src/parser/data/LimitOffsetClause.h b/src/parser/data/LimitOffsetClause.h
index 36442d9c5b..63b51932e6 100644
--- a/src/parser/data/LimitOffsetClause.h
+++ b/src/parser/data/LimitOffsetClause.h
@@ -1,6 +1,8 @@
-//  Copyright 2022, University of Freiburg,
-//  Chair of Algorithms and Data Structures.
-//  Author: Julian Mundhahs (mundhahj@informatik.uni-freiburg.de)
+// Copyright 2022 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Julian Mundhahs <mundhahj@cs.uni-freiburg.de>
+//          Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+//          Hannah Bast <bast@cs.uni-freiburg.de>
 
 #pragma once
 
@@ -12,15 +14,19 @@
 
 // Represents the data returned by a limitOffsetClause.
 struct LimitOffsetClause {
-  std::optional<uint64_t> _limit;
+  std::optional<uint64_t> _limit = std::nullopt;
   uint64_t _offset = 0;
   std::optional<uint64_t> textLimit_ = std::nullopt;
+  std::optional<uint64_t> exportLimit_ = std::nullopt;
 
   // If a limit is specified, return the limit, else return the maximal
   // representable limit.
   uint64_t limitOrDefault() const {
     return _limit.value_or(std::numeric_limits<uint64_t>::max());
   }
+  uint64_t exportLimitOrDefault() const {
+    return exportLimit_.value_or(std::numeric_limits<uint64_t>::max());
+  }
 
   // Return the minimum of the offset and the `actualSize` of a query result.
   // That way, if the offset is too large, the result after applying it will be
@@ -30,10 +36,10 @@ struct LimitOffsetClause {
     return std::min(actualSize, _offset);
   }
 
-  // When applying the limit and offset to a table of `actualSize`, what is the
-  // actual upper bound (as an index into the table) for the resulting elements.
-  // In the most simple case this is `limit + offset`, but this function handles
-  // all possible overflows. The result will always be `<= actualSize`.
+  // Return the exclusive upper bound (as an index into a table of size
+  // `actualSize`) of the rows that remain after applying the limit and offset.
+  // When the table is large enough, this is simply `limit + offset`. Otherwise,
+  // it is clamped, so the result is always `<= actualSize`.
   uint64_t upperBound(uint64_t actualSize) const {
     auto val = limitOrDefault() + _offset;
     val = val >= std::max(limitOrDefault(), _offset)
diff --git a/test/ExecuteUpdateTest.cpp b/test/ExecuteUpdateTest.cpp
index 5367d8ec7d..83015cac30 100644
--- a/test/ExecuteUpdateTest.cpp
+++ b/test/ExecuteUpdateTest.cpp
@@ -32,14 +32,14 @@ MATCHER_P(AlwaysFalse, msg, "") {
   return false;
 }
 
-// _____________________________________________________________________________
+// Test the `ExecuteUpdate::executeUpdate` method. These tests run on the
+// default dataset defined in `IndexTestHelpers::makeTestIndex`.
 TEST(ExecuteUpdate, executeUpdate) {
-  auto executeUpdate = [](const std::string& update) {
-    // These tests run on the default dataset defined in
-    // `IndexTestHelpers::makeTestIndex`.
-    QueryExecutionContext* qec = ad_utility::testing::getQec(std::nullopt);
-    const Index& index = qec->getIndex();
-    DeltaTriples deltaTriples{index};
+  QueryExecutionContext* qec = ad_utility::testing::getQec(std::nullopt);
+  const Index& index = qec->getIndex();
+  // Perform the given `update` and store result in given `deltaTriples`.
+  auto expectExecuteUpdateHelper = [&qec, &index](const std::string& update,
+                                                  DeltaTriples& deltaTriples) {
     const auto sharedHandle =
         std::make_shared<ad_utility::CancellationHandle<>>();
     const std::vector<DatasetClause> datasets = {};
@@ -47,20 +47,26 @@ TEST(ExecuteUpdate, executeUpdate) {
     QueryPlanner qp{qec, sharedHandle};
     const auto qet = qp.createExecutionTree(pq);
     ExecuteUpdate::executeUpdate(index, pq, qet, deltaTriples, sharedHandle);
-    return deltaTriples;
   };
+  // Execute the given `update` and check that the delta triples are correct.
   auto expectExecuteUpdate =
-      [&executeUpdate](
+      [&index, &expectExecuteUpdateHelper](
           const std::string& update,
           const testing::Matcher<const DeltaTriples&>& deltaTriplesMatcher) {
-        EXPECT_THAT(executeUpdate(update), deltaTriplesMatcher);
+        DeltaTriples deltaTriples{index};
+        expectExecuteUpdateHelper(update, deltaTriples);
+        EXPECT_THAT(deltaTriples, deltaTriplesMatcher);
       };
+  // Execute the given `update` and check that it fails with the given message.
   auto expectExecuteUpdateFails =
-      [&executeUpdate](
+      [&index, &expectExecuteUpdateHelper](
           const std::string& update,
           const testing::Matcher<const std::string&>& messageMatcher) {
-        AD_EXPECT_THROW_WITH_MESSAGE(executeUpdate(update), messageMatcher);
+        DeltaTriples deltaTriples{index};
+        AD_EXPECT_THROW_WITH_MESSAGE(
+            expectExecuteUpdateHelper(update, deltaTriples), messageMatcher);
       };
+  // Now the actual tests.
   expectExecuteUpdate("INSERT DATA { <s> <p> <o> . }", NumTriples(1, 0, 1));
   expectExecuteUpdate("DELETE DATA { <z> <label> \"zz\"@en }",
                       NumTriples(0, 1, 1));
diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp
index 081bef958d..663bc46fd0 100644
--- a/test/ExportQueryExecutionTreesTest.cpp
+++ b/test/ExportQueryExecutionTreesTest.cpp
@@ -1,6 +1,8 @@
-//  Copyright 2023, University of Freiburg,
-//                  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+// Copyright 2023 - 2024, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+//          Robin Textor-Falconi <robintf@cs.uni-freiburg.de>
+//          Hannah Bast <bast@cs.uni-freiburg.de>
 
 #include <gmock/gmock.h>
 
@@ -24,10 +26,10 @@ using ::testing::HasSubstr;
 namespace {
 // Run the given SPARQL `query` on the given Turtle `kg` and export the result
 // as the `mediaType`. `mediaType` must be TSV or CSV.
-std::string runQueryStreamableResult(const std::string& kg,
-                                     const std::string& query,
-                                     ad_utility::MediaType mediaType,
-                                     bool useTextIndex = false) {
+std::string runQueryStreamableResult(
+    const std::string& kg, const std::string& query,
+    ad_utility::MediaType mediaType, bool useTextIndex = false,
+    std::optional<size_t> exportLimit = std::nullopt) {
   auto qec =
       ad_utility::testing::getQec(kg, true, true, true, 16_B, useTextIndex);
   // TODO<joka921> There is a bug in the caching that we have yet to trace.
@@ -37,6 +39,7 @@ std::string runQueryStreamableResult(const std::string& kg,
       std::make_shared<ad_utility::CancellationHandle<>>();
   QueryPlanner qp{qec, cancellationHandle};
   auto pq = SparqlParser::parseQuery(query);
+  pq._limitOffset.exportLimit_ = exportLimit;
   auto qet = qp.createExecutionTree(pq);
   ad_utility::Timer timer(ad_utility::Timer::Started);
   auto strGenerator = ExportQueryExecutionTrees::computeResult(
@@ -78,7 +81,7 @@ nlohmann::json runJSONQuery(const std::string& kg, const std::string& query,
 struct TestCaseSelectQuery {
   std::string kg;                   // The knowledge graph (TURTLE)
   std::string query;                // The query (SPARQL)
-  size_t resultSize;                // The expected number of results.
+  uint64_t resultSize;              // The expected number of results.
   std::string resultTsv;            // The expected result in TSV format.
   std::string resultCsv;            // The expected result in CSV format
   nlohmann::json resultQLeverJSON;  // The expected result in QLeverJSOn format.
@@ -102,10 +105,14 @@ struct TestCaseAskQuery {
   std::string resultXml;
 };
 
+// For a CONSTRUCT query, the `resultSize` of the QLever JSON is the number of
+// results of the WHERE clause.
 struct TestCaseConstructQuery {
   std::string kg;                   // The knowledge graph (TURTLE)
   std::string query;                // The query (SPARQL)
-  size_t resultSize;                // The expected number of results.
+  uint64_t resultSizeTotal;         // The expected number of results,
+                                    // including triples with UNDEF values.
+  uint64_t resultSizeExported;      // The expected number of results exported.
   std::string resultTsv;            // The expected result in TSV format.
   std::string resultCsv;            // The expected result in CSV format
   std::string resultTurtle;         // The expected result in Turtle format
@@ -128,13 +135,14 @@ void runSelectQueryTestCase(
       runQueryStreamableResult(testCase.kg, testCase.query, csv, useTextIndex),
       testCase.resultCsv);
 
-  auto qleverJSONResult = nlohmann::json::parse(runQueryStreamableResult(
+  auto resultJSON = nlohmann::json::parse(runQueryStreamableResult(
       testCase.kg, testCase.query, qleverJson, useTextIndex));
   // TODO<joka921> Test other members of the JSON result (e.g. the selected
   // variables).
-  ASSERT_EQ(qleverJSONResult["query"], testCase.query);
-  ASSERT_EQ(qleverJSONResult["resultsize"], testCase.resultSize);
-  EXPECT_EQ(qleverJSONResult["res"], testCase.resultQLeverJSON);
+  ASSERT_EQ(resultJSON["query"], testCase.query);
+  ASSERT_EQ(resultJSON["resultSizeTotal"], testCase.resultSize);
+  ASSERT_EQ(resultJSON["resultSizeExported"], testCase.resultSize);
+  EXPECT_EQ(resultJSON["res"], testCase.resultQLeverJSON);
 
   EXPECT_EQ(nlohmann::json::parse(runQueryStreamableResult(
                 testCase.kg, testCase.query, sparqlJson, useTextIndex)),
@@ -144,6 +152,16 @@ void runSelectQueryTestCase(
   auto xmlAsString = runQueryStreamableResult(testCase.kg, testCase.query,
                                               sparqlXml, useTextIndex);
   EXPECT_EQ(testCase.resultXml, xmlAsString);
+
+  // Test the interaction of normal limit (the LIMIT of the query) and export
+  // limit (the value of the `send` parameter).
+  for (uint64_t exportLimit = 0ul; exportLimit < 4ul; ++exportLimit) {
+    auto resultJson = nlohmann::json::parse(runQueryStreamableResult(
+        testCase.kg, testCase.query, qleverJson, false, exportLimit));
+    ASSERT_EQ(resultJson["resultSizeTotal"], testCase.resultSize);
+    ASSERT_EQ(resultJson["resultSizeExported"],
+              std::min(exportLimit, testCase.resultSize));
+  }
 }
 
 // Run a single test case for a CONSTRUCT query.
@@ -156,13 +174,24 @@ void runConstructQueryTestCase(
             testCase.resultTsv);
   EXPECT_EQ(runQueryStreamableResult(testCase.kg, testCase.query, csv),
             testCase.resultCsv);
-  auto qleverJSONStreamResult = nlohmann::json::parse(
+  auto resultJson = nlohmann::json::parse(
       runQueryStreamableResult(testCase.kg, testCase.query, qleverJson));
-  ASSERT_EQ(qleverJSONStreamResult["query"], testCase.query);
-  ASSERT_EQ(qleverJSONStreamResult["resultsize"], testCase.resultSize);
-  EXPECT_EQ(qleverJSONStreamResult["res"], testCase.resultQLeverJSON);
+  ASSERT_EQ(resultJson["query"], testCase.query);
+  ASSERT_EQ(resultJson["resultSizeTotal"], testCase.resultSizeTotal);
+  ASSERT_EQ(resultJson["resultSizeExported"], testCase.resultSizeExported);
+  EXPECT_EQ(resultJson["res"], testCase.resultQLeverJSON);
   EXPECT_EQ(runQueryStreamableResult(testCase.kg, testCase.query, turtle),
             testCase.resultTurtle);
+
+  // Test the interaction of normal limit (the LIMIT of the query) and export
+  // limit (the value of the `send` parameter).
+  for (uint64_t exportLimit = 0ul; exportLimit < 4ul; ++exportLimit) {
+    auto resultJson = nlohmann::json::parse(runQueryStreamableResult(
+        testCase.kg, testCase.query, qleverJson, false, exportLimit));
+    ASSERT_EQ(resultJson["resultSizeTotal"], testCase.resultSizeTotal);
+    ASSERT_EQ(resultJson["resultSizeExported"],
+              std::min(exportLimit, testCase.resultSizeExported));
+  }
 }
 
 // Run a single test case for an ASK query.
@@ -178,11 +207,11 @@ void runAskQueryTestCase(
       runQueryStreamableResult(testCase.kg, testCase.query, octetStream));
   EXPECT_ANY_THROW(
       runQueryStreamableResult(testCase.kg, testCase.query, turtle));
-  auto qleverJSONStreamResult = nlohmann::json::parse(
+  auto resultJson = nlohmann::json::parse(
       runQueryStreamableResult(testCase.kg, testCase.query, qleverJson));
-  ASSERT_EQ(qleverJSONStreamResult["query"], testCase.query);
-  ASSERT_EQ(qleverJSONStreamResult["resultsize"], 1u);
-  EXPECT_EQ(qleverJSONStreamResult["res"], testCase.resultQLeverJSON);
+  ASSERT_EQ(resultJson["query"], testCase.query);
+  ASSERT_EQ(resultJson["resultSizeExported"], 1u);
+  EXPECT_EQ(resultJson["res"], testCase.resultQLeverJSON);
 
   EXPECT_EQ(nlohmann::json::parse(runQueryStreamableResult(
                 testCase.kg, testCase.query, sparqlJson)),
@@ -344,7 +373,7 @@ TEST(ExportQueryExecutionTrees, Integers) {
   runSelectQueryTestCase(testCase);
 
   TestCaseConstructQuery testCaseConstruct{
-      kg, "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o", 3,
+      kg, "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o", 3, 3,
       // TSV
       "<s>\t<p>\t-42019234865781\n"
       "<s>\t<p>\t42\n"
@@ -402,7 +431,7 @@ TEST(ExportQueryExecutionTrees, Bool) {
   runSelectQueryTestCase(testCase);
 
   TestCaseConstructQuery testCaseConstruct{
-      kg, "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o", 2,
+      kg, "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o", 2, 2,
       // TSV
       "<s>\t<p>\tfalse\n"
       "<s>\t<p>\ttrue\n",
@@ -444,10 +473,10 @@ TEST(ExportQueryExecutionTrees, UnusedVariable) {
       makeExpectedSparqlJSON({}), expectedXml};
   runSelectQueryTestCase(testCase);
 
-  // If we use a variable that is always unbound in a CONSTRUCT triple, then
-  // the result for this triple will be empty.
+  // The `2` is the number of results including triples with UNDEF values. The
+  // `0` is the number of results excluding such triples.
   TestCaseConstructQuery testCaseConstruct{
-      kg, "CONSTRUCT {?x ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o", 0,
+      kg, "CONSTRUCT {?x ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o", 2, 0,
       // TSV
       "",
       // CSV
@@ -502,7 +531,7 @@ TEST(ExportQueryExecutionTrees, Floats) {
   runSelectQueryTestCase(testCaseFloat);
 
   TestCaseConstructQuery testCaseConstruct{
-      kg, "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o", 3,
+      kg, "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o", 3, 3,
       // TSV
       "<s>\t<p>\t-42019234865780982022144\n"
       "<s>\t<p>\t4.01293e-12\n"
@@ -559,6 +588,7 @@ TEST(ExportQueryExecutionTrees, Dates) {
       kg,
       "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o",
       1,
+      1,
       // TSV
       "<s>\t<p>\t\"1950-01-01T00:00:00\"^^<http://www.w3.org/2001/"
       "XMLSchema#dateTime>\n",  // missing
@@ -648,6 +678,7 @@ TEST(ExportQueryExecutionTrees, Entities) {
       kg,
       "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o",
       1,
+      1,
       // TSV
       "<s>\t<p>\t<http://qlever.com/o>\n",
       // CSV
@@ -696,6 +727,7 @@ TEST(ExportQueryExecutionTrees, LiteralWithLanguageTag) {
       kg,
       "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o",
       1,
+      1,
       // TSV
       "<s>\t<p>\t\"Some\"Where Over,\"@en-ca\n",
       // CSV
@@ -744,6 +776,7 @@ TEST(ExportQueryExecutionTrees, LiteralWithDatatype) {
       kg,
       "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o",
       1,
+      1,
       // TSV
       "<s>\t<p>\t\"something\"^^<www.example.org/bim>\n",
       // CSV
@@ -791,6 +824,7 @@ TEST(ExportQueryExecutionTrees, LiteralPlain) {
       kg,
       "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o",
       1,
+      1,
       // TSV
       "<s>\t<p>\t\"something\"\n",
       // CSV
@@ -836,6 +870,7 @@ testIriKg</uri></binding>
       kg,
       "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o",
       1,
+      1,
       // TSV
       "<s>\t<p>\t<https:// : )\\ntestIriKg>\n",
       // CSV
@@ -899,6 +934,7 @@ TEST(ExportQueryExecutionTrees, TestWithIriExtendedEscaped) {
       kg,
       "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o",
       1,
+      1,
       // TSV
       "<s>\t<p>\t<iriescaped\x01o\x02"
       "e\x03i\x04o\x05u\x06"
@@ -953,6 +989,7 @@ TEST(ExportQueryExecutionTrees, TestIriWithEscapedIriString) {
       kg,
       "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o} ORDER BY ?o",
       1,
+      1,
       // TSV
       "<s>\t<p>\t\" hallo\\n  welt\"\n",
       // CSV
@@ -993,12 +1030,13 @@ TEST(ExportQueryExecutionTrees, UndefinedValues) {
       expectedXml};
   runSelectQueryTestCase(testCase);
 
-  // In CONSTRUCT queries, results with undefined values in the exported
-  // variables are filtered out, so the result is empty.
+  // The `1` is the number of results including triples with UNDEF values. The
+  // `0` is the number of results excluding such triples.
   TestCaseConstructQuery testCaseConstruct{
       kg,
       "CONSTRUCT {?s <pred> ?o} WHERE {?s <p> <o> OPTIONAL {?s <p2> ?o}} ORDER "
       "BY ?o",
+      1,
       0,
       "",
       "",
@@ -1338,12 +1376,14 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) {
   }();
 
   Result result{std::move(tableGenerator), {}};
+  uint64_t resultSizeTotal = 0;
   auto generator = ExportQueryExecutionTrees::getRowIndices(
-      LimitOffsetClause{._limit = 1, ._offset = 1}, result);
+      LimitOffsetClause{._limit = 1, ._offset = 1}, result, resultSizeTotal);
 
-  auto referenceTable = makeIdTableFromVector({{2}});
+  auto expectedResult = makeIdTableFromVector({{2}});
   EXPECT_THAT(convertToVector(std::move(generator)),
-              matchesIdTables(referenceTable));
+              matchesIdTables(expectedResult));
+  EXPECT_EQ(resultSizeTotal, 1);
 }
 
 // _____________________________________________________________________________
@@ -1360,13 +1400,16 @@ TEST(ExportQueryExecutionTrees,
   }();
 
   Result result{std::move(tableGenerator), {}};
+  uint64_t resultSizeTotal = 0;
   auto generator = ExportQueryExecutionTrees::getRowIndices(
-      LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result);
+      LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result,
+      resultSizeTotal);
 
-  auto referenceTable1 = makeIdTableFromVector({{4}, {5}});
+  auto expectedResult = makeIdTableFromVector({{4}, {5}});
 
   EXPECT_THAT(convertToVector(std::move(generator)),
-              matchesIdTables(referenceTable1));
+              matchesIdTables(expectedResult));
+  EXPECT_EQ(resultSizeTotal, 2);
 }
 
 // _____________________________________________________________________________
@@ -1383,13 +1426,15 @@ TEST(ExportQueryExecutionTrees,
   }();
 
   Result result{std::move(tableGenerator), {}};
+  uint64_t resultSizeTotal = 0;
   auto generator = ExportQueryExecutionTrees::getRowIndices(
-      LimitOffsetClause{._limit = 3}, result);
+      LimitOffsetClause{._limit = 3}, result, resultSizeTotal);
 
-  auto referenceTable1 = makeIdTableFromVector({{1}, {2}, {3}});
+  auto expectedResult = makeIdTableFromVector({{1}, {2}, {3}});
 
   EXPECT_THAT(convertToVector(std::move(generator)),
-              matchesIdTables(referenceTable1));
+              matchesIdTables(expectedResult));
+  EXPECT_EQ(resultSizeTotal, 3);
 }
 
 // _____________________________________________________________________________
@@ -1406,14 +1451,16 @@ TEST(ExportQueryExecutionTrees,
   }();
 
   Result result{std::move(tableGenerator), {}};
+  uint64_t resultSizeTotal = 0;
   auto generator = ExportQueryExecutionTrees::getRowIndices(
-      LimitOffsetClause{._limit = 3, ._offset = 1}, result);
+      LimitOffsetClause{._limit = 3, ._offset = 1}, result, resultSizeTotal);
 
-  auto referenceTable1 = makeIdTableFromVector({{2}, {3}});
-  auto referenceTable2 = makeIdTableFromVector({{4}});
+  auto expectedResult1 = makeIdTableFromVector({{2}, {3}});
+  auto expectedResult2 = makeIdTableFromVector({{4}});
 
   EXPECT_THAT(convertToVector(std::move(generator)),
-              matchesIdTables(referenceTable1, referenceTable2));
+              matchesIdTables(expectedResult1, expectedResult2));
+  EXPECT_EQ(resultSizeTotal, 3);
 }
 
 // _____________________________________________________________________________
@@ -1434,30 +1481,33 @@ TEST(ExportQueryExecutionTrees,
   }();
 
   Result result{std::move(tableGenerator), {}};
+  uint64_t resultSizeTotal = 0;
   auto generator = ExportQueryExecutionTrees::getRowIndices(
-      LimitOffsetClause{._limit = 5, ._offset = 2}, result);
+      LimitOffsetClause{._limit = 5, ._offset = 2}, result, resultSizeTotal);
 
-  auto referenceTable1 = makeIdTableFromVector({{3}});
-  auto referenceTable2 = makeIdTableFromVector({{4}, {5}});
-  auto referenceTable3 = makeIdTableFromVector({{6}, {7}});
+  auto expectedTable1 = makeIdTableFromVector({{3}});
+  auto expectedTable2 = makeIdTableFromVector({{4}, {5}});
+  auto expectedTable3 = makeIdTableFromVector({{6}, {7}});
 
-  EXPECT_THAT(
-      convertToVector(std::move(generator)),
-      matchesIdTables(referenceTable1, referenceTable2, referenceTable3));
+  EXPECT_THAT(convertToVector(std::move(generator)),
+              matchesIdTables(expectedTable1, expectedTable2, expectedTable3));
+  EXPECT_EQ(resultSizeTotal, 5);
 }
 
 // _____________________________________________________________________________
 TEST(ExportQueryExecutionTrees, ensureGeneratorIsNotConsumedWhenNotRequired) {
   {
     auto throwingGenerator = []() -> Result::Generator {
-      ADD_FAILURE() << "Generator was started" << std::endl;
-      throw std::runtime_error("Generator was started");
+      std::string message = "Generator was started, but should not have been";
+      ADD_FAILURE() << message << std::endl;
+      throw std::runtime_error(message);
       co_return;
     }();
 
     Result result{std::move(throwingGenerator), {}};
+    uint64_t resultSizeTotal = 0;
     auto generator = ExportQueryExecutionTrees::getRowIndices(
-        LimitOffsetClause{._limit = 0, ._offset = 0}, result);
+        LimitOffsetClause{._limit = 0, ._offset = 0}, result, resultSizeTotal);
     EXPECT_NO_THROW(convertToVector(std::move(generator)));
   }
 
@@ -1467,17 +1517,22 @@ TEST(ExportQueryExecutionTrees, ensureGeneratorIsNotConsumedWhenNotRequired) {
                                      LocalVocab{}};
       co_yield pair1;
 
-      ADD_FAILURE() << "Generator was resumed" << std::endl;
-      throw std::runtime_error("Generator was resumed");
+      std::string message =
+          "Generator was called a second time, but should not "
+          "have been";
+      ADD_FAILURE() << message << std::endl;
+      throw std::runtime_error(message);
     }();
 
     Result result{std::move(throwAfterYieldGenerator), {}};
+    uint64_t resultSizeTotal = 0;
     auto generator = ExportQueryExecutionTrees::getRowIndices(
-        LimitOffsetClause{._limit = 1, ._offset = 0}, result);
-    IdTable referenceTable1 = makeIdTableFromVector({{1}});
+        LimitOffsetClause{._limit = 1, ._offset = 0}, result, resultSizeTotal);
+    IdTable expectedTable = makeIdTableFromVector({{1}});
     std::vector<IdTable> tables;
     EXPECT_NO_THROW({ tables = convertToVector(std::move(generator)); });
-    EXPECT_THAT(tables, matchesIdTables(referenceTable1));
+    EXPECT_THAT(tables, matchesIdTables(expectedTable));
+    EXPECT_EQ(resultSizeTotal, 1);
   }
 }