From 4594765d543e5e69e3eab3e8c726efb8f6ff3e35 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 11 Jul 2024 17:18:45 +0200
Subject: [PATCH 01/14] All tests passing, Let's feed this to the tools, and
 then implement the missing unit tests.

---
 src/engine/IndexScan.cpp       | 62 +++++++++++++++++++++++-----------
 src/engine/QueryPlanner.cpp    |  8 ++---
 src/index/Index.cpp            | 23 +++++++------
 src/index/Index.h              | 12 ++++---
 src/index/IndexImpl.cpp        | 42 ++++++++++++++++-------
 src/index/IndexImpl.h          | 12 ++++---
 test/IndexTest.cpp             | 13 +++----
 test/QueryPlannerTest.cpp      | 57 +++++++++++++------------------
 test/QueryPlannerTestHelpers.h |  3 ++
 test/util/IndexTestHelpers.cpp |  7 ++--
 10 files changed, 140 insertions(+), 99 deletions(-)
diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp
index 0aec6a3e6a..24b69df383 100644
--- a/src/engine/IndexScan.cpp
+++ b/src/engine/IndexScan.cpp
@@ -32,11 +32,10 @@ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
   }
   sizeEstimate_ = computeSizeEstimate();
 
-  // Check the following invariant: The permuted input triple must contain at
-  // least one variable, and all the variables must be at the end of the
+  // Check the following invariant: All the variables must be at the end of the
   // permuted triple. For example in the PSO permutation, either only the O, or
-  // the S and O, or all three of P, S, O can be variables, all other
-  // combinations are not supported.
+  // the S and O, or all three of P, S, O, or none of them can be variables, all
+  // other combinations are not supported.
   auto permutedTriple = getPermutedTriple();
   for (size_t i = 0; i < 3 - numVariables_; ++i) {
     AD_CONTRACT_CHECK(!permutedTriple.at(i)->isVariable());
@@ -57,7 +56,7 @@ string IndexScan::getCacheKeyImpl() const {
   auto permutationString = Permutation::toString(permutation_);
 
   if (numVariables_ == 3) {
-    os << "SCAN FOR FULL INDEX " << permutationString << " (DUMMY OPERATION)";
+    os << "SCAN FOR FULL INDEX " << permutationString;
 
   } else {
     os << "SCAN " << permutationString << " with ";
@@ -66,10 +65,9 @@ string IndexScan::getCacheKeyImpl() const {
       const auto& key = getPermutedTriple().at(idx)->toRdfLiteral();
       os << keyString << " = \"" << key << "\"";
     };
-    addKey(0);
-    if (numVariables_ == 1) {
+    for (size_t i = 0; i < 3 - numVariables_; ++i) {
+      addKey(i);
       os << ", ";
-      addKey(1);
     }
   }
   if (!additionalColumns_.empty()) {
@@ -93,6 +91,8 @@ size_t IndexScan::getResultWidth() const {
 // _____________________________________________________________________________
 vector<ColumnIndex> IndexScan::resultSortedOn() const {
   switch (numVariables_) {
+    case 0:
+      return {};
     case 1:
       return {ColumnIndex{0}};
     case 2:
@@ -132,11 +132,18 @@ Result IndexScan::computeResult([[maybe_unused]] bool requestLaziness) {
   const auto& index = _executionContext->getIndex();
   const auto permutedTriple = getPermutedTriple();
   if (numVariables_ == 2) {
-    idTable = index.scan(*permutedTriple[0], std::nullopt, permutation_,
-                         additionalColumns(), cancellationHandle_);
+    idTable =
+        index.scan(*permutedTriple[0], std::nullopt, std::nullopt, permutation_,
+                   additionalColumns(), cancellationHandle_);
   } else if (numVariables_ == 1) {
-    idTable = index.scan(*permutedTriple[0], *permutedTriple[1], permutation_,
-                         additionalColumns(), cancellationHandle_);
+    idTable =
+        index.scan(*permutedTriple[0], *permutedTriple[1], std::nullopt,
+                   permutation_, additionalColumns(), cancellationHandle_);
+  } else if (numVariables_ == 0) {
+    idTable =
+        index.scan(*permutedTriple[0], *permutedTriple[1], *permutedTriple[2],
+                   permutation_, additionalColumns(), cancellationHandle_);
+
   } else {
     AD_CORRECTNESS_CHECK(numVariables_ == 3);
     computeFullScan(&idTable, permutation_);
@@ -154,7 +161,11 @@ size_t IndexScan::computeSizeEstimate() const {
     // Should always be in this branch. Else is only for test cases.
 
     // We have to do a simple scan anyway so might as well do it now
-    if (numVariables_ == 1) {
+    if (numVariables_ == 0) {
+      return getIndex().getResultSizeOfScan(
+          *getPermutedTriple()[0], *getPermutedTriple().at(1),
+          *getPermutedTriple()[2], permutation_);
+    } else if (numVariables_ == 1) {
       // TODO<C++23> Use the monadic operation `std::optional::or_else`.
       // Note: we cannot use `optional::value_or()` here, because the else
       // case is expensive to compute, and we need it lazily evaluated.
@@ -165,8 +176,9 @@ size_t IndexScan::computeSizeEstimate() const {
       } else {
         // This call explicitly has to read two blocks of triples from memory to
         // obtain an exact size estimate.
-        return getIndex().getResultSizeOfScan(
-            *getPermutedTriple()[0], *getPermutedTriple().at(1), permutation_);
+        return getIndex().getResultSizeOfScan(*getPermutedTriple()[0],
+                                              *getPermutedTriple().at(1),
+                                              std::nullopt, permutation_);
       }
     } else if (numVariables_ == 2) {
       const TripleComponent& firstKey = *getPermutedTriple().at(0);
@@ -197,6 +209,9 @@ size_t IndexScan::getCostEstimate() { return getSizeEstimateBeforeLimit(); }
 // _____________________________________________________________________________
 void IndexScan::determineMultiplicities() {
   multiplicity_.clear();
+  if (numVariables_ == 0) {
+    return;
+  }
   if (_executionContext) {
     const auto& idx = getIndex();
     if (numVariables_ == 1) {
@@ -285,8 +300,12 @@ Permutation::IdTableGenerator IndexScan::getLazyScan(
   if (s.numVariables_ < 2) {
     col1Id = s.getPermutedTriple()[1]->toValueId(index.getVocab()).value();
   }
+  std::optional<Id> col2Id;
+  if (s.numVariables_ < 1) {
+    col2Id = s.getPermutedTriple()[2]->toValueId(index.getVocab()).value();
+  }
   return index.getPermutation(s.permutation())
-      .lazyScan({col0Id, col1Id, std::nullopt}, std::move(blocks),
+      .lazyScan({col0Id, col1Id, col2Id}, std::move(blocks),
                 s.additionalColumns(), s.cancellationHandle_);
 };
 
@@ -302,19 +321,24 @@ std::optional<Permutation::MetadataAndBlocks> IndexScan::getMetadataForScan(
   std::optional<Id> col1Id =
       numVars >= 2 ? std::nullopt
                    : permutedTriple[1]->toValueId(index.getVocab());
+  std::optional<Id> col2Id =
+      numVars >= 1 ? std::nullopt
+                   : permutedTriple[2]->toValueId(index.getVocab());
   if ((!col0Id.has_value() && numVars < 3) ||
-      (!col1Id.has_value() && numVars < 2)) {
+      (!col1Id.has_value() && numVars < 2) ||
+      (!col2Id.has_value() && numVars < 1)) {
     return std::nullopt;
   }
 
   return index.getPermutation(s.permutation())
-      .getMetadataAndBlocks({col0Id, col1Id, std::nullopt});
+      .getMetadataAndBlocks({col0Id, col1Id, col2Id});
 };
 
 // ________________________________________________________________
 std::array<Permutation::IdTableGenerator, 2>
 IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) {
   AD_CONTRACT_CHECK(s1.numVariables_ <= 3 && s2.numVariables_ <= 3);
+  AD_CONTRACT_CHECK(s1.numVariables_ >= 1 && s2.numVariables_ >= 1);
 
   // This function only works for single column joins. This means that the first
   // variable of both scans must be equal, but all other variables of the scans
@@ -363,7 +387,7 @@ IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) {
 Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan(
     std::span<const Id> joinColumn, const IndexScan& s) {
   AD_EXPENSIVE_CHECK(std::ranges::is_sorted(joinColumn));
-  AD_CORRECTNESS_CHECK(s.numVariables_ <= 3);
+  AD_CORRECTNESS_CHECK(s.numVariables_ <= 3 && s.numVariables_ > 0);
 
   auto metaBlocks1 = getMetadataForScan(s);
 
diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp
index a1978b3bd1..cbb9d15e35 100644
--- a/src/engine/QueryPlanner.cpp
+++ b/src/engine/QueryPlanner.cpp
@@ -629,7 +629,9 @@ void QueryPlanner::seedFromOrdinaryTriple(
   const size_t numVars = static_cast<size_t>(isVariable(triple.s_)) +
                          static_cast<size_t>(isVariable(triple.p_)) +
                          static_cast<size_t>(isVariable(triple.o_));
-  if (numVars == 1) {
+  if (numVars == 0) {
+    addIndexScan(Permutation::Enum::PSO);
+  } else if (numVars == 1) {
     indexScanSingleVarCase(triple, addIndexScan);
   } else if (numVars == 2) {
     indexScanTwoVarsCase(triple, addIndexScan, addFilter);
@@ -673,10 +675,6 @@ auto QueryPlanner::seedWithScansAndText(
       seeds.push_back(getTextLeafPlan(node, textLimits));
       continue;
     }
-    if (node._variables.empty()) {
-      AD_THROW("Triples should have at least one variable. Not the case in: " +
-               node.triple_.asString());
-    }
 
     // Property paths must have been handled previously.
     AD_CORRECTNESS_CHECK(node.triple_.p_._operation ==
diff --git a/src/index/Index.cpp b/src/index/Index.cpp
index 8bc3fc351f..f8bf930f0b 100644
--- a/src/index/Index.cpp
+++ b/src/index/Index.cpp
@@ -278,24 +278,27 @@ vector<float> Index::getMultiplicities(const TripleComponent& key,
 IdTable Index::scan(
     const TripleComponent& col0String,
     std::optional<std::reference_wrapper<const TripleComponent>> col1String,
+    std::optional<std::reference_wrapper<const TripleComponent>> col2String,
     Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle) const {
-  return pimpl_->scan(col0String, col1String, p, additionalColumns,
-                      std::move(cancellationHandle));
+  return pimpl_->scan(col0String, col1String, col2String, p, additionalColumns,
+                      cancellationHandle);
 }
 
 // ____________________________________________________________________________
 IdTable Index::scan(
-    Id col0Id, std::optional<Id> col1Id, Permutation::Enum p,
-    Permutation::ColumnIndicesRef additionalColumns,
+    Id col0Id, std::optional<Id> col1Id, std::optional<Id> col2Id,
+    Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle) const {
-  return pimpl_->scan(col0Id, col1Id, p, additionalColumns,
-                      std::move(cancellationHandle));
+  return pimpl_->scan({col0Id, col1Id, col2Id}, p, additionalColumns,
+                      cancellationHandle);
 }
 
 // ____________________________________________________________________________
-size_t Index::getResultSizeOfScan(const TripleComponent& col0String,
-                                  const TripleComponent& col1String,
-                                  const Permutation::Enum& permutation) const {
-  return pimpl_->getResultSizeOfScan(col0String, col1String, permutation);
+size_t Index::getResultSizeOfScan(
+    const TripleComponent& col0String, const TripleComponent& col1String,
+    std::optional<std::reference_wrapper<const TripleComponent>> col2String,
+    const Permutation::Enum& permutation) const {
+  return pimpl_->getResultSizeOfScan(col0String, col1String, col2String,
+                                     permutation);
 }
diff --git a/src/index/Index.h b/src/index/Index.h
index 48c2717687..60489db04c 100644
--- a/src/index/Index.h
+++ b/src/index/Index.h
@@ -241,20 +241,22 @@ class Index {
   IdTable scan(
       const TripleComponent& col0String,
       std::optional<std::reference_wrapper<const TripleComponent>> col1String,
+      std::optional<std::reference_wrapper<const TripleComponent>> col2String,
       Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
       const ad_utility::SharedCancellationHandle& cancellationHandle) const;
 
   // Similar to the overload of `scan` above, but the keys are specified as IDs.
   IdTable scan(
-      Id col0Id, std::optional<Id> col1Id, Permutation::Enum p,
-      Permutation::ColumnIndicesRef additionalColumns,
+      Id col0Id, std::optional<Id> col1Id, std::optional<Id> col2Id,
+      Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
       const ad_utility::SharedCancellationHandle& cancellationHandle) const;
 
   // Similar to the previous overload of `scan`, but only get the exact size of
   // the scan result.
-  size_t getResultSizeOfScan(const TripleComponent& col0String,
-                             const TripleComponent& col1String,
-                             const Permutation::Enum& permutation) const;
+  size_t getResultSizeOfScan(
+      const TripleComponent& col0String, const TripleComponent& col1String,
+      std::optional<std::reference_wrapper<const TripleComponent>> col2String,
+      const Permutation::Enum& permutation) const;
 
   // Get access to the implementation. This should be used rarely as it
   // requires including the rather expensive `IndexImpl.h` header
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 49b55cb763..15abe39f4b 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -1369,41 +1369,59 @@ vector<float> IndexImpl::getMultiplicities(
 IdTable IndexImpl::scan(
     const TripleComponent& col0String,
     std::optional<std::reference_wrapper<const TripleComponent>> col1String,
+    std::optional<std::reference_wrapper<const TripleComponent>> col2String,
     const Permutation::Enum& permutation,
     Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle) const {
+  AD_CORRECTNESS_CHECK(!col2String.has_value() || col1String.has_value());
   std::optional<Id> col0Id = col0String.toValueId(getVocab());
-  std::optional<Id> col1Id =
-      col1String.has_value() ? col1String.value().get().toValueId(getVocab())
-                             : std::nullopt;
-  if (!col0Id.has_value() || (col1String.has_value() && !col1Id.has_value())) {
-    size_t numColumns = col1String.has_value() ? 1 : 2;
+  // TODO<C++23> Use the monadic operations for std::optional.
+  bool elementNotFound = !col0Id.has_value();
+  size_t numColumns = 2;
+  auto handleOptional = [&elementNotFound, &numColumns,
+                         this](const auto& comp) -> std::optional<Id> {
+    if (!comp.has_value()) {
+      return std::nullopt;
+    }
+    --numColumns;
+    auto result = comp.value().get().toValueId(getVocab());
+    elementNotFound = elementNotFound || !result.has_value();
+    return result;
+  };
+  std::optional<Id> col1Id = handleOptional(col1String);
+  std::optional<Id> col2Id = handleOptional(col2String);
+  if (elementNotFound) {
     cancellationHandle->throwIfCancelled();
     return IdTable{numColumns + additionalColumns.size(), allocator_};
   }
-  return scan(col0Id.value(), col1Id, permutation, additionalColumns,
+  return scan({col0Id.value(), col1Id, col2Id}, permutation, additionalColumns,
               cancellationHandle);
 }
 // _____________________________________________________________________________
 IdTable IndexImpl::scan(
-    Id col0Id, std::optional<Id> col1Id, Permutation::Enum p,
-    Permutation::ColumnIndicesRef additionalColumns,
+    const Permutation::ScanSpecification& scanSpecification,
+    Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle) const {
-  return getPermutation(p).scan({col0Id, col1Id, std::nullopt},
-                                additionalColumns, cancellationHandle);
+  return getPermutation(p).scan(scanSpecification, additionalColumns,
+                                cancellationHandle);
 }
 
 // _____________________________________________________________________________
 size_t IndexImpl::getResultSizeOfScan(
     const TripleComponent& col0, const TripleComponent& col1,
+    std::optional<std::reference_wrapper<const TripleComponent>> col2,
     const Permutation::Enum& permutation) const {
   std::optional<Id> col0Id = col0.toValueId(getVocab());
   std::optional<Id> col1Id = col1.toValueId(getVocab());
-  if (!col0Id.has_value() || !col1Id.has_value()) {
+  std::optional<Id> col2Id = col2.has_value()
+                                 ? col2.value().get().toValueId(getVocab())
+                                 : std::nullopt;
+  if (!col0Id.has_value() || !col1Id.has_value() ||
+      (col2.has_value() && !col1Id.has_value())) {
     return 0;
   }
   const Permutation& p = getPermutation(permutation);
-  return p.getResultSizeOfScan({col0Id.value(), col1Id.value(), std::nullopt});
+  return p.getResultSizeOfScan({col0Id.value(), col1Id.value(), col2Id});
 }
 
 // _____________________________________________________________________________
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index 10c6ff0c79..ab7d5ce67a 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -400,20 +400,22 @@ class IndexImpl {
   IdTable scan(
       const TripleComponent& col0String,
       std::optional<std::reference_wrapper<const TripleComponent>> col1String,
+      std::optional<std::reference_wrapper<const TripleComponent>> col2String,
       const Permutation::Enum& permutation,
       Permutation::ColumnIndicesRef additionalColumns,
       const ad_utility::SharedCancellationHandle& cancellationHandle) const;
 
   // _____________________________________________________________________________
   IdTable scan(
-      Id col0Id, std::optional<Id> col1Id, Permutation::Enum p,
-      Permutation::ColumnIndicesRef additionalColumns,
+      const Permutation::ScanSpecification& scanSpecification,
+      Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
       const ad_utility::SharedCancellationHandle& cancellationHandle) const;
 
   // _____________________________________________________________________________
-  size_t getResultSizeOfScan(const TripleComponent& col0,
-                             const TripleComponent& col1,
-                             const Permutation::Enum& permutation) const;
+  size_t getResultSizeOfScan(
+      const TripleComponent& col0, const TripleComponent& col1,
+      std::optional<std::reference_wrapper<const TripleComponent>> col2,
+      const Permutation::Enum& permutation) const;
 
  private:
   // Private member functions
diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp
index 3aaed82ec1..2b38c14f75 100644
--- a/test/IndexTest.cpp
+++ b/test/IndexTest.cpp
@@ -36,9 +36,9 @@ auto makeTestScanWidthOne = [](const IndexImpl& index) {
                   ad_utility::source_location l =
                       ad_utility::source_location::current()) {
     auto t = generateLocationTrace(l);
-    IdTable result =
-        index.scan(c0, std::cref(c1), permutation, additionalColumns,
-                   std::make_shared<ad_utility::CancellationHandle<>>());
+    IdTable result = index.scan(
+        c0, std::cref(c1), std::nullopt, permutation, additionalColumns,
+        std::make_shared<ad_utility::CancellationHandle<>>());
     ASSERT_EQ(result.numColumns(), 1 + additionalColumns.size());
     ASSERT_EQ(result, makeIdTableFromVector(expected));
   };
@@ -54,9 +54,10 @@ auto makeTestScanWidthTwo = [](const IndexImpl& index) {
                   ad_utility::source_location l =
                       ad_utility::source_location::current()) {
     auto t = generateLocationTrace(l);
-    IdTable wol = index.scan(
-        c0, std::nullopt, permutation, Permutation::ColumnIndicesRef{},
-        std::make_shared<ad_utility::CancellationHandle<>>());
+    IdTable wol =
+        index.scan(c0, std::nullopt, std::nullopt, permutation,
+                   Permutation::ColumnIndicesRef{},
+                   std::make_shared<ad_utility::CancellationHandle<>>());
     ASSERT_EQ(wol, makeIdTableFromVector(expected));
   };
 };
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 7451016f2a..a32df5ac0c 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -287,31 +287,27 @@ TEST(QueryPlanner, testStarTwoFree) {
 }
 
 TEST(QueryPlanner, testFilterAfterSeed) {
-  ParsedQuery pq = SparqlParser::parseQuery(
+  auto scan = h::IndexScanFromStrings;
+  auto qec = ad_utility::testing::getQec("<s> <r> <x>");
+  h::expect(
       "SELECT ?x ?y ?z WHERE {"
       "?x <r> ?y . ?y <r> ?z . "
-      "FILTER(?x != ?y) }");
-  QueryPlanner qp = makeQueryPlanner();
-  QueryExecutionTree qet = qp.createExecutionTree(pq);
-  ASSERT_EQ(qet.getCacheKey(),
-            "FILTER JOIN\nSCAN POS with P = \"<r>\" join-column: "
-            "[0]\n|X|\nSCAN PSO with P = \"<r>\" join-column: [0] with "
-            "N16sparqlExpression10relational20RelationalExpressionILN18valueIdC"
-            "omparators10ComparisonE3EEE#column_1##column_0#");
+      "FILTER(?x != ?y) }",
+      h::Join(h::Filter("?x != ?y", scan("?x", "<r>", "?y")),
+              scan("?y", "<r>", "?z")),
+      qec);
 }
 
 TEST(QueryPlanner, testFilterAfterJoin) {
-  ParsedQuery pq = SparqlParser::parseQuery(
+  auto scan = h::IndexScanFromStrings;
+  auto qec = ad_utility::testing::getQec("<s> <r> <x>");
+  h::expect(
       "SELECT ?x ?y ?z WHERE {"
       "?x <r> ?y . ?y <r> ?z . "
-      "FILTER(?x != ?z) }");
-  QueryPlanner qp = makeQueryPlanner();
-  QueryExecutionTree qet = qp.createExecutionTree(pq);
-  ASSERT_EQ(qet.getCacheKey(),
-            "FILTER JOIN\nSCAN POS with P = \"<r>\" join-column: "
-            "[0]\n|X|\nSCAN PSO with P = \"<r>\" join-column: [0] with "
-            "N16sparqlExpression10relational20RelationalExpressionILN18valueIdC"
-            "omparators10ComparisonE3EEE#column_1##column_2#");
+      "FILTER(?x != ?z) }",
+      h::Filter("?x != ?z",
+                h::Join(scan("?x", "<r>", "?y"), scan("?y", "<r>", "?z"))),
+      qec);
 }
 
 TEST(QueryPlanner, threeVarTriples) {
@@ -565,25 +561,18 @@ TEST(QueryExecutionTreeTest, testFormerSegfaultTriFilter) {
 }
 
 TEST(QueryPlanner, testSimpleOptional) {
-  QueryPlanner qp = makeQueryPlanner();
-
-  ParsedQuery pq = SparqlParser::parseQuery(
+  auto scan = h::IndexScanFromStrings;
+  h::expect(
       "SELECT ?a ?b \n "
-      "WHERE  {?a <rel1> ?b . OPTIONAL { ?a <rel2> ?c }}");
-  QueryExecutionTree qet = qp.createExecutionTree(pq);
-  ASSERT_EQ(qet.getCacheKey(),
-            "OPTIONAL_JOIN\nSCAN PSO with P = \"<rel1>\" join-columns: "
-            "[0]\n|X|\nSCAN PSO with P = \"<rel2>\" join-columns: [0]");
-
-  ParsedQuery pq2 = SparqlParser::parseQuery(
+      "WHERE  {?a <rel1> ?b . OPTIONAL { ?a <rel2> ?c }}",
+      h::OptionalJoin(scan("?a", "<rel1>", "?b"), scan("?a", "<rel2>", "?c")));
+  h::expect(
       "SELECT ?a ?b \n "
       "WHERE  {?a <rel1> ?b . "
-      "OPTIONAL { ?a <rel2> ?c }} ORDER BY ?b");
-  QueryExecutionTree qet2 = qp.createExecutionTree(pq2);
-  ASSERT_EQ(qet2.getCacheKey(),
-            "ORDER BY on columns:asc(1) \nOPTIONAL_JOIN\nSCAN PSO with P = "
-            "\"<rel1>\" join-columns: [0]\n|X|\nSCAN PSO with P = \"<rel2>\" "
-            "join-columns: [0]");
+      "OPTIONAL { ?a <rel2> ?c }} ORDER BY ?b",
+      h::OrderBy({{Variable{"?b"}, ::OrderBy::AscOrDesc::Asc}},
+                 h::OptionalJoin(scan("?a", "<rel1>", "?b"),
+                                 scan("?a", "<rel2>", "?c"))));
 }
 
 TEST(QueryPlanner, SimpleTripleOneVariable) {
diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h
index c33f9a2faa..37c48f3124 100644
--- a/test/QueryPlannerTestHelpers.h
+++ b/test/QueryPlannerTestHelpers.h
@@ -16,6 +16,7 @@
 #include "engine/Join.h"
 #include "engine/MultiColumnJoin.h"
 #include "engine/NeutralElementOperation.h"
+#include "engine/OptionalJoin.h"
 #include "engine/OrderBy.h"
 #include "engine/QueryExecutionTree.h"
 #include "engine/QueryPlanner.h"
@@ -204,6 +205,8 @@ inline auto IndexScanFromStrings =
 inline auto MultiColumnJoin = MatchTypeAndUnorderedChildren<::MultiColumnJoin>;
 inline auto Join = MatchTypeAndUnorderedChildren<::Join>;
 
+constexpr auto OptionalJoin = MatchTypeAndOrderedChildren<::OptionalJoin>;
+
 // Return a matcher that matches a query execution tree that consists of
 // multiple JOIN operations that join the `children`. The `INTERNAL SORT BY`
 // operations required for the joins are also ignored by this matcher.
diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp
index 4128af6612..cf12731650 100644
--- a/test/util/IndexTestHelpers.cpp
+++ b/test/util/IndexTestHelpers.cpp
@@ -60,8 +60,9 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn(
   auto hasPatternId = qlever::specialIds.at(HAS_PATTERN_PREDICATE);
   auto checkSingleElement = [&cancellationDummy, &hasPatternId](
                                 const Index& index, size_t patternIdx, Id id) {
-    auto scanResultHasPattern = index.scan(
-        hasPatternId, id, Permutation::Enum::PSO, {}, cancellationDummy);
+    auto scanResultHasPattern =
+        index.scan(hasPatternId, id, std::nullopt, Permutation::Enum::PSO, {},
+                   cancellationDummy);
     // Each ID has at most one pattern, it can have none if it doesn't
     // appear as a subject in the knowledge graph.
     AD_CORRECTNESS_CHECK(scanResultHasPattern.numRows() <= 1);
@@ -79,7 +80,7 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn(
         auto cancellationDummy =
             std::make_shared<ad_utility::CancellationHandle<>>();
         auto scanResult = index.scan(
-            col0Id, std::nullopt, permutation,
+            col0Id, std::nullopt, std::nullopt, permutation,
             std::array{ColumnIndex{ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN},
                        ColumnIndex{ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN}},
             cancellationDummy);

From 4d6d981bd0e51a9264556efa236e6eac527bccb3 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 11 Jul 2024 20:36:02 +0200
Subject: [PATCH 02/14] Add some unit tests.

---
 src/index/IndexImpl.cpp       |  2 +-
 test/QueryPlannerTest.cpp     |  3 +-
 test/engine/IndexScanTest.cpp | 64 +++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 612c4db4bc..88ceee38ae 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -1419,7 +1419,7 @@ size_t IndexImpl::getResultSizeOfScan(
                                  ? col2.value().get().toValueId(getVocab())
                                  : std::nullopt;
   if (!col0Id.has_value() || !col1Id.has_value() ||
-      (col2.has_value() && !col1Id.has_value())) {
+      (col2.has_value() && !col2Id.has_value())) {
     return 0;
   }
   const Permutation& p = getPermutation(permutation);
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index a32df5ac0c..5f01553168 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -288,7 +288,8 @@ TEST(QueryPlanner, testStarTwoFree) {
 
 TEST(QueryPlanner, testFilterAfterSeed) {
   auto scan = h::IndexScanFromStrings;
-  auto qec = ad_utility::testing::getQec("<s> <r> <x>");
+  auto qec = ad_utility::testing::getQec(
+      "<s> <r> <x>, <x2>, <x3>. <s2> <r> <y1>, <y2>, <y3>.");
   h::expect(
       "SELECT ?x ?y ?z WHERE {"
       "?x <r> ?y . ?y <r> ?z . "
diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp
index 2920a2cd8e..abc5babd38 100644
--- a/test/engine/IndexScanTest.cpp
+++ b/test/engine/IndexScanTest.cpp
@@ -380,3 +380,67 @@ TEST(IndexScan, additionalColumn) {
       {{getId("<x>"), getId("<z>"), I(0), I(NO_PATTERN)}});
   EXPECT_THAT(res.idTable(), ::testing::ElementsAreArray(exp));
 }
+
+TEST(IndexScan, getResultSizeOfScan) {
+  auto qec = getQec("<x> <p> <s1>, <s2>. <x> <p2> <s1>.");
+  auto getId = makeGetId(qec->getIndex());
+  [[maybe_unused]] auto x = getId("<x>");
+  [[maybe_unused]] auto p = getId("<p>");
+  [[maybe_unused]] auto s1 = getId("<s1>");
+  [[maybe_unused]] auto s2 = getId("<s2>");
+  [[maybe_unused]] auto p2 = getId("<p2>");
+  using V = Variable;
+  using I = TripleComponent::Iri;
+
+  {
+    SparqlTripleSimple scanTriple{V{"?x"}, V("?y"), V{"?z"}};
+    IndexScan scan{qec, Permutation::Enum::PSO, scanTriple};
+    // Note: this currently also contains the (internal) triple for the
+    // `ql:has-pattern` relation of `<x>`.
+    EXPECT_EQ(scan.getSizeEstimate(), 4);
+  }
+  {
+    SparqlTripleSimple scanTriple{V{"?x"}, I::fromIriref("<p>"), V{"?y"}};
+    IndexScan scan{qec, Permutation::Enum::PSO, scanTriple};
+    EXPECT_EQ(scan.getSizeEstimate(), 2);
+  }
+  {
+    SparqlTripleSimple scanTriple{I::fromIriref("<x>"), I::fromIriref("<p>"),
+                                  V{"?y"}};
+    IndexScan scan{qec, Permutation::Enum::PSO, scanTriple};
+    EXPECT_EQ(scan.getSizeEstimate(), 2);
+  }
+  {
+    SparqlTripleSimple scanTriple{V("?x"), I::fromIriref("<p>"),
+                                  I::fromIriref("<s1>")};
+    IndexScan scan{qec, Permutation::Enum::POS, scanTriple};
+    EXPECT_EQ(scan.getSizeEstimate(), 1);
+  }
+  // 0 variables
+  {
+    SparqlTripleSimple scanTriple{I::fromIriref("<x>"), I::fromIriref("<p>"),
+                                  I::fromIriref("<s1>")};
+    IndexScan scan{qec, Permutation::Enum::POS, scanTriple};
+    EXPECT_EQ(scan.getSizeEstimate(), 1);
+    EXPECT_ANY_THROW(scan.getMultiplicity(0));
+    auto res = scan.computeResultOnlyForTesting();
+    ASSERT_EQ(res.idTable().numRows(), 1);
+    ASSERT_EQ(res.idTable().numColumns(), 0);
+  }
+  {
+    SparqlTripleSimple scanTriple{I::fromIriref("<x2>"), I::fromIriref("<p>"),
+                                  I::fromIriref("<s1>")};
+    IndexScan scan{qec, Permutation::Enum::POS, scanTriple};
+    EXPECT_EQ(scan.getSizeEstimate(), 0);
+  }
+  {
+    SparqlTripleSimple scanTriple{I::fromIriref("<x>"), I::fromIriref("<p>"),
+                                  I::fromIriref("<p>")};
+    IndexScan scan{qec, Permutation::Enum::POS, scanTriple};
+    EXPECT_EQ(scan.getSizeEstimate(), 0);
+    EXPECT_ANY_THROW(scan.getMultiplicity(0));
+    auto res = scan.computeResultOnlyForTesting();
+    ASSERT_EQ(res.idTable().numRows(), 0);
+    ASSERT_EQ(res.idTable().numColumns(), 0);
+  }
+}

From 65e569b3b26db106df1d43d22e12f32ef1c365a6 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 12 Jul 2024 09:25:24 +0200
Subject: [PATCH 03/14] Fix the unrelated unit test.

---
 test/QueryPlannerTest.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 5f01553168..59a3a5b8f5 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -294,8 +294,8 @@ TEST(QueryPlanner, testFilterAfterSeed) {
       "SELECT ?x ?y ?z WHERE {"
       "?x <r> ?y . ?y <r> ?z . "
       "FILTER(?x != ?y) }",
-      h::Join(h::Filter("?x != ?y", scan("?x", "<r>", "?y")),
-              scan("?y", "<r>", "?z")),
+      h::Filter("?x != ?y",
+                h::Join(scan("?x", "<r>", "?y"), scan("?y", "<r>", "?z"))),
       qec);
 }
 

From ee0848fe465af4c9a41f941d3932666b1cb55d23 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 12 Jul 2024 18:30:42 +0200
Subject: [PATCH 04/14] Continue this work on Monday morning, we really have to
 start cleaning up this mess.

---
 src/engine/IndexScan.cpp  | 59 +++++++++++++++------------------------
 src/engine/IndexScan.h    |  2 ++
 src/index/Index.cpp       | 32 ++++++++++++++-------
 src/index/Index.h         | 32 ++++++++++++++-------
 src/index/IndexImpl.cpp   | 46 ++++++++----------------------
 src/index/IndexImpl.h     | 16 ++++-------
 test/QueryPlannerTest.cpp | 14 ++++++++++
 7 files changed, 101 insertions(+), 100 deletions(-)

diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp
index a8f1daa28b..2ab72c8ac3 100644
--- a/src/engine/IndexScan.cpp
+++ b/src/engine/IndexScan.cpp
@@ -6,6 +6,7 @@
 
 #include <absl/strings/str_join.h>
 
+#include <boost/optional.hpp>
 #include <sstream>
 #include <string>
 
@@ -130,20 +131,9 @@ Result IndexScan::computeResult([[maybe_unused]] bool requestLaziness) {
   using enum Permutation::Enum;
   idTable.setNumColumns(numVariables_);
   const auto& index = _executionContext->getIndex();
-  const auto permutedTriple = getPermutedTriple();
-  if (numVariables_ == 2) {
-    idTable =
-        index.scan(*permutedTriple[0], std::nullopt, std::nullopt, permutation_,
-                   additionalColumns(), cancellationHandle_, getLimit());
-  } else if (numVariables_ == 1) {
-    idTable = index.scan(*permutedTriple[0], *permutedTriple[1], std::nullopt,
-                         permutation_, additionalColumns(), cancellationHandle_,
-                         getLimit());
-  } else if (numVariables_ == 0) {
-    idTable = index.scan(*permutedTriple[0], *permutedTriple[1],
-                         *permutedTriple[2], permutation_, additionalColumns(),
-                         cancellationHandle_, getLimit());
-
+  if (numVariables_ < 3) {
+    idTable = index.scan(getPermutedTripleNoVariables(), permutation_,
+                         additionalColumns(), cancellationHandle_, getLimit());
   } else {
     AD_CORRECTNESS_CHECK(numVariables_ == 3);
     computeFullScan(&idTable, permutation_);
@@ -158,14 +148,7 @@ Result IndexScan::computeResult([[maybe_unused]] bool requestLaziness) {
 // _____________________________________________________________________________
 size_t IndexScan::computeSizeEstimate() const {
   if (_executionContext) {
-    // Should always be in this branch. Else is only for test cases.
-
-    // We have to do a simple scan anyway so might as well do it now
-    if (numVariables_ == 0) {
-      return getIndex().getResultSizeOfScan(
-          *getPermutedTriple()[0], *getPermutedTriple().at(1),
-          *getPermutedTriple()[2], permutation_);
-    } else if (numVariables_ == 1) {
+    if (numVariables_ == 1) {
       // TODO<C++23> Use the monadic operation `std::optional::or_else`.
       // Note: we cannot use `optional::value_or()` here, because the else
       // case is expensive to compute, and we need it lazily evaluated.
@@ -173,16 +156,13 @@ size_t IndexScan::computeSizeEstimate() const {
               getCacheKey());
           size.has_value()) {
         return size.value();
-      } else {
-        // This call explicitly has to read two blocks of triples from memory to
-        // obtain an exact size estimate.
-        return getIndex().getResultSizeOfScan(*getPermutedTriple()[0],
-                                              *getPermutedTriple().at(1),
-                                              std::nullopt, permutation_);
       }
-    } else if (numVariables_ == 2) {
-      const TripleComponent& firstKey = *getPermutedTriple().at(0);
-      return getIndex().getCardinality(firstKey, permutation_);
+    }
+
+    // We have to do a simple scan anyway so might as well do it now
+    if (numVariables_ < 3) {
+      return getIndex().getResultSizeOfScan(getPermutedTripleNoVariables(),
+                                            permutation_);
     } else {
       // The triple consists of three variables.
       // TODO<joka921> As soon as all implementations of a full index scan
@@ -292,6 +272,13 @@ std::array<const TripleComponent* const, 3> IndexScan::getPermutedTriple()
           triple[permutation[2]]};
 }
 
+// ___________________________________________________________________________
+ScanSpecificationAsTripleComponent IndexScan::getPermutedTripleNoVariables()
+    const {
+  auto permutedTriple = getPermutedTriple();
+  return {*permutedTriple[0], *permutedTriple[1], *permutedTriple[2]};
+}
+
 // ___________________________________________________________________________
 Permutation::IdTableGenerator IndexScan::getLazyScan(
     const IndexScan& s, std::vector<CompressedBlockMetadata> blocks) {
@@ -304,11 +291,11 @@ Permutation::IdTableGenerator IndexScan::getLazyScan(
   if (s.numVariables_ < 2) {
     col1Id = s.getPermutedTriple()[1]->toValueId(index.getVocab()).value();
   }
-  std::optional<Id> col2Id;
-  if (s.numVariables_ < 1) {
-    col2Id = s.getPermutedTriple()[2]->toValueId(index.getVocab()).value();
-  }
 
+  // This function is currently only called by the `getLazyScanForJoin...`
+  // functions. In these cases we always have at least one variable in each of
+  // the scans, because otherwise there would be no join column.
+  AD_CORRECTNESS_CHECK(s.numVariables_ >= 1);
   // If there is a LIMIT or OFFSET clause that constrains the scan
   // (which can happen with an explicit subquery), we cannot use the prefiltered
   // blocks, as we currently have no mechanism to include limits and offsets
@@ -318,7 +305,7 @@ Permutation::IdTableGenerator IndexScan::getLazyScan(
                           : std::nullopt;
 
   return index.getPermutation(s.permutation())
-      .lazyScan({col0Id, col1Id, col2Id}, std::move(actualBlocks),
+      .lazyScan({col0Id, col1Id, std::nullopt}, std::move(actualBlocks),
                 s.additionalColumns(), s.cancellationHandle_, s.getLimit());
 };
 
diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h
index 6290dfb8d0..edee48a509 100644
--- a/src/engine/IndexScan.h
+++ b/src/engine/IndexScan.h
@@ -3,6 +3,7 @@
 // Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)
 #pragma once
 
+#include <boost/optional.hpp>
 #include <string>
 
 #include "./Operation.h"
@@ -100,6 +101,7 @@ class IndexScan final : public Operation {
   // `permutation_`. For example if `permutation_ == PSO` then the result is
   // {&predicate_, &subject_, &object_}
   std::array<const TripleComponent* const, 3> getPermutedTriple() const;
+  ScanSpecificationAsTripleComponent getPermutedTripleNoVariables() const;
 
  private:
   Result computeResult([[maybe_unused]] bool requestLaziness) override;
diff --git a/src/index/Index.cpp b/src/index/Index.cpp
index 1df87b5a82..39f255d58b 100644
--- a/src/index/Index.cpp
+++ b/src/index/Index.cpp
@@ -276,31 +276,43 @@ vector<float> Index::getMultiplicities(const TripleComponent& key,
 
 // ____________________________________________________________________________
 IdTable Index::scan(
-    const TripleComponent& col0String,
-    std::optional<std::reference_wrapper<const TripleComponent>> col1String,
-    std::optional<std::reference_wrapper<const TripleComponent>> col2String,
+    const ScanSpecificationAsTripleComponent& scanSpecification,
     Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
     const LimitOffsetClause& limitOffset) const {
-  return pimpl_->scan(col0String, col1String, col2String, p, additionalColumns,
+  return pimpl_->scan(scanSpecification, p, additionalColumns,
                       cancellationHandle, limitOffset);
 }
 
 // ____________________________________________________________________________
 IdTable Index::scan(
-    Id col0Id, std::optional<Id> col1Id, std::optional<Id> col2Id,
+    const Permutation::ScanSpecification& scanSpecification,
     Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
     const LimitOffsetClause& limitOffset) const {
-  return pimpl_->scan({col0Id, col1Id, col2Id}, p, additionalColumns,
+  return pimpl_->scan(scanSpecification, p, additionalColumns,
                       cancellationHandle, limitOffset);
 }
 
 // ____________________________________________________________________________
 size_t Index::getResultSizeOfScan(
-    const TripleComponent& col0String, const TripleComponent& col1String,
-    std::optional<std::reference_wrapper<const TripleComponent>> col2String,
+    const ScanSpecificationAsTripleComponent& scanSpecification,
     const Permutation::Enum& permutation) const {
-  return pimpl_->getResultSizeOfScan(col0String, col1String, col2String,
-                                     permutation);
+  return pimpl_->getResultSizeOfScan(scanSpecification, permutation);
+}
+// ____________________________________________________________________________
+std::optional<Permutation::ScanSpecification>
+ScanSpecificationAsTripleComponent::toScanSpecification(
+    const IndexImpl& index) const {
+  std::optional<Id> col0Id = col0_.toValueId(index.getVocab());
+  std::optional<Id> col1Id = col1_.has_value()
+                                 ? col1_.value().toValueId(index.getVocab())
+                                 : std::nullopt;
+  std::optional<Id> col2Id = col2_.has_value()
+                                 ? col2_.value().toValueId(index.getVocab())
+                                 : std::nullopt;
+  if (!col0Id.has_value() || (col1_.has_value() && !col1Id.has_value()) ||
+      (col2_.has_value() && !col2Id.has_value())) {
+    return std::nullopt;
+  }
 }
diff --git a/src/index/Index.h b/src/index/Index.h
index e1322b5b5e..5bd88ea58b 100644
--- a/src/index/Index.h
+++ b/src/index/Index.h
@@ -23,6 +23,21 @@ class IdTable;
 class TextBlockMetaData;
 class IndexImpl;
 
+// TODO<joka921> Comment.
+struct ScanSpecificationAsTripleComponent {
+  TripleComponent col0_;
+  std::optional<TripleComponent> col1_;
+  std::optional<TripleComponent> col2_;
+  ScanSpecificationAsTripleComponent(TripleComponent col0,
+                                     const TripleComponent& col1,
+                                     const TripleComponent& col2);
+  std::optional<Permutation::ScanSpecification> toScanSpecification(
+      const IndexImpl& index) const;
+  size_t numColumns() const {
+    return 2 - col1_.has_value() - col2_.has_value();
+  }
+};
+
 class Index {
  private:
   // Pimpl to reduce compile times.
@@ -238,16 +253,14 @@ class Index {
    * @param p The Permutation::Enum to use (in particularly POS(), SOP,...
    * members of Index class).
    */
-  IdTable scan(
-      const TripleComponent& col0String,
-      std::optional<std::reference_wrapper<const TripleComponent>> col1String,
-      std::optional<std::reference_wrapper<const TripleComponent>> col2String,
-      Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
-      const ad_utility::SharedCancellationHandle& cancellationHandle,
-      const LimitOffsetClause& limitOffset = {}) const;
+  IdTable scan(const ScanSpecificationAsTripleComponent& scanSpecification,
+               Permutation::Enum p,
+               Permutation::ColumnIndicesRef additionalColumns,
+               const ad_utility::SharedCancellationHandle& cancellationHandle,
+               const LimitOffsetClause& limitOffset = {}) const;
 
   // Similar to the overload of `scan` above, but the keys are specified as IDs.
-  IdTable scan(Id col0Id, std::optional<Id> col1Id, std::optional<Id> col2Id,
+  IdTable scan(const Permutation::ScanSpecification& scanSpecification,
                Permutation::Enum p,
                Permutation::ColumnIndicesRef additionalColumns,
                const ad_utility::SharedCancellationHandle& cancellationHandle,
@@ -256,8 +269,7 @@ class Index {
   // Similar to the previous overload of `scan`, but only get the exact size of
   // the scan result.
   size_t getResultSizeOfScan(
-      const TripleComponent& col0String, const TripleComponent& col1String,
-      std::optional<std::reference_wrapper<const TripleComponent>> col2String,
+      const ScanSpecificationAsTripleComponent& scanSpecification,
       const Permutation::Enum& permutation) const;
 
   // Get access to the implementation. This should be used rarely as it
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 88ceee38ae..71fec97cfa 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -1367,35 +1367,19 @@ vector<float> IndexImpl::getMultiplicities(
 
 // _____________________________________________________________________________
 IdTable IndexImpl::scan(
-    const TripleComponent& col0String,
-    std::optional<std::reference_wrapper<const TripleComponent>> col1String,
-    std::optional<std::reference_wrapper<const TripleComponent>> col2String,
+    const ScanSpecificationAsTripleComponent& scanSpecificationAsTc,
     const Permutation::Enum& permutation,
     Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
     const LimitOffsetClause& limitOffset) const {
-  AD_CORRECTNESS_CHECK(!col2String.has_value() || col1String.has_value());
-  std::optional<Id> col0Id = col0String.toValueId(getVocab());
-  // TODO<C++23> Use the monadic operations for std::optional.
-  bool elementNotFound = !col0Id.has_value();
-  size_t numColumns = 2;
-  auto handleOptional = [&elementNotFound, &numColumns,
-                         this](const auto& comp) -> std::optional<Id> {
-    if (!comp.has_value()) {
-      return std::nullopt;
-    }
-    --numColumns;
-    auto result = comp.value().get().toValueId(getVocab());
-    elementNotFound = elementNotFound || !result.has_value();
-    return result;
-  };
-  std::optional<Id> col1Id = handleOptional(col1String);
-  std::optional<Id> col2Id = handleOptional(col2String);
-  if (elementNotFound) {
+  auto scanSpecification = scanSpecificationAsTc.toScanSpecification(*this);
+  if (!scanSpecification.has_value()) {
     cancellationHandle->throwIfCancelled();
-    return IdTable{numColumns + additionalColumns.size(), allocator_};
+    return IdTable{
+        scanSpecificationAsTc.numColumns() + additionalColumns.size(),
+        allocator_};
   }
-  return scan({col0Id.value(), col1Id, col2Id}, permutation, additionalColumns,
+  return scan(scanSpecification.value(), permutation, additionalColumns,
               cancellationHandle, limitOffset);
 }
 // _____________________________________________________________________________
@@ -1410,20 +1394,14 @@ IdTable IndexImpl::scan(
 
 // _____________________________________________________________________________
 size_t IndexImpl::getResultSizeOfScan(
-    const TripleComponent& col0, const TripleComponent& col1,
-    std::optional<std::reference_wrapper<const TripleComponent>> col2,
+    const ScanSpecificationAsTripleComponent& scanSpecificationAsTc,
     const Permutation::Enum& permutation) const {
-  std::optional<Id> col0Id = col0.toValueId(getVocab());
-  std::optional<Id> col1Id = col1.toValueId(getVocab());
-  std::optional<Id> col2Id = col2.has_value()
-                                 ? col2.value().get().toValueId(getVocab())
-                                 : std::nullopt;
-  if (!col0Id.has_value() || !col1Id.has_value() ||
-      (col2.has_value() && !col2Id.has_value())) {
+  const Permutation& p = getPermutation(permutation);
+  auto scanSpecification = scanSpecificationAsTc.toScanSpecification(*this);
+  if (!scanSpecification.has_value()) {
     return 0;
   }
-  const Permutation& p = getPermutation(permutation);
-  return p.getResultSizeOfScan({col0Id.value(), col1Id.value(), col2Id});
+  return p.getResultSizeOfScan(scanSpecification.value());
 }
 
 // _____________________________________________________________________________
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index d5020ac717..6caef088aa 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -397,14 +397,11 @@ class IndexImpl {
   vector<float> getMultiplicities(Permutation::Enum permutation) const;
 
   // _____________________________________________________________________________
-  IdTable scan(
-      const TripleComponent& col0String,
-      std::optional<std::reference_wrapper<const TripleComponent>> col1String,
-      std::optional<std::reference_wrapper<const TripleComponent>> col2String,
-      const Permutation::Enum& permutation,
-      Permutation::ColumnIndicesRef additionalColumns,
-      const ad_utility::SharedCancellationHandle& cancellationHandle,
-      const LimitOffsetClause& limitOffset = {}) const;
+  IdTable scan(const ScanSpecificationAsTripleComponent& scanSpecification,
+               const Permutation::Enum& permutation,
+               Permutation::ColumnIndicesRef additionalColumns,
+               const ad_utility::SharedCancellationHandle& cancellationHandle,
+               const LimitOffsetClause& limitOffset = {}) const;
 
   // _____________________________________________________________________________
   IdTable scan(const Permutation::ScanSpecification& scanSpecification,
@@ -415,8 +412,7 @@ class IndexImpl {
 
   // _____________________________________________________________________________
   size_t getResultSizeOfScan(
-      const TripleComponent& col0, const TripleComponent& col1,
-      std::optional<std::reference_wrapper<const TripleComponent>> col2,
+      const ScanSpecificationAsTripleComponent& scanSpecification,
       const Permutation::Enum& permutation) const;
 
  private:
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 59a3a5b8f5..59465bfb40 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -204,6 +204,20 @@ TEST(QueryPlanner, testBFSLeaveOut) {
   }
 }
 
+TEST(QueryPlanner, indexScanZeroVariables) {
+  auto scan = h::IndexScanFromStrings;
+  using enum Permutation::Enum;
+  h::expect(
+      "SELECT * \n "
+      "WHERE \t {<x> <y> <z>}",
+      scan("<x>", "<y>", "<z>"));
+  h::expect(
+      "SELECT * \n "
+      "WHERE \t {<x> <y> <z> . <x> <y> ?z}",
+      h::CartesianProductJoin(scan("<x>", "<y>", "<z>"),
+                              scan("<x>", "<y>", "?z")));
+}
+
 TEST(QueryPlanner, indexScanOneVariable) {
   auto scan = h::IndexScanFromStrings;
   using enum Permutation::Enum;

From b9afa81335d66babedfadea85424e8ab0451ace1 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Mon, 15 Jul 2024 09:58:30 +0200
Subject: [PATCH 05/14] Continue this work on Monday morning, we really have to
 start cleaning up this mess.

---
 src/index/Index.cpp            | 34 +++++++++++++++++++++++++++++++++-
 src/index/Index.h              | 15 +++++++++------
 test/IndexTest.cpp             |  8 ++++----
 test/util/IndexTestHelpers.cpp |  4 ++--
 4 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/src/index/Index.cpp b/src/index/Index.cpp
index 39f255d58b..f853d430df 100644
--- a/src/index/Index.cpp
+++ b/src/index/Index.cpp
@@ -300,11 +300,17 @@ size_t Index::getResultSizeOfScan(
     const Permutation::Enum& permutation) const {
   return pimpl_->getResultSizeOfScan(scanSpecification, permutation);
 }
+
 // ____________________________________________________________________________
 std::optional<Permutation::ScanSpecification>
 ScanSpecificationAsTripleComponent::toScanSpecification(
     const IndexImpl& index) const {
-  std::optional<Id> col0Id = col0_.toValueId(index.getVocab());
+  // TODO<C++23> Use `std::optional::transform`.
+  // TODO<SPARQL UPDATE>: We can also have LocalVocab entries is the
+  // ScanSpecification.
+  std::optional<Id> col0Id = col0_.has_value()
+                                 ? col0_.value().toValueId(index.getVocab())
+                                 : std::nullopt;
   std::optional<Id> col1Id = col1_.has_value()
                                  ? col1_.value().toValueId(index.getVocab())
                                  : std::nullopt;
@@ -315,4 +321,30 @@ ScanSpecificationAsTripleComponent::toScanSpecification(
       (col2_.has_value() && !col2Id.has_value())) {
     return std::nullopt;
   }
+  return Permutation::ScanSpecification{col0Id, col1Id, col2Id};
+}
+
+// ____________________________________________________________________________
+ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0,
+                                                                       T col1,
+                                                                       T col2) {
+  auto isUnbound = [](const T& t) {
+    return !t.has_value() || t.value().isVariable();
+  };
+  if (isUnbound(col0)) {
+    AD_CONTRACT_CHECK(isUnbound(col1));
+  }
+  if (isUnbound(col1)) {
+    AD_CONTRACT_CHECK(isUnbound(col2));
+  }
+  auto toNulloptIfVariable = [](T& tc) -> std::optional<TripleComponent> {
+    if (tc.has_value() && tc.value().isVariable()) {
+      return std::nullopt;
+    } else {
+      return std::move(tc);
+    }
+  };
+  col0_ = toNulloptIfVariable(col0);
+  col1_ = toNulloptIfVariable(col1);
+  col2_ = toNulloptIfVariable(col2);
 }
diff --git a/src/index/Index.h b/src/index/Index.h
index 5bd88ea58b..a6c3fce5dc 100644
--- a/src/index/Index.h
+++ b/src/index/Index.h
@@ -24,17 +24,20 @@ class TextBlockMetaData;
 class IndexImpl;
 
 // TODO<joka921> Comment.
-struct ScanSpecificationAsTripleComponent {
-  TripleComponent col0_;
+class ScanSpecificationAsTripleComponent {
+  using T = std::optional<TripleComponent>;
+
+ private:
+  std::optional<TripleComponent> col0_;
   std::optional<TripleComponent> col1_;
   std::optional<TripleComponent> col2_;
-  ScanSpecificationAsTripleComponent(TripleComponent col0,
-                                     const TripleComponent& col1,
-                                     const TripleComponent& col2);
+
+ public:
+  ScanSpecificationAsTripleComponent(T col0, T col1, T col2);
   std::optional<Permutation::ScanSpecification> toScanSpecification(
       const IndexImpl& index) const;
   size_t numColumns() const {
-    return 2 - col1_.has_value() - col2_.has_value();
+    return 3 - col0_.has_value() - col1_.has_value() - col2_.has_value();
   }
 };
 
diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp
index 2b38c14f75..0e5e96267e 100644
--- a/test/IndexTest.cpp
+++ b/test/IndexTest.cpp
@@ -36,9 +36,9 @@ auto makeTestScanWidthOne = [](const IndexImpl& index) {
                   ad_utility::source_location l =
                       ad_utility::source_location::current()) {
     auto t = generateLocationTrace(l);
-    IdTable result = index.scan(
-        c0, std::cref(c1), std::nullopt, permutation, additionalColumns,
-        std::make_shared<ad_utility::CancellationHandle<>>());
+    IdTable result =
+        index.scan({c0, c1, std::nullopt}, permutation, additionalColumns,
+                   std::make_shared<ad_utility::CancellationHandle<>>());
     ASSERT_EQ(result.numColumns(), 1 + additionalColumns.size());
     ASSERT_EQ(result, makeIdTableFromVector(expected));
   };
@@ -55,7 +55,7 @@ auto makeTestScanWidthTwo = [](const IndexImpl& index) {
                       ad_utility::source_location::current()) {
     auto t = generateLocationTrace(l);
     IdTable wol =
-        index.scan(c0, std::nullopt, std::nullopt, permutation,
+        index.scan({c0, std::nullopt, std::nullopt}, permutation,
                    Permutation::ColumnIndicesRef{},
                    std::make_shared<ad_utility::CancellationHandle<>>());
     ASSERT_EQ(wol, makeIdTableFromVector(expected));
diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp
index cf12731650..17795652f8 100644
--- a/test/util/IndexTestHelpers.cpp
+++ b/test/util/IndexTestHelpers.cpp
@@ -61,7 +61,7 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn(
   auto checkSingleElement = [&cancellationDummy, &hasPatternId](
                                 const Index& index, size_t patternIdx, Id id) {
     auto scanResultHasPattern =
-        index.scan(hasPatternId, id, std::nullopt, Permutation::Enum::PSO, {},
+        index.scan({hasPatternId, id, std::nullopt}, Permutation::Enum::PSO, {},
                    cancellationDummy);
     // Each ID has at most one pattern, it can have none if it doesn't
     // appear as a subject in the knowledge graph.
@@ -80,7 +80,7 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn(
         auto cancellationDummy =
             std::make_shared<ad_utility::CancellationHandle<>>();
         auto scanResult = index.scan(
-            col0Id, std::nullopt, std::nullopt, permutation,
+            {col0Id, std::nullopt, std::nullopt}, permutation,
             std::array{ColumnIndex{ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN},
                        ColumnIndex{ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN}},
             cancellationDummy);

From 395dfa9fe20040e4a2dc9ded0a9b4a71b58d7c2f Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Mon, 15 Jul 2024 10:38:04 +0200
Subject: [PATCH 06/14] Removed the pinned sizes.

---
 src/engine/IndexScan.cpp           | 90 +++++++++---------------------
 src/engine/IndexScan.h             |  1 -
 src/engine/Operation.cpp           | 19 -------
 src/engine/QueryExecutionContext.h | 24 +-------
 src/engine/Server.cpp              |  1 -
 test/QueryPlannerTest.cpp          |  2 +-
 6 files changed, 28 insertions(+), 109 deletions(-)

diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp
index 2ab72c8ac3..bbd1c98cdd 100644
--- a/src/engine/IndexScan.cpp
+++ b/src/engine/IndexScan.cpp
@@ -27,6 +27,9 @@ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
       numVariables_(static_cast<size_t>(subject_.isVariable()) +
                     static_cast<size_t>(predicate_.isVariable()) +
                     static_cast<size_t>(object_.isVariable())) {
+  // We previously had `nullptr`s here in unit tests. This is no longer
+  // necessary nor allowed.
+  AD_CONTRACT_CHECK(qec != nullptr);
   for (auto& [idx, variable] : triple.additionalScanColumns_) {
     additionalColumns_.push_back(idx);
     additionalVariables_.push_back(variable);
@@ -91,18 +94,8 @@ size_t IndexScan::getResultWidth() const {
 
 // _____________________________________________________________________________
 vector<ColumnIndex> IndexScan::resultSortedOn() const {
-  switch (numVariables_) {
-    case 0:
-      return {};
-    case 1:
-      return {ColumnIndex{0}};
-    case 2:
-      return {ColumnIndex{0}, ColumnIndex{1}};
-    case 3:
-      return {ColumnIndex{0}, ColumnIndex{1}, ColumnIndex{2}};
-    default:
-      AD_FAIL();
-  }
+  auto resAsView = ad_utility::integerRange(ColumnIndex{numVariables_});
+  return std::vector<ColumnIndex>{resAsView.begin(), resAsView.end()};
 }
 
 // _____________________________________________________________________________
@@ -147,39 +140,20 @@ Result IndexScan::computeResult([[maybe_unused]] bool requestLaziness) {
 
 // _____________________________________________________________________________
 size_t IndexScan::computeSizeEstimate() const {
-  if (_executionContext) {
-    if (numVariables_ == 1) {
-      // TODO<C++23> Use the monadic operation `std::optional::or_else`.
-      // Note: we cannot use `optional::value_or()` here, because the else
-      // case is expensive to compute, and we need it lazily evaluated.
-      if (auto size = getExecutionContext()->getQueryTreeCache().getPinnedSize(
-              getCacheKey());
-          size.has_value()) {
-        return size.value();
-      }
-    }
-
-    // We have to do a simple scan anyway so might as well do it now
-    if (numVariables_ < 3) {
-      return getIndex().getResultSizeOfScan(getPermutedTripleNoVariables(),
-                                            permutation_);
-    } else {
-      // The triple consists of three variables.
-      // TODO<joka921> As soon as all implementations of a full index scan
-      // (Including the "dummy joins" in Join.cpp) consistently exclude the
-      // internal triples, this estimate should be changed to only return
-      // the number of triples in the actual knowledge graph (excluding the
-      // internal triples).
-      AD_CORRECTNESS_CHECK(numVariables_ == 3);
-      return getIndex().numTriples().normalAndInternal_();
-    }
+  AD_CORRECTNESS_CHECK(_executionContext);
+  // We have to do a simple scan anyway so might as well do it now
+  if (numVariables_ < 3) {
+    return getIndex().getResultSizeOfScan(getPermutedTripleNoVariables(),
+                                          permutation_);
   } else {
-    // Only for test cases. The handling of the objects is to make the
-    // strange query planner tests pass.
-    auto strLen = [](const auto& el) {
-      return (el.isString() ? el.getString() : el.toString()).size();
-    };
-    return 1000 + strLen(subject_) + strLen(object_) + strLen(predicate_);
+    // The triple consists of three variables.
+    // TODO<joka921> As soon as all implementations of a full index scan
+    // (Including the "dummy joins" in Join.cpp) consistently exclude the
+    // internal triples, this estimate should be changed to only return
+    // the number of triples in the actual knowledge graph (excluding the
+    // internal triples).
+    AD_CORRECTNESS_CHECK(numVariables_ == 3);
+    return getIndex().numTriples().normalAndInternal_();
   }
 }
 
@@ -192,32 +166,20 @@ size_t IndexScan::getCostEstimate() {
 
 // _____________________________________________________________________________
 void IndexScan::determineMultiplicities() {
-  multiplicity_.clear();
-  if (numVariables_ == 0) {
-    return;
-  }
-  if (_executionContext) {
+  multiplicity_ = [this]() -> std::vector<float> {
     const auto& idx = getIndex();
-    if (numVariables_ == 1) {
+    if (numVariables_ == 0) {
+      return {};
+    } else if (numVariables_ == 1) {
       // There are no duplicate triples in RDF and two elements are fixed.
-      multiplicity_.emplace_back(1);
+      return {1.0f};
     } else if (numVariables_ == 2) {
-      const auto permutedTriple = getPermutedTriple();
-      multiplicity_ = idx.getMultiplicities(*permutedTriple[0], permutation_);
+      return idx.getMultiplicities(*getPermutedTriple()[0], permutation_);
     } else {
       AD_CORRECTNESS_CHECK(numVariables_ == 3);
-      multiplicity_ = idx.getMultiplicities(permutation_);
+      return idx.getMultiplicities(permutation_);
     }
-  } else {
-    // This branch is only used in certain unit tests.
-    multiplicity_.emplace_back(1);
-    if (numVariables_ == 2) {
-      multiplicity_.emplace_back(1);
-    }
-    if (numVariables_ == 3) {
-      multiplicity_.emplace_back(1);
-    }
-  }
+  }();
   for ([[maybe_unused]] size_t i :
        std::views::iota(multiplicity_.size(), getResultWidth())) {
     multiplicity_.emplace_back(1);
diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h
index edee48a509..cc66e89855 100644
--- a/src/engine/IndexScan.h
+++ b/src/engine/IndexScan.h
@@ -3,7 +3,6 @@
 // Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)
 #pragma once
 
-#include <boost/optional.hpp>
 #include <string>
 
 #include "./Operation.h"
diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp
index cd38e3d504..b29e2c6098 100644
--- a/src/engine/Operation.cpp
+++ b/src/engine/Operation.cpp
@@ -88,25 +88,6 @@ std::shared_ptr<const Result> Operation::getResult(
   const bool pinResult =
       _executionContext->_pinSubtrees || pinFinalResultButNotSubtrees;
 
-  // When we pin the final result but no subtrees, we need to remember the sizes
-  // of all involved index scans that have only one free variable. Note that
-  // these index scans are executed already during query planning because they
-  // have to be executed anyway, for any query plan. If we don't remember these
-  // sizes here, future queries that take the result from the cache would redo
-  // these index scans. Note that we do not need to remember the multiplicity
-  // (and distinctness) because the multiplicity for an index scan with a single
-  // free variable is always 1.
-  if (pinFinalResultButNotSubtrees) {
-    auto lock =
-        getExecutionContext()->getQueryTreeCache().pinnedSizes().wlock();
-    forAllDescendants([&lock](QueryExecutionTree* child) {
-      if (child->getRootOperation()->isIndexScanWithNumVariables(1)) {
-        (*lock)[child->getRootOperation()->getCacheKey()] =
-            child->getSizeEstimate();
-      }
-    });
-  }
-
   try {
     // In case of an exception, create the correct runtime info, no matter which
     // exception handler is called.
diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h
index ba2bc24487..8311c32eb7 100644
--- a/src/engine/QueryExecutionContext.h
+++ b/src/engine/QueryExecutionContext.h
@@ -55,34 +55,12 @@ class CacheValue {
 // by another query.
 using ConcurrentLruCache = ad_utility::ConcurrentCache<
     ad_utility::LRUCache<string, CacheValue, CacheValue::SizeGetter>>;
-using PinnedSizes =
-    ad_utility::Synchronized<ad_utility::HashMap<std::string, size_t>,
-                             std::shared_mutex>;
 class QueryResultCache : public ConcurrentLruCache {
- private:
-  PinnedSizes _pinnedSizes;
-
  public:
   virtual ~QueryResultCache() = default;
-  void clearAll() override {
-    // The _pinnedSizes are not part of the (otherwise threadsafe) _cache
-    // and thus have to be manually locked.
-    auto lock = _pinnedSizes.wlock();
-    ConcurrentLruCache::clearAll();
-    lock->clear();
-  }
+  void clearAll() override { ConcurrentLruCache::clearAll(); }
   // Inherit the constructor.
   using ConcurrentLruCache::ConcurrentLruCache;
-  const PinnedSizes& pinnedSizes() const { return _pinnedSizes; }
-  PinnedSizes& pinnedSizes() { return _pinnedSizes; }
-  std::optional<size_t> getPinnedSize(const std::string& key) {
-    auto rlock = _pinnedSizes.rlock();
-    if (rlock->contains(key)) {
-      return rlock->at(key);
-    } else {
-      return std::nullopt;
-    }
-  }
 };
 
 // Execution context for queries.
diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
index 689d58c7c5..245f3cb94a 100644
--- a/src/engine/Server.cpp
+++ b/src/engine/Server.cpp
@@ -469,7 +469,6 @@ nlohmann::json Server::composeCacheStatsJson() const {
   // converter.
   result["non-pinned-size"] = cache_.nonPinnedSize().getBytes();
   result["pinned-size"] = cache_.pinnedSize().getBytes();
-  result["num-pinned-index-scan-sizes"] = cache_.pinnedSizes().rlock()->size();
   return result;
 }
 
diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp
index 59465bfb40..537d4c088c 100644
--- a/test/QueryPlannerTest.cpp
+++ b/test/QueryPlannerTest.cpp
@@ -17,7 +17,7 @@ constexpr auto iri = ad_utility::testing::iri;
 using ::testing::HasSubstr;
 
 QueryPlanner makeQueryPlanner() {
-  return QueryPlanner{nullptr,
+  return QueryPlanner{ad_utility::testing::getQec(),
                       std::make_shared<ad_utility::CancellationHandle<>>()};
 }
 

From 733229f8720af8507e0a0caa15c2054716fc88dd Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Mon, 15 Jul 2024 11:17:37 +0200
Subject: [PATCH 07/14] Cleaned up a lot of code.

---
 src/engine/CMakeLists.txt        |  7 ++-
 src/engine/ScanSpecification.cpp | 55 +++++++++++++++++++++
 src/engine/ScanSpecification.h   | 83 ++++++++++++++++++++++++++++++++
 src/index/CMakeLists.txt         |  2 +-
 src/index/CompressedRelation.h   | 43 +----------------
 src/index/Index.cpp              | 52 +-------------------
 src/index/Index.h                | 21 +-------
 src/index/IndexImpl.cpp          |  4 +-
 src/index/IndexImpl.h            |  3 +-
 src/index/Permutation.h          |  2 -
 test/CompressedRelationsTest.cpp |  7 ++-
 test/TriplesViewTest.cpp         |  2 +-
 12 files changed, 155 insertions(+), 126 deletions(-)
 create mode 100644 src/engine/ScanSpecification.cpp
 create mode 100644 src/engine/ScanSpecification.h

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index 6aba4674f8..9570706075 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -13,5 +13,8 @@ add_library(engine
         VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp
         CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
         TextLimit.cpp
-        idTable/CompressedExternalIdTable.h)
-qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams)
+        idTable/CompressedExternalIdTable.h
+)
+add_library(scanSpecification ScanSpecification.cpp)
+qlever_target_link_libraries(scanSpecification index)
+qlever_target_link_libraries(engine util index parser sparqlExpressions scanSpecification http SortPerformanceEstimator Boost::iostreams)
diff --git a/src/engine/ScanSpecification.cpp b/src/engine/ScanSpecification.cpp
new file mode 100644
index 0000000000..bc0730bc79
--- /dev/null
+++ b/src/engine/ScanSpecification.cpp
@@ -0,0 +1,55 @@
+//  Copyright 2024, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include "engine/ScanSpecification.h"
+
+#include "index/IndexImpl.h"
+
+// ____________________________________________________________________________
+std::optional<ScanSpecification>
+ScanSpecificationAsTripleComponent::toScanSpecification(
+    const IndexImpl& index) const {
+  // TODO<C++23> Use `std::optional::transform`.
+  // TODO<SPARQL UPDATE>: We can also have LocalVocab entries is the
+  // ScanSpecification.
+  std::optional<Id> col0Id = col0_.has_value()
+                                 ? col0_.value().toValueId(index.getVocab())
+                                 : std::nullopt;
+  std::optional<Id> col1Id = col1_.has_value()
+                                 ? col1_.value().toValueId(index.getVocab())
+                                 : std::nullopt;
+  std::optional<Id> col2Id = col2_.has_value()
+                                 ? col2_.value().toValueId(index.getVocab())
+                                 : std::nullopt;
+  if (!col0Id.has_value() || (col1_.has_value() && !col1Id.has_value()) ||
+      (col2_.has_value() && !col2Id.has_value())) {
+    return std::nullopt;
+  }
+  return ScanSpecification{col0Id, col1Id, col2Id};
+}
+
+// ____________________________________________________________________________
+ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0,
+                                                                       T col1,
+                                                                       T col2) {
+  auto isUnbound = [](const T& t) {
+    return !t.has_value() || t.value().isVariable();
+  };
+  if (isUnbound(col0)) {
+    AD_CONTRACT_CHECK(isUnbound(col1));
+  }
+  if (isUnbound(col1)) {
+    AD_CONTRACT_CHECK(isUnbound(col2));
+  }
+  auto toNulloptIfVariable = [](T& tc) -> std::optional<TripleComponent> {
+    if (tc.has_value() && tc.value().isVariable()) {
+      return std::nullopt;
+    } else {
+      return std::move(tc);
+    }
+  };
+  col0_ = toNulloptIfVariable(col0);
+  col1_ = toNulloptIfVariable(col1);
+  col2_ = toNulloptIfVariable(col2);
+}
diff --git a/src/engine/ScanSpecification.h b/src/engine/ScanSpecification.h
new file mode 100644
index 0000000000..e315fd29ba
--- /dev/null
+++ b/src/engine/ScanSpecification.h
@@ -0,0 +1,83 @@
+//  Copyright 2024, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#pragma once
+#include <optional>
+
+#include "global/Id.h"
+#include "parser/TripleComponent.h"
+
+// Forward declaration
+class IndexImpl;
+
+// The specification of a scan operation for a given permutation.
+// Can either be a full scan (all three elements are `std::nullopt`),
+// a scan for a fixed `col0Id`, a scan for a fixed `col0Id` and `col1Id`,
+// or even a scan for a single triple to check whether it is contained in
+// the knowledge graph at all. The values which are `nullopt` become variables
+// and are returned as columns in the result of the scan.
+class ScanSpecification {
+ private:
+  using T = std::optional<Id>;
+
+  T col0Id_;
+  T col1Id_;
+  T col2Id_;
+
+  void validate() const {
+    bool c0 = col0Id_.has_value();
+    bool c1 = col1Id_.has_value();
+    bool c2 = col2Id_.has_value();
+    if (!c0) {
+      AD_CORRECTNESS_CHECK(!c1 && !c2);
+    }
+    if (!c1) {
+      AD_CORRECTNESS_CHECK(!c2);
+    }
+  }
+
+ public:
+  ScanSpecification(T col0Id, T col1Id, T col2Id)
+      : col0Id_{col0Id}, col1Id_{col1Id}, col2Id_{col2Id} {
+    validate();
+  }
+  const T& col0Id() const { return col0Id_; }
+  const T& col1Id() const { return col1Id_; }
+  const T& col2Id() const { return col2Id_; }
+
+  // Only used in tests.
+  void setCol1Id(T col1Id) {
+    col1Id_ = col1Id;
+    validate();
+  }
+};
+
+// Same as `ScanSpecification` (see above), but stores `TripleComponent`s
+// instead of `Id`s.
+class ScanSpecificationAsTripleComponent {
+  using T = std::optional<TripleComponent>;
+
+ private:
+  std::optional<TripleComponent> col0_;
+  std::optional<TripleComponent> col1_;
+  std::optional<TripleComponent> col2_;
+
+ public:
+  // Construct from three optional `TripleComponent`s. If any of the three
+  // entries is unbound (`nullopt` or of type `Variable`), then all subsequent
+  // entries also have to be unbound. For example if `col0` is bound, but `col1`
+  // isn't, then `col2` also has to be unbound.
+  ScanSpecificationAsTripleComponent(T col0, T col1, T col2);
+
+  // Convert to a `ScanSpecification`. The `index` is used to convert the
+  // `TripleComponent` to `Id`s by looking them up in the vocabulary. Return
+  // `nullopt` if one of the vocab lookup fails. Note: Once we implement SPARQL
+  // UPDATE, we possibly also have to use a `LocalVocab` here.
+  // TODO<joka921> Try out if this can be done just now.
+  std::optional<ScanSpecification> toScanSpecification(
+      const IndexImpl& index) const;
+  size_t numColumns() const {
+    return 3 - col0_.has_value() - col1_.has_value() - col2_.has_value();
+  }
+};
diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt
index d06778e2d7..3a6543c2ec 100644
--- a/src/index/CMakeLists.txt
+++ b/src/index/CMakeLists.txt
@@ -6,4 +6,4 @@ add_library(index
         DocsDB.cpp FTSAlgorithms.cpp
         PrefixHeuristic.cpp CompressedRelation.cpp
         PatternCreator.cpp)
-qlever_target_link_libraries(index util parser vocabulary compilationInfo ${STXXL_LIBRARIES})
+qlever_target_link_libraries(index util parser vocabulary compilationInfo scanSpecification ${STXXL_LIBRARIES})
diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h
index 5c7179c096..a46e5c6ba9 100644
--- a/src/index/CompressedRelation.h
+++ b/src/index/CompressedRelation.h
@@ -9,6 +9,7 @@
 #include <type_traits>
 #include <vector>
 
+#include "engine/ScanSpecification.h"
 #include "engine/idTable/IdTable.h"
 #include "global/Id.h"
 #include "parser/data/LimitOffsetClause.h"
@@ -336,48 +337,6 @@ class CompressedRelationReader {
   using ColumnIndices = std::vector<ColumnIndex>;
   using CancellationHandle = ad_utility::SharedCancellationHandle;
 
-  // The specification of a scan operation for a given permutation.
-  // Can either be a full scan (all three elements are `std::nullopt`),
-  // a scan for a fixed `col0Id`, a scan for a fixed `col0Id` and `col1Id`,
-  // or even a scan for a single triple to check whether it is contained in
-  // the knowledge graph at all. The values which are `nullopt` become variables
-  // and are returned as columns in the result of the scan.
-  class ScanSpecification {
-   private:
-    using T = std::optional<Id>;
-
-    T col0Id_;
-    T col1Id_;
-    T col2Id_;
-
-    void validate() const {
-      bool c0 = col0Id_.has_value();
-      bool c1 = col1Id_.has_value();
-      bool c2 = col2Id_.has_value();
-      if (!c0) {
-        AD_CORRECTNESS_CHECK(!c1 && !c2);
-      }
-      if (!c1) {
-        AD_CORRECTNESS_CHECK(!c2);
-      }
-    }
-
-   public:
-    ScanSpecification(T col0Id, T col1Id, T col2Id)
-        : col0Id_{col0Id}, col1Id_{col1Id}, col2Id_{col2Id} {
-      validate();
-    }
-    const T& col0Id() const { return col0Id_; }
-    const T& col1Id() const { return col1Id_; }
-    const T& col2Id() const { return col2Id_; }
-
-    // Only used in tests.
-    void setCol1Id(T col1Id) {
-      col1Id_ = col1Id;
-      validate();
-    }
-  };
-
   // The metadata of a single relation together with a subset of its
   // blocks and possibly a `col1Id` for additional filtering. This is used as
   // the input to several functions below that take such an input.
diff --git a/src/index/Index.cpp b/src/index/Index.cpp
index f853d430df..8c2841ca22 100644
--- a/src/index/Index.cpp
+++ b/src/index/Index.cpp
@@ -286,8 +286,8 @@ IdTable Index::scan(
 
 // ____________________________________________________________________________
 IdTable Index::scan(
-    const Permutation::ScanSpecification& scanSpecification,
-    Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
+    const ScanSpecification& scanSpecification, Permutation::Enum p,
+    Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
     const LimitOffsetClause& limitOffset) const {
   return pimpl_->scan(scanSpecification, p, additionalColumns,
@@ -300,51 +300,3 @@ size_t Index::getResultSizeOfScan(
     const Permutation::Enum& permutation) const {
   return pimpl_->getResultSizeOfScan(scanSpecification, permutation);
 }
-
-// ____________________________________________________________________________
-std::optional<Permutation::ScanSpecification>
-ScanSpecificationAsTripleComponent::toScanSpecification(
-    const IndexImpl& index) const {
-  // TODO<C++23> Use `std::optional::transform`.
-  // TODO<SPARQL UPDATE>: We can also have LocalVocab entries is the
-  // ScanSpecification.
-  std::optional<Id> col0Id = col0_.has_value()
-                                 ? col0_.value().toValueId(index.getVocab())
-                                 : std::nullopt;
-  std::optional<Id> col1Id = col1_.has_value()
-                                 ? col1_.value().toValueId(index.getVocab())
-                                 : std::nullopt;
-  std::optional<Id> col2Id = col2_.has_value()
-                                 ? col2_.value().toValueId(index.getVocab())
-                                 : std::nullopt;
-  if (!col0Id.has_value() || (col1_.has_value() && !col1Id.has_value()) ||
-      (col2_.has_value() && !col2Id.has_value())) {
-    return std::nullopt;
-  }
-  return Permutation::ScanSpecification{col0Id, col1Id, col2Id};
-}
-
-// ____________________________________________________________________________
-ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0,
-                                                                       T col1,
-                                                                       T col2) {
-  auto isUnbound = [](const T& t) {
-    return !t.has_value() || t.value().isVariable();
-  };
-  if (isUnbound(col0)) {
-    AD_CONTRACT_CHECK(isUnbound(col1));
-  }
-  if (isUnbound(col1)) {
-    AD_CONTRACT_CHECK(isUnbound(col2));
-  }
-  auto toNulloptIfVariable = [](T& tc) -> std::optional<TripleComponent> {
-    if (tc.has_value() && tc.value().isVariable()) {
-      return std::nullopt;
-    } else {
-      return std::move(tc);
-    }
-  };
-  col0_ = toNulloptIfVariable(col0);
-  col1_ = toNulloptIfVariable(col1);
-  col2_ = toNulloptIfVariable(col2);
-}
diff --git a/src/index/Index.h b/src/index/Index.h
index a6c3fce5dc..e74690157e 100644
--- a/src/index/Index.h
+++ b/src/index/Index.h
@@ -23,24 +23,6 @@ class IdTable;
 class TextBlockMetaData;
 class IndexImpl;
 
-// TODO<joka921> Comment.
-class ScanSpecificationAsTripleComponent {
-  using T = std::optional<TripleComponent>;
-
- private:
-  std::optional<TripleComponent> col0_;
-  std::optional<TripleComponent> col1_;
-  std::optional<TripleComponent> col2_;
-
- public:
-  ScanSpecificationAsTripleComponent(T col0, T col1, T col2);
-  std::optional<Permutation::ScanSpecification> toScanSpecification(
-      const IndexImpl& index) const;
-  size_t numColumns() const {
-    return 3 - col0_.has_value() - col1_.has_value() - col2_.has_value();
-  }
-};
-
 class Index {
  private:
   // Pimpl to reduce compile times.
@@ -263,8 +245,7 @@ class Index {
                const LimitOffsetClause& limitOffset = {}) const;
 
   // Similar to the overload of `scan` above, but the keys are specified as IDs.
-  IdTable scan(const Permutation::ScanSpecification& scanSpecification,
-               Permutation::Enum p,
+  IdTable scan(const ScanSpecification& scanSpecification, Permutation::Enum p,
                Permutation::ColumnIndicesRef additionalColumns,
                const ad_utility::SharedCancellationHandle& cancellationHandle,
                const LimitOffsetClause& limitOffset = {}) const;
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 71fec97cfa..b7596b5450 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -1384,8 +1384,8 @@ IdTable IndexImpl::scan(
 }
 // _____________________________________________________________________________
 IdTable IndexImpl::scan(
-    const Permutation::ScanSpecification& scanSpecification,
-    Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns,
+    const ScanSpecification& scanSpecification, Permutation::Enum p,
+    Permutation::ColumnIndicesRef additionalColumns,
     const ad_utility::SharedCancellationHandle& cancellationHandle,
     const LimitOffsetClause& limitOffset) const {
   return getPermutation(p).scan(scanSpecification, additionalColumns,
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index 6caef088aa..d820ffc4d3 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -404,8 +404,7 @@ class IndexImpl {
                const LimitOffsetClause& limitOffset = {}) const;
 
   // _____________________________________________________________________________
-  IdTable scan(const Permutation::ScanSpecification& scanSpecification,
-               Permutation::Enum p,
+  IdTable scan(const ScanSpecification& scanSpecification, Permutation::Enum p,
                Permutation::ColumnIndicesRef additionalColumns,
                const ad_utility::SharedCancellationHandle& cancellationHandle,
                const LimitOffsetClause& limitOffset = {}) const;
diff --git a/src/index/Permutation.h b/src/index/Permutation.h
index 2defb4c58e..db4b36635d 100644
--- a/src/index/Permutation.h
+++ b/src/index/Permutation.h
@@ -38,8 +38,6 @@ class Permutation {
   using ColumnIndices = CompressedRelationReader::ColumnIndices;
   using CancellationHandle = ad_utility::SharedCancellationHandle;
 
-  using ScanSpecification = CompressedRelationReader::ScanSpecification;
-
   // Convert a permutation to the corresponding string, etc. `PSO` is converted
   // to "PSO".
   static std::string_view toString(Enum permutation);
diff --git a/test/CompressedRelationsTest.cpp b/test/CompressedRelationsTest.cpp
index c2aad9d7c6..24faf16c2e 100644
--- a/test/CompressedRelationsTest.cpp
+++ b/test/CompressedRelationsTest.cpp
@@ -174,8 +174,7 @@ void testCompressedRelations(const auto& inputs, std::string testCaseName,
     ASSERT_FLOAT_EQ(m.numRows_ / static_cast<float>(i + 1),
                     m.multiplicityCol1_);
     // Scan for all distinct `col0` and check that we get the expected result.
-    CompressedRelationReader::ScanSpecification scanSpec{
-        metaData[i].col0Id_, std::nullopt, std::nullopt};
+    ScanSpecification scanSpec{metaData[i].col0Id_, std::nullopt, std::nullopt};
     IdTable table =
         reader.scan(scanSpec, blocks, additionalColumns, cancellationHandle);
     const auto& col1And2 = inputs[i].col1And2_;
@@ -207,8 +206,8 @@ void testCompressedRelations(const auto& inputs, std::string testCaseName,
     std::vector<std::array<int, 1>> col3;
 
     auto scanAndCheck = [&]() {
-      CompressedRelationReader::ScanSpecification scanSpec{
-          metaData[i].col0Id_, V(lastCol1Id), std::nullopt};
+      ScanSpecification scanSpec{metaData[i].col0Id_, V(lastCol1Id),
+                                 std::nullopt};
       auto size = reader.getResultSizeOfScan(scanSpec, blocks);
       IdTable tableWidthOne =
           reader.scan(scanSpec, blocks, Permutation::ColumnIndicesRef{},
diff --git a/test/TriplesViewTest.cpp b/test/TriplesViewTest.cpp
index 4c9fd53825..956100991c 100644
--- a/test/TriplesViewTest.cpp
+++ b/test/TriplesViewTest.cpp
@@ -30,7 +30,7 @@ struct DummyPermutation {
   }
 
   cppcoro::generator<IdTable> lazyScan(
-      CompressedRelationReader::ScanSpecification scanSpec,
+      ScanSpecification scanSpec,
       std::optional<std::vector<CompressedBlockMetadata>> blocks,
       std::span<const ColumnIndex>, const auto&) const {
     AD_CORRECTNESS_CHECK(!blocks.has_value());

From fe183000e099c15e2852ae23499bea4f3a60b19a Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 1 Aug 2024 19:34:02 +0200
Subject: [PATCH 08/14] A round of reviews.

---
 src/engine/CMakeLists.txt           |  2 +-
 src/engine/CartesianProductJoin.cpp | 18 +++++++++++-
 src/engine/IndexScan.cpp            | 29 +++++--------------
 src/engine/IndexScan.h              |  2 +-
 src/engine/QueryPlanner.cpp         |  1 +
 src/engine/ScanSpecification.cpp    | 45 ++++++++++++++++-------------
 src/engine/ScanSpecification.h      |  7 +++--
 7 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index a21afc093c..4587406bbe 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -17,4 +17,4 @@ qlever_target_link_libraries(engine util index parser sparqlExpressions http Sor
 
 add_library(scanSpecification ScanSpecification.cpp)
 qlever_target_link_libraries(scanSpecification index)
-qlever_target_link_libraries(engine util index parser sparqlExpressions scanSpecification http SortPerformanceEstimator Boost::iostreams)
+qlever_target_link_libraries(engine util index parser sparqlExpressions scanSpecification http SortPerformanceEstimator Boost::iostreams)
\ No newline at end of file
diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp
index 8f8147f5c2..d2ada341de 100644
--- a/src/engine/CartesianProductJoin.cpp
+++ b/src/engine/CartesianProductJoin.cpp
@@ -154,10 +154,18 @@ ProtoResult CartesianProductJoin::computeResult(
       child.setLimit(limitIfPresent.value());
     }
     subResults.push_back(child.getResult());
+
+    const auto& table = subResults.back()->idTable();
     // Early stopping: If one of the results is empty, we can stop early.
-    if (subResults.back()->idTable().size() == 0) {
+    if (table.size() == 0) {
       break;
     }
+
+    // If one of the children is the neutral element (because of a triple with
+    // zero variables), we can simply ignore it here.
+    if (table.numRows() == 1 && table.numColumns() == 0) {
+      subResults.pop_back();
+    }
     // Example for the following calculation: If we have a LIMIT of 1000 and
     // the first child already has a result of size 100, then the second child
     // needs to evaluate only its first 10 results. The +1 is because integer
@@ -169,6 +177,14 @@ ProtoResult CartesianProductJoin::computeResult(
     }
   }
 
+  // TODO<joka921> find a solution to simply return the subresult.
+  // This can probably be done by using the `ProtoResult`.
+  /*
+  if (subResults.size() == 1) {
+    return subResults.at(0);
+  }
+   */
+
   auto sizesView = std::views::transform(
       subResults, [](const auto& child) { return child->idTable().size(); });
   auto totalResultSize = std::accumulate(sizesView.begin(), sizesView.end(),
diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp
index 93d0f2b0eb..15940b525b 100644
--- a/src/engine/IndexScan.cpp
+++ b/src/engine/IndexScan.cpp
@@ -125,7 +125,7 @@ ProtoResult IndexScan::computeResult([[maybe_unused]] bool requestLaziness) {
   idTable.setNumColumns(numVariables_);
   const auto& index = _executionContext->getIndex();
   if (numVariables_ < 3) {
-    idTable = index.scan(getPermutedTripleNoVariables(), permutation_,
+    idTable = index.scan(getScanSpecification(), permutation_,
                          additionalColumns(), cancellationHandle_, getLimit());
   } else {
     AD_CORRECTNESS_CHECK(numVariables_ == 3);
@@ -143,8 +143,7 @@ size_t IndexScan::computeSizeEstimate() const {
   AD_CORRECTNESS_CHECK(_executionContext);
   // We have to do a simple scan anyway so might as well do it now
   if (numVariables_ < 3) {
-    return getIndex().getResultSizeOfScan(getPermutedTripleNoVariables(),
-                                          permutation_);
+    return getIndex().getResultSizeOfScan(getScanSpecification(), permutation_);
   } else {
     // The triple consists of three variables.
     // TODO<joka921> As soon as all implementations of a full index scan
@@ -235,8 +234,7 @@ std::array<const TripleComponent* const, 3> IndexScan::getPermutedTriple()
 }
 
 // ___________________________________________________________________________
-ScanSpecificationAsTripleComponent IndexScan::getPermutedTripleNoVariables()
-    const {
+ScanSpecificationAsTripleComponent IndexScan::getScanSpecification() const {
   auto permutedTriple = getPermutedTriple();
   return {*permutedTriple[0], *permutedTriple[1], *permutedTriple[2]};
 }
@@ -274,26 +272,13 @@ Permutation::IdTableGenerator IndexScan::getLazyScan(
 // ________________________________________________________________
 std::optional<Permutation::MetadataAndBlocks> IndexScan::getMetadataForScan(
     const IndexScan& s) {
-  auto permutedTriple = s.getPermutedTriple();
-  const IndexImpl& index = s.getIndex().getImpl();
-  auto numVars = s.numVariables_;
-  std::optional<Id> col0Id =
-      numVars == 3 ? std::nullopt
-                   : permutedTriple[0]->toValueId(index.getVocab());
-  std::optional<Id> col1Id =
-      numVars >= 2 ? std::nullopt
-                   : permutedTriple[1]->toValueId(index.getVocab());
-  std::optional<Id> col2Id =
-      numVars >= 1 ? std::nullopt
-                   : permutedTriple[2]->toValueId(index.getVocab());
-  if ((!col0Id.has_value() && numVars < 3) ||
-      (!col1Id.has_value() && numVars < 2) ||
-      (!col2Id.has_value() && numVars < 1)) {
+  const auto& index = s.getExecutionContext()->getIndex().getImpl();
+  auto scanSpec = s.getScanSpecification().toScanSpecification(index);
+  if (!scanSpec.has_value()) {
     return std::nullopt;
   }
-
   return index.getPermutation(s.permutation())
-      .getMetadataAndBlocks({col0Id, col1Id, col2Id});
+      .getMetadataAndBlocks(scanSpec.value());
 };
 
 // ________________________________________________________________
diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h
index f2fa4611cc..5914e27d71 100644
--- a/src/engine/IndexScan.h
+++ b/src/engine/IndexScan.h
@@ -100,7 +100,7 @@ class IndexScan final : public Operation {
   // `permutation_`. For example if `permutation_ == PSO` then the result is
   // {&predicate_, &subject_, &object_}
   std::array<const TripleComponent* const, 3> getPermutedTriple() const;
-  ScanSpecificationAsTripleComponent getPermutedTripleNoVariables() const;
+  ScanSpecificationAsTripleComponent getScanSpecification() const;
 
  private:
   ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp
index f12a4f6e4b..26f6077270 100644
--- a/src/engine/QueryPlanner.cpp
+++ b/src/engine/QueryPlanner.cpp
@@ -630,6 +630,7 @@ void QueryPlanner::seedFromOrdinaryTriple(
                          static_cast<size_t>(isVariable(triple.p_)) +
                          static_cast<size_t>(isVariable(triple.o_));
   if (numVars == 0) {
+    // We could read this from any of the permutations.
     addIndexScan(Permutation::Enum::PSO);
   } else if (numVars == 1) {
     indexScanSingleVarCase(triple, addIndexScan);
diff --git a/src/engine/ScanSpecification.cpp b/src/engine/ScanSpecification.cpp
index bc0730bc79..d546491567 100644
--- a/src/engine/ScanSpecification.cpp
+++ b/src/engine/ScanSpecification.cpp
@@ -13,17 +13,24 @@ ScanSpecificationAsTripleComponent::toScanSpecification(
   // TODO<C++23> Use `std::optional::transform`.
   // TODO<SPARQL UPDATE>: We can also have LocalVocab entries is the
   // ScanSpecification.
-  std::optional<Id> col0Id = col0_.has_value()
-                                 ? col0_.value().toValueId(index.getVocab())
-                                 : std::nullopt;
-  std::optional<Id> col1Id = col1_.has_value()
-                                 ? col1_.value().toValueId(index.getVocab())
-                                 : std::nullopt;
-  std::optional<Id> col2Id = col2_.has_value()
-                                 ? col2_.value().toValueId(index.getVocab())
-                                 : std::nullopt;
-  if (!col0Id.has_value() || (col1_.has_value() && !col1Id.has_value()) ||
-      (col2_.has_value() && !col2Id.has_value())) {
+  bool nonexistingVocabEntryFound = false;
+  auto getId =
+      [&index, &nonexistingVocabEntryFound](
+          const std::optional<TripleComponent>& tc) -> std::optional<Id> {
+    if (!tc.has_value()) {
+      return std::nullopt;
+    }
+    auto id = tc.value().toValueId(index.getVocab());
+    if (!id.has_value()) {
+      nonexistingVocabEntryFound = true;
+    }
+    return id;
+  };
+  std::optional<Id> col0Id = getId(col0_);
+  std::optional<Id> col1Id = getId(col1_);
+  std::optional<Id> col2Id = getId(col2_);
+
+  if (nonexistingVocabEntryFound) {
     return std::nullopt;
   }
   return ScanSpecification{col0Id, col1Id, col2Id};
@@ -33,15 +40,6 @@ ScanSpecificationAsTripleComponent::toScanSpecification(
 ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0,
                                                                        T col1,
                                                                        T col2) {
-  auto isUnbound = [](const T& t) {
-    return !t.has_value() || t.value().isVariable();
-  };
-  if (isUnbound(col0)) {
-    AD_CONTRACT_CHECK(isUnbound(col1));
-  }
-  if (isUnbound(col1)) {
-    AD_CONTRACT_CHECK(isUnbound(col2));
-  }
   auto toNulloptIfVariable = [](T& tc) -> std::optional<TripleComponent> {
     if (tc.has_value() && tc.value().isVariable()) {
       return std::nullopt;
@@ -52,4 +50,11 @@ ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0,
   col0_ = toNulloptIfVariable(col0);
   col1_ = toNulloptIfVariable(col1);
   col2_ = toNulloptIfVariable(col2);
+
+  if (!col0.has_value()) {
+    AD_CONTRACT_CHECK(!col1.has_value());
+  }
+  if (col1.has_value()) {
+    AD_CONTRACT_CHECK(!col2.has_value());
+  }
 }
diff --git a/src/engine/ScanSpecification.h b/src/engine/ScanSpecification.h
index e315fd29ba..9935fe64fc 100644
--- a/src/engine/ScanSpecification.h
+++ b/src/engine/ScanSpecification.h
@@ -5,6 +5,7 @@
 #pragma once
 #include <optional>
 
+#include "engine/LocalVocab.h"
 #include "global/Id.h"
 #include "parser/TripleComponent.h"
 
@@ -24,6 +25,7 @@ class ScanSpecification {
   T col0Id_;
   T col1Id_;
   T col2Id_;
+  friend class ScanSpecificationAsTripleComponent;
 
   void validate() const {
     bool c0 = col0Id_.has_value();
@@ -72,8 +74,9 @@ class ScanSpecificationAsTripleComponent {
 
   // Convert to a `ScanSpecification`. The `index` is used to convert the
   // `TripleComponent` to `Id`s by looking them up in the vocabulary. Return
-  // `nullopt` if one of the vocab lookup fails. Note: Once we implement SPARQL
-  // UPDATE, we possibly also have to use a `LocalVocab` here.
+  // `nullopt` if and only if one of the vocab lookup fails and the result of
+  // the corresponding scan will thus always be empty. Note: Once we implement
+  // SPARQL UPDATE, we possibly also have to use a `LocalVocab` here.
   // TODO<joka921> Try out if this can be done just now.
   std::optional<ScanSpecification> toScanSpecification(
       const IndexImpl& index) const;

From 6cabb80689c8b384f9b234ef5496ec2e6e5243b8 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 1 Aug 2024 19:38:42 +0200
Subject: [PATCH 09/14] A small change.

---
 src/engine/CMakeLists.txt      | 2 +-
 src/engine/ScanSpecification.h | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index 4587406bbe..a21afc093c 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -17,4 +17,4 @@ qlever_target_link_libraries(engine util index parser sparqlExpressions http Sor
 
 add_library(scanSpecification ScanSpecification.cpp)
 qlever_target_link_libraries(scanSpecification index)
-qlever_target_link_libraries(engine util index parser sparqlExpressions scanSpecification http SortPerformanceEstimator Boost::iostreams)
\ No newline at end of file
+qlever_target_link_libraries(engine util index parser sparqlExpressions scanSpecification http SortPerformanceEstimator Boost::iostreams)
diff --git a/src/engine/ScanSpecification.h b/src/engine/ScanSpecification.h
index 9935fe64fc..dfdf931a56 100644
--- a/src/engine/ScanSpecification.h
+++ b/src/engine/ScanSpecification.h
@@ -74,10 +74,10 @@ class ScanSpecificationAsTripleComponent {
 
   // Convert to a `ScanSpecification`. The `index` is used to convert the
   // `TripleComponent` to `Id`s by looking them up in the vocabulary. Return
-  // `nullopt` if and only if one of the vocab lookup fails and the result of
-  // the corresponding scan will thus always be empty. Note: Once we implement
-  // SPARQL UPDATE, we possibly also have to use a `LocalVocab` here.
-  // TODO<joka921> Try out if this can be done just now.
+  // `nullopt` if and only if one of the vocab lookup fails (then the result of
+  // the corresponding scan will be empty).
+  // TODO<joka921> Once we implement SPARQL UPDATE, we possibly also to use the
+  // `LocalVocab` of the UPDATE triples here.
   std::optional<ScanSpecification> toScanSpecification(
       const IndexImpl& index) const;
   size_t numColumns() const {

From f9897ce6b083b258fa8e29ebd63864080db792b6 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 1 Aug 2024 19:43:31 +0200
Subject: [PATCH 10/14] BugfixOfBugfix

---
 src/engine/ScanSpecification.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/engine/ScanSpecification.cpp b/src/engine/ScanSpecification.cpp
index d546491567..4ede911a10 100644
--- a/src/engine/ScanSpecification.cpp
+++ b/src/engine/ScanSpecification.cpp
@@ -54,7 +54,7 @@ ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0,
   if (!col0.has_value()) {
     AD_CONTRACT_CHECK(!col1.has_value());
   }
-  if (col1.has_value()) {
+  if (!col1.has_value()) {
     AD_CONTRACT_CHECK(!col2.has_value());
   }
 }

From d71a2fccbeb3c8a5e7198da08907c797f595300d Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 1 Aug 2024 19:48:21 +0200
Subject: [PATCH 11/14] Last thing that is left is To add the missing unit
 tests.

---
 src/engine/CartesianProductJoin.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp
index d2ada341de..ed4541d19a 100644
--- a/src/engine/CartesianProductJoin.cpp
+++ b/src/engine/CartesianProductJoin.cpp
@@ -177,13 +177,9 @@ ProtoResult CartesianProductJoin::computeResult(
     }
   }
 
-  // TODO<joka921> find a solution to simply return the subresult.
-  // This can probably be done by using the `ProtoResult`.
-  /*
-  if (subResults.size() == 1) {
-    return subResults.at(0);
-  }
-   */
+  // TODO<joka921> Find a solution to cheaply handle the case, that only a
+  // single result is left. This can probably be done by using the
+  // `ProtoResult`.
 
   auto sizesView = std::views::transform(
       subResults, [](const auto& child) { return child->idTable().size(); });

From 2aefcb4bb4ace0e1b60a63c9149c10819a6bf82a Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 2 Aug 2024 11:00:37 +0200
Subject: [PATCH 12/14] Add unit tests and inevitably fix the bugs that those
 tests discovered.

---
 src/engine/CMakeLists.txt                |  2 +-
 src/engine/CartesianProductJoin.cpp      |  3 +-
 src/engine/CartesianProductJoin.h        |  6 +-
 src/engine/Operation.cpp                 |  2 +-
 src/engine/Operation.h                   |  2 +-
 src/engine/QueryExecutionContext.h       |  9 +--
 src/engine/ScanSpecification.cpp         | 37 +++++++++--
 src/engine/ScanSpecification.h           | 23 +++----
 src/engine/idTable/IdTable.h             |  2 +-
 src/parser/CMakeLists.txt                |  2 +-
 src/util/ConcurrentCache.h               |  2 +-
 test/ValuesForTestingTest.cpp            | 16 ++++-
 test/engine/CMakeLists.txt               |  1 +
 test/engine/CartesianProductJoinTest.cpp | 16 ++++-
 test/engine/ScanSpecificationTest.cpp    | 81 ++++++++++++++++++++++++
 test/engine/ValuesForTesting.h           |  3 +-
 16 files changed, 163 insertions(+), 44 deletions(-)
 create mode 100644 test/engine/ScanSpecificationTest.cpp

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index a21afc093c..fbf6cad996 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -16,5 +16,5 @@ add_library(engine
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams)
 
 add_library(scanSpecification ScanSpecification.cpp)
-qlever_target_link_libraries(scanSpecification index)
+qlever_target_link_libraries(scanSpecification index engine)
 qlever_target_link_libraries(engine util index parser sparqlExpressions scanSpecification http SortPerformanceEstimator Boost::iostreams)
diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp
index ed4541d19a..c73361e001 100644
--- a/src/engine/CartesianProductJoin.cpp
+++ b/src/engine/CartesianProductJoin.cpp
@@ -157,7 +157,7 @@ ProtoResult CartesianProductJoin::computeResult(
 
     const auto& table = subResults.back()->idTable();
     // Early stopping: If one of the results is empty, we can stop early.
-    if (table.size() == 0) {
+    if (table.empty()) {
       break;
     }
 
@@ -165,6 +165,7 @@ ProtoResult CartesianProductJoin::computeResult(
     // zero variables), we can simply ignore it here.
     if (table.numRows() == 1 && table.numColumns() == 0) {
       subResults.pop_back();
+      continue;
     }
     // Example for the following calculation: If we have a LIMIT of 1000 and
     // the first child already has a result of size 100, then the second child
diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h
index 96047d62fa..779adf1dba 100644
--- a/src/engine/CartesianProductJoin.h
+++ b/src/engine/CartesianProductJoin.h
@@ -2,9 +2,7 @@
 //                  Chair of Algorithms and Data Structures.
 //  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
-#ifndef QLEVER_CARTESIANPRODUCTJOIN_H
-#define QLEVER_CARTESIANPRODUCTJOIN_H
-
+#pragma once
 #include "engine/Operation.h"
 #include "engine/QueryExecutionTree.h"
 
@@ -92,5 +90,3 @@ class CartesianProductJoin : public Operation {
                          std::span<const Id> inputColumn, size_t groupSize,
                          size_t offset);
 };
-
-#endif  // QLEVER_CARTESIANPRODUCTJOIN_H
diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp
index b29e2c6098..4293aa2c9c 100644
--- a/src/engine/Operation.cpp
+++ b/src/engine/Operation.cpp
@@ -251,7 +251,7 @@ void Operation::updateRuntimeInformationOnSuccess(
 
 // ____________________________________________________________________________________________________________________
 void Operation::updateRuntimeInformationOnSuccess(
-    const ConcurrentLruCache ::ResultAndCacheStatus& resultAndCacheStatus,
+    const QueryResultCache::ResultAndCacheStatus& resultAndCacheStatus,
     Milliseconds duration) {
   updateRuntimeInformationOnSuccess(
       *resultAndCacheStatus._resultPointer->resultTable(),
diff --git a/src/engine/Operation.h b/src/engine/Operation.h
index 920b09594f..9e23ee3fcf 100644
--- a/src/engine/Operation.h
+++ b/src/engine/Operation.h
@@ -263,7 +263,7 @@ class Operation {
   // Create and store the complete runtime information for this operation after
   // it has either been successfully computed or read from the cache.
   virtual void updateRuntimeInformationOnSuccess(
-      const ConcurrentLruCache::ResultAndCacheStatus& resultAndCacheStatus,
+      const QueryResultCache::ResultAndCacheStatus& resultAndCacheStatus,
       Milliseconds duration) final;
 
   // Similar to the function above, but the components are specified manually.
diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h
index 8311c32eb7..e7c1ff37f6 100644
--- a/src/engine/QueryExecutionContext.h
+++ b/src/engine/QueryExecutionContext.h
@@ -53,15 +53,8 @@ class CacheValue {
 // Threadsafe LRU cache for (partial) query results, that
 // checks on insertion, if the result is currently being computed
 // by another query.
-using ConcurrentLruCache = ad_utility::ConcurrentCache<
+using QueryResultCache = ad_utility::ConcurrentCache<
     ad_utility::LRUCache<string, CacheValue, CacheValue::SizeGetter>>;
-class QueryResultCache : public ConcurrentLruCache {
- public:
-  virtual ~QueryResultCache() = default;
-  void clearAll() override { ConcurrentLruCache::clearAll(); }
-  // Inherit the constructor.
-  using ConcurrentLruCache::ConcurrentLruCache;
-};
 
 // Execution context for queries.
 // Holds references to index and engine, implements caching.
diff --git a/src/engine/ScanSpecification.cpp b/src/engine/ScanSpecification.cpp
index 4ede911a10..ee8a0543d3 100644
--- a/src/engine/ScanSpecification.cpp
+++ b/src/engine/ScanSpecification.cpp
@@ -4,8 +4,16 @@
 
 #include "engine/ScanSpecification.h"
 
+#include "index/Index.h"
 #include "index/IndexImpl.h"
 
+// ____________________________________________________________________________
+std::optional<ScanSpecification>
+ScanSpecificationAsTripleComponent::toScanSpecification(
+    const Index& index) const {
+  return toScanSpecification(index.getImpl());
+}
+
 // ____________________________________________________________________________
 std::optional<ScanSpecification>
 ScanSpecificationAsTripleComponent::toScanSpecification(
@@ -51,10 +59,31 @@ ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0,
   col1_ = toNulloptIfVariable(col1);
   col2_ = toNulloptIfVariable(col2);
 
-  if (!col0.has_value()) {
-    AD_CONTRACT_CHECK(!col1.has_value());
+  if (!col0_.has_value()) {
+    AD_CONTRACT_CHECK(!col1_.has_value());
+  }
+  if (!col1_.has_value()) {
+    AD_CONTRACT_CHECK(!col2_.has_value());
+  }
+}
+
+// ____________________________________________________________________________
+size_t ScanSpecificationAsTripleComponent::numColumns() const {
+  auto i = [](const auto& x) -> size_t {
+    return static_cast<size_t>(x.has_value());
+  };
+  return 3 - i(col0_) - i(col1_) - i(col2_);
+}
+
+// _____________________________________________________________________________
+void ScanSpecification::validate() const {
+  bool c0 = col0Id_.has_value();
+  bool c1 = col1Id_.has_value();
+  bool c2 = col2Id_.has_value();
+  if (!c0) {
+    AD_CORRECTNESS_CHECK(!c1 && !c2);
   }
-  if (!col1.has_value()) {
-    AD_CONTRACT_CHECK(!col2.has_value());
+  if (!c1) {
+    AD_CORRECTNESS_CHECK(!c2);
   }
 }
diff --git a/src/engine/ScanSpecification.h b/src/engine/ScanSpecification.h
index dfdf931a56..de8e915cee 100644
--- a/src/engine/ScanSpecification.h
+++ b/src/engine/ScanSpecification.h
@@ -11,6 +11,7 @@
 
 // Forward declaration
 class IndexImpl;
+class Index;
 
 // The specification of a scan operation for a given permutation.
 // Can either be a full scan (all three elements are `std::nullopt`),
@@ -27,17 +28,7 @@ class ScanSpecification {
   T col2Id_;
   friend class ScanSpecificationAsTripleComponent;
 
-  void validate() const {
-    bool c0 = col0Id_.has_value();
-    bool c1 = col1Id_.has_value();
-    bool c2 = col2Id_.has_value();
-    if (!c0) {
-      AD_CORRECTNESS_CHECK(!c1 && !c2);
-    }
-    if (!c1) {
-      AD_CORRECTNESS_CHECK(!c2);
-    }
-  }
+  void validate() const;
 
  public:
   ScanSpecification(T col0Id, T col1Id, T col2Id)
@@ -48,6 +39,8 @@ class ScanSpecification {
   const T& col1Id() const { return col1Id_; }
   const T& col2Id() const { return col2Id_; }
 
+  bool operator==(const ScanSpecification&) const = default;
+
   // Only used in tests.
   void setCol1Id(T col1Id) {
     col1Id_ = col1Id;
@@ -80,7 +73,9 @@ class ScanSpecificationAsTripleComponent {
   // `LocalVocab` of the UPDATE triples here.
   std::optional<ScanSpecification> toScanSpecification(
       const IndexImpl& index) const;
-  size_t numColumns() const {
-    return 3 - col0_.has_value() - col1_.has_value() - col2_.has_value();
-  }
+  std::optional<ScanSpecification> toScanSpecification(
+      const Index& index) const;
+
+  // The number of columns that the corresponding index scan will have.
+  size_t numColumns() const;
 };
diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h
index 4377b02445..c8fb92d84e 100644
--- a/src/engine/idTable/IdTable.h
+++ b/src/engine/idTable/IdTable.h
@@ -1,4 +1,4 @@
-//  Copyright 2021, University of Freiburg, Chair of Algorithms and Data
+///  Copyright 2021, University of Freiburg, Chair of Algorithms and Data
 //  Structures. Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>
 
 #pragma once
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt
index 29ca836c71..8b1efdcb1e 100644
--- a/src/parser/CMakeLists.txt
+++ b/src/parser/CMakeLists.txt
@@ -25,5 +25,5 @@ add_library(parser
         Iri.cpp
         Literal.cpp
         LiteralOrIri.cpp)
-qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine)
+qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine index)
 
diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h
index e76e2980b7..de8d0f270e 100644
--- a/src/util/ConcurrentCache.h
+++ b/src/util/ConcurrentCache.h
@@ -206,7 +206,7 @@ class ConcurrentCache {
   }
 
   /// Clear the cache, including the pinned entries.
-  virtual void clearAll() { _cacheAndInProgressMap.wlock()->_cache.clearAll(); }
+  void clearAll() { _cacheAndInProgressMap.wlock()->_cache.clearAll(); }
 
   /// Delete elements from the unpinned part of the cache of total size
   /// at least `size`;
diff --git a/test/ValuesForTestingTest.cpp b/test/ValuesForTestingTest.cpp
index 95ce76ce46..8c4b86d019 100644
--- a/test/ValuesForTestingTest.cpp
+++ b/test/ValuesForTestingTest.cpp
@@ -25,9 +25,9 @@ TEST(ValuesForTesting, valuesForTesting) {
   ASSERT_EQ(v.getMultiplicity(0), 42.0);
   ASSERT_EQ(v.getMultiplicity(1), 84.0);
 
-  ASSERT_THAT(
-      v.getCacheKey(),
-      ::testing::StartsWith("Values for testing with 2 columns. V:3 V:12"));
+  ASSERT_THAT(v.getCacheKey(),
+              ::testing::StartsWith(
+                  "Values for testing with 2 columns and 3 rows. V:3 V:12"));
   ASSERT_THAT(v.getCacheKey(), ::testing::EndsWith("Supports limit: 0"));
   ASSERT_EQ(v.getDescriptor(), "explicit values for testing");
   ASSERT_TRUE(v.resultSortedOn().empty());
@@ -36,3 +36,13 @@ TEST(ValuesForTesting, valuesForTesting) {
   auto result = v.getResult();
   ASSERT_EQ(result->idTable(), table);
 }
+
+// ____________________________________________________________________________
+TEST(ValuesForTesting, cornerCasesCacheKey) {
+  auto empty = makeIdTableFromVector({});
+  auto neutral = makeIdTableFromVector({{}});
+
+  ValuesForTesting vEmpty{getQec(), empty.clone(), {}};
+  ValuesForTesting vNeutral{getQec(), neutral.clone(), {}};
+  EXPECT_NE(vEmpty.getCacheKey(), vNeutral.getCacheKey());
+}
diff --git a/test/engine/CMakeLists.txt b/test/engine/CMakeLists.txt
index 4f6e09ff5f..d64860a72d 100644
--- a/test/engine/CMakeLists.txt
+++ b/test/engine/CMakeLists.txt
@@ -4,3 +4,4 @@ addLinkAndDiscoverTest(CartesianProductJoinTest engine)
 addLinkAndDiscoverTest(TextIndexScanForWordTest engine)
 addLinkAndDiscoverTest(TextIndexScanForEntityTest engine)
 addLinkAndDiscoverTest(DistinctTest engine)
+addLinkAndDiscoverTestSerial(ScanSpecificationTest engine index parser scanSpecification)
diff --git a/test/engine/CartesianProductJoinTest.cpp b/test/engine/CartesianProductJoinTest.cpp
index f07bb11218..cd05e31b9b 100644
--- a/test/engine/CartesianProductJoinTest.cpp
+++ b/test/engine/CartesianProductJoinTest.cpp
@@ -90,9 +90,21 @@ void testCartesianProduct(VectorTable expected, std::vector<VectorTable> inputs,
 TEST(CartesianProductJoin, computeResult) {
   // Simple base cases.
   VectorTable v{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
+  VectorTable empty{};
   testCartesianProduct(v, {v});
-  testCartesianProduct({}, {{}, v, {}});
-  testCartesianProduct({}, {{}, {}});
+  testCartesianProduct(empty, {empty, v, empty});
+  testCartesianProduct(empty, {empty, empty});
+
+  // Test cases where some or all of the inputs are Neutral elements (1 row,
+  // zero columns) that are automatically filtered out by the
+  // `CartesianProductJoin`.
+  VectorTable neutral{{}};
+  testCartesianProduct(neutral, {neutral});
+  testCartesianProduct(v, {v, neutral});
+  testCartesianProduct(v, {neutral, v, neutral});
+  testCartesianProduct(neutral, {neutral, neutral, neutral});
+  testCartesianProduct(empty, {neutral, empty, neutral});
+  testCartesianProduct(empty, {neutral, empty, v});
 
   // Fails because of an empty input.
   EXPECT_ANY_THROW(makeJoin({}));
diff --git a/test/engine/ScanSpecificationTest.cpp b/test/engine/ScanSpecificationTest.cpp
new file mode 100644
index 0000000000..63c4386d67
--- /dev/null
+++ b/test/engine/ScanSpecificationTest.cpp
@@ -0,0 +1,81 @@
+//  Copyright 2024, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include <gmock/gmock.h>
+
+#include "../util/GTestHelpers.h"
+#include "../util/IndexTestHelpers.h"
+#include "engine/ScanSpecification.h"
+
+// _____________________________________________________________________________
+TEST(ScanSpecification, validate) {
+  Id i = Id::makeFromInt(42);
+  auto n = std::nullopt;
+  using S = ScanSpecification;
+  EXPECT_NO_THROW(S(i, i, i));
+  EXPECT_NO_THROW(S(i, i, n));
+  EXPECT_NO_THROW(S(i, n, n));
+  EXPECT_NO_THROW(S(n, n, n));
+
+  EXPECT_ANY_THROW(S(n, i, i));
+  EXPECT_ANY_THROW(S(n, n, i));
+  EXPECT_ANY_THROW(S(n, i, n));
+  EXPECT_ANY_THROW(S(i, n, i));
+}
+
+// _____________________________________________________________________________
+TEST(ScanSpecification, ScanSpecificationAsTripleComponent) {
+  Id i = Id::makeFromInt(42);
+  TripleComponent iTc{42};
+  auto n = std::nullopt;
+  using S = ScanSpecification;
+  using STc = ScanSpecificationAsTripleComponent;
+
+  EXPECT_ANY_THROW(STc(n, iTc, iTc));
+  EXPECT_ANY_THROW(STc(n, n, iTc));
+  EXPECT_ANY_THROW(STc(n, iTc, n));
+  EXPECT_ANY_THROW(STc(iTc, n, iTc));
+
+  const auto& index = ad_utility::testing::getQec()->getIndex();
+  auto toScanSpec = [&index](const STc& s) {
+    return s.toScanSpecification(index);
+  };
+
+  // Match that a `ScanSpecificationAsTripleComponent` has the expected number
+  // of columns, and yields the expected `ScanSpecification` when
+  // `toScanSpecification` is called on it.
+  auto matchScanSpec =
+      [&toScanSpec](const std::optional<ScanSpecification> spec,
+                    size_t numColumns = 0) -> ::testing::Matcher<const STc&> {
+    auto innerMatcher = [&toScanSpec, &spec] {
+      return ::testing::ResultOf(toScanSpec, ::testing::Eq(spec));
+    };
+    if (!spec.has_value()) {
+      return innerMatcher();
+    } else {
+      return ::testing::AllOf(
+          innerMatcher(),
+          AD_PROPERTY(STc, numColumns, ::testing::Eq(numColumns)));
+    }
+  };
+  EXPECT_THAT(STc(iTc, iTc, iTc), matchScanSpec(S(i, i, i), 0));
+  EXPECT_THAT(STc(iTc, iTc, n), matchScanSpec(S(i, i, n), 1));
+  EXPECT_THAT(STc(iTc, n, n), matchScanSpec(S(i, n, n), 2));
+  EXPECT_THAT(STc(n, n, n), matchScanSpec(S(n, n, n), 3));
+
+  // Test the resolution of vocab entries.
+  auto getId = ad_utility::testing::makeGetId(index);
+  auto x = getId("<x>");
+  TripleComponent xIri = TripleComponent::Iri::fromIriref("<x>");
+
+  EXPECT_THAT(STc(xIri, xIri, xIri), matchScanSpec(S(x, x, x), 0));
+
+  // For an entry that is not in the vocabulary, the complete result of
+  // `toScanSpecification` is `nullopt`.
+  TripleComponent notInVocab =
+      TripleComponent::Iri::fromIriref("<thisIriIsNotContained>");
+  EXPECT_THAT(STc(notInVocab, xIri, xIri), matchScanSpec(std::nullopt));
+  EXPECT_THAT(STc(xIri, notInVocab, xIri), matchScanSpec(std::nullopt));
+  EXPECT_THAT(STc(xIri, xIri, notInVocab), matchScanSpec(std::nullopt));
+}
diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h
index dcbf130da8..ac7e363a95 100644
--- a/test/engine/ValuesForTesting.h
+++ b/test/engine/ValuesForTesting.h
@@ -65,7 +65,8 @@ class ValuesForTesting : public Operation {
   // ___________________________________________________________________________
   string getCacheKeyImpl() const override {
     std::stringstream str;
-    str << "Values for testing with " << table_.numColumns() << " columns. ";
+    str << "Values for testing with " << table_.numColumns() << " columns and "
+        << table_.numRows() << " rows. ";
     if (table_.numRows() > 1000) {
       str << ad_utility::FastRandomIntGenerator<int64_t>{}();
     } else {

From a70579989cbe3dd9af00bb4e6df7a9ed9e30fbdd Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 2 Aug 2024 11:13:14 +0200
Subject: [PATCH 13/14] Fix linker troubles...

---
 src/engine/CMakeLists.txt                        | 4 ----
 src/index/CMakeLists.txt                         | 4 ++--
 src/index/CompressedRelation.h                   | 2 +-
 src/{engine => index}/ScanSpecification.cpp      | 2 +-
 src/{engine => index}/ScanSpecification.h        | 0
 test/engine/CMakeLists.txt                       | 1 -
 test/index/CMakeLists.txt                        | 1 +
 test/{engine => index}/ScanSpecificationTest.cpp | 2 +-
 8 files changed, 6 insertions(+), 10 deletions(-)
 rename src/{engine => index}/ScanSpecification.cpp (98%)
 rename src/{engine => index}/ScanSpecification.h (100%)
 rename test/{engine => index}/ScanSpecificationTest.cpp (98%)

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index fbf6cad996..92266f8de1 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -14,7 +14,3 @@ add_library(engine
         CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
         TextLimit.cpp LocalVocabEntry.cpp)
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams)
-
-add_library(scanSpecification ScanSpecification.cpp)
-qlever_target_link_libraries(scanSpecification index engine)
-qlever_target_link_libraries(engine util index parser sparqlExpressions scanSpecification http SortPerformanceEstimator Boost::iostreams)
diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt
index 7d4e7dd5b1..e41fd2471f 100644
--- a/src/index/CMakeLists.txt
+++ b/src/index/CMakeLists.txt
@@ -5,5 +5,5 @@ add_library(index
         LocatedTriples.cpp Permutation.cpp TextMetaData.cpp
         DocsDB.cpp FTSAlgorithms.cpp
         PrefixHeuristic.cpp CompressedRelation.cpp
-        PatternCreator.cpp)
-qlever_target_link_libraries(index util parser vocabulary compilationInfo scanSpecification ${STXXL_LIBRARIES})
+        PatternCreator.cpp ScanSpecification.cpp)
+qlever_target_link_libraries(index util parser vocabulary compilationInfo ${STXXL_LIBRARIES})
diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h
index 42d3a3c200..af8725ec9a 100644
--- a/src/index/CompressedRelation.h
+++ b/src/index/CompressedRelation.h
@@ -9,9 +9,9 @@
 #include <type_traits>
 #include <vector>
 
-#include "engine/ScanSpecification.h"
 #include "engine/idTable/IdTable.h"
 #include "global/Id.h"
+#include "index/ScanSpecification.h"
 #include "parser/data/LimitOffsetClause.h"
 #include "util/Cache.h"
 #include "util/CancellationHandle.h"
diff --git a/src/engine/ScanSpecification.cpp b/src/index/ScanSpecification.cpp
similarity index 98%
rename from src/engine/ScanSpecification.cpp
rename to src/index/ScanSpecification.cpp
index ee8a0543d3..505dc1e051 100644
--- a/src/engine/ScanSpecification.cpp
+++ b/src/index/ScanSpecification.cpp
@@ -2,7 +2,7 @@
 //  Chair of Algorithms and Data Structures.
 //  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
-#include "engine/ScanSpecification.h"
+#include "index/ScanSpecification.h"
 
 #include "index/Index.h"
 #include "index/IndexImpl.h"
diff --git a/src/engine/ScanSpecification.h b/src/index/ScanSpecification.h
similarity index 100%
rename from src/engine/ScanSpecification.h
rename to src/index/ScanSpecification.h
diff --git a/test/engine/CMakeLists.txt b/test/engine/CMakeLists.txt
index d64860a72d..4f6e09ff5f 100644
--- a/test/engine/CMakeLists.txt
+++ b/test/engine/CMakeLists.txt
@@ -4,4 +4,3 @@ addLinkAndDiscoverTest(CartesianProductJoinTest engine)
 addLinkAndDiscoverTest(TextIndexScanForWordTest engine)
 addLinkAndDiscoverTest(TextIndexScanForEntityTest engine)
 addLinkAndDiscoverTest(DistinctTest engine)
-addLinkAndDiscoverTestSerial(ScanSpecificationTest engine index parser scanSpecification)
diff --git a/test/index/CMakeLists.txt b/test/index/CMakeLists.txt
index aa0d0950e1..3651259d1a 100644
--- a/test/index/CMakeLists.txt
+++ b/test/index/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(vocabulary)
 addLinkAndDiscoverTest(PatternCreatorTest index)
+addLinkAndDiscoverTestSerial(ScanSpecificationTest index)
diff --git a/test/engine/ScanSpecificationTest.cpp b/test/index/ScanSpecificationTest.cpp
similarity index 98%
rename from test/engine/ScanSpecificationTest.cpp
rename to test/index/ScanSpecificationTest.cpp
index 63c4386d67..3c7df13755 100644
--- a/test/engine/ScanSpecificationTest.cpp
+++ b/test/index/ScanSpecificationTest.cpp
@@ -6,7 +6,7 @@
 
 #include "../util/GTestHelpers.h"
 #include "../util/IndexTestHelpers.h"
-#include "engine/ScanSpecification.h"
+#include "index/ScanSpecification.h"
 
 // _____________________________________________________________________________
 TEST(ScanSpecification, validate) {

From bfa6dce35c2f08a260e18d9d5dc4d181124426b0 Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Sat, 3 Aug 2024 17:44:59 +0200
Subject: [PATCH 14/14] Minor fix in layout.

---
 src/engine/idTable/IdTable.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h
index c8fb92d84e..dd9895810c 100644
--- a/src/engine/idTable/IdTable.h
+++ b/src/engine/idTable/IdTable.h
@@ -1,5 +1,5 @@
-///  Copyright 2021, University of Freiburg, Chair of Algorithms and Data
-//  Structures. Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>
+// Copyright 2021, University of Freiburg, Chair of Algorithms and Data
+// Structures. Author: Johannes Kalmbach <kalmbacj@cs.uni-freiburg.de>
 
 #pragma once