From edc2889f839fd06b9f0c08df7b5e9b98a4f0f117 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Tue, 26 Nov 2024 03:40:21 +0100
Subject: [PATCH] Fix quadratic runtime of `FILTER` with lazy input and
 materialized result (#1634)

After #1478, a `FILTER` with lazy input and a materialized result had
quadratic runtime because of repeated calls to `reserve`: each call can
reallocate and copy all previously filtered rows, once per input block.
This is now fixed by calling `resize` instead. In the special case of a
single block that is kept completely, we simply move it and avoid the
copy. On the side, refactor `IdTable::insertAtEnd` to insert column by
column and not row by row.
---
 src/engine/Filter.cpp                        | 42 +++++++++-----
 src/engine/Filter.h                          |  8 +--
 .../idTable/CompressedExternalIdTable.h      |  2 +-
 src/engine/idTable/IdTable.h                 | 55 +++++++++++--------
 src/index/IndexImpl.cpp                      | 51 ++++++++---------
 test/CompressedRelationsTest.cpp             |  4 +-
 test/ExportQueryExecutionTreesTest.cpp       |  3 +-
 test/FilterTest.cpp                          | 39 +++++++++++++
 test/IdTableTest.cpp                         | 18 +++++-
 test/engine/IndexScanTest.cpp                |  2 +-
 10 files changed, 151 insertions(+), 73 deletions(-)

diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp
index 1c8ff01404..66821df134 100644
--- a/src/engine/Filter.cpp
+++ b/src/engine/Filter.cpp
@@ -76,8 +76,8 @@ ProtoResult Filter::computeResult(bool requestLaziness) {
     ad_utility::callFixedSize(
         width, [this, &subRes, &result, &resultLocalVocab]<int WIDTH>() {
           for (Result::IdTableVocabPair& pair : subRes->idTables()) {
-            computeFilterImpl<WIDTH>(result, pair.idTable_, pair.localVocab_,
-                                     subRes->sortedBy());
+            computeFilterImpl<WIDTH>(result, std::move(pair.idTable_),
+                                     pair.localVocab_, subRes->sortedBy());
             resultLocalVocab.mergeWith(std::span{&pair.localVocab_, 1});
           }
         });
@@ -88,20 +88,24 @@ ProtoResult Filter::computeResult(bool requestLaziness) {
 }
 
 // _____________________________________________________________________________
+template <ad_utility::SimilarTo<IdTable> Table>
 IdTable Filter::filterIdTable(std::vector<ColumnIndex> sortedBy,
-                              const IdTable& idTable,
+                              Table&& idTable,
                               const LocalVocab& localVocab) const {
   size_t width = idTable.numColumns();
   IdTable result{width, getExecutionContext()->getAllocator()};
-  CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, result, idTable,
-                  localVocab, std::move(sortedBy));
+
+  auto impl = [this, &result, &idTable, &localVocab, &sortedBy]<int WIDTH>() {
+    return this->computeFilterImpl<WIDTH>(result, AD_FWD(idTable), localVocab,
+                                          std::move(sortedBy));
+  };
+  ad_utility::callFixedSize(width, impl);
   return result;
 }
 
 // _____________________________________________________________________________
-template <size_t WIDTH>
-void Filter::computeFilterImpl(IdTable& dynamicResultTable,
-                               const IdTable& inputTable,
+template <size_t WIDTH, ad_utility::SimilarTo<IdTable> Table>
+void Filter::computeFilterImpl(IdTable& dynamicResultTable, Table&& inputTable,
                                const LocalVocab& localVocab,
                                std::vector<ColumnIndex> sortedBy) const {
   AD_CONTRACT_CHECK(inputTable.numColumns() == WIDTH || WIDTH == 0);
@@ -127,7 +131,8 @@ void Filter::computeFilterImpl(IdTable& dynamicResultTable,
   // required to work around a bug in Clang 17, see
   // https://github.com/llvm/llvm-project/issues/61267
   auto computeResult =
-      [this, &resultTable = resultTable, &input,
+      [this, &resultTable = resultTable, &input, &inputTable,
+       &dynamicResultTable,
        &evaluationContext]<typename T>(
           T&& singleResult) {
         if constexpr (std::is_same_v<T, ad_utility::SetOfIntervals>) {
@@ -145,15 +150,20 @@ void Filter::computeFilterImpl(IdTable& dynamicResultTable,
              size_t intervalEnd = std::min(interval.second, input.size());
              return sum + (intervalEnd - intervalBegin);
            });
-            resultTable.reserve(totalSize);
+            if (resultTable.empty() && totalSize == inputTable.size()) {
+              // The binary filter contains all elements of the input, and we have
+              // no previous results, so we can simply copy or move the complete
+              // table.
+              dynamicResultTable = AD_FWD(inputTable).moveOrClone();
+              return;
+            }
            checkCancellation();
            for (auto [intervalBegin, intervalEnd] : singleResult._intervals) {
              intervalEnd = std::min(intervalEnd, input.size());
-              resultTable.insertAtEnd(input.cbegin() + intervalBegin,
-                                      input.cbegin() + intervalEnd);
+              resultTable.insertAtEnd(inputTable, intervalBegin, intervalEnd);
              checkCancellation();
            }
-            AD_CONTRACT_CHECK(resultTable.size() == totalSize);
+            AD_CORRECTNESS_CHECK(resultTable.size() == totalSize);
          } else {
            // In the general case, we generate all expression results and apply
            // the `EffectiveBooleanValueGetter` to each.
@@ -186,7 +196,11 @@ void Filter::computeFilterImpl(IdTable& dynamicResultTable,
       };
 
   std::visit(computeResult, std::move(expressionResult));
-  dynamicResultTable = std::move(resultTable).toDynamic();
+  // Detect the case that the binary-search filter above has already written
+  // the `dynamicResultTable` directly.
+  if (dynamicResultTable.empty()) {
+    dynamicResultTable = std::move(resultTable).toDynamic();
+  }
   checkCancellation();
 }
 
diff --git a/src/engine/Filter.h b/src/engine/Filter.h
index eaa9303bb7..a95c356451 100644
--- a/src/engine/Filter.h
+++ b/src/engine/Filter.h
@@ -61,13 +61,13 @@ class Filter : public Operation {
   ProtoResult computeResult(bool requestLaziness) override;
 
   // Perform the actual filter operation of the data provided.
-  template <size_t WIDTH>
-  void computeFilterImpl(IdTable& dynamicResultTable, const IdTable& input,
+  template <size_t WIDTH, ad_utility::SimilarTo<IdTable> Table>
+  void computeFilterImpl(IdTable& dynamicResultTable, Table&& input,
                          const LocalVocab& localVocab,
                          std::vector<ColumnIndex> sortedBy) const;
 
   // Run `computeFilterImpl` on the provided IdTable
-  IdTable filterIdTable(std::vector<ColumnIndex> sortedBy,
-                        const IdTable& idTable,
+  template <ad_utility::SimilarTo<IdTable> Table>
+  IdTable filterIdTable(std::vector<ColumnIndex> sortedBy, Table&& idTable,
                         const LocalVocab& localVocab) const;
 };
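Why the repeated `reserve` calls were quadratic: with vector-like storage that
reserves exactly the requested capacity, every per-block `reserve` reallocates
and copies all rows filtered so far, so n blocks cost O(n^2) copies in total.
A minimal stand-alone sketch of the two growth strategies (illustrative only,
not part of the patch; the function names are made up):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // O(numBlocks^2 * blockSize): `reserve` to an exact, slowly growing
    // target may reallocate and copy everything appended so far, once per
    // block.
    void appendWithReserve(std::vector<uint64_t>& out, size_t numBlocks,
                           size_t blockSize) {
      for (size_t block = 0; block < numBlocks; ++block) {
        out.reserve(out.size() + blockSize);  // copies ~block * blockSize rows
        for (size_t i = 0; i < blockSize; ++i) out.push_back(block);
      }
    }

    // O(numBlocks * blockSize) amortized: `resize` keeps the geometric
    // capacity growth, so each element is copied O(1) times on average.
    void appendWithResize(std::vector<uint64_t>& out, size_t numBlocks,
                          size_t blockSize) {
      for (size_t block = 0; block < numBlocks; ++block) {
        size_t oldSize = out.size();
        out.resize(oldSize + blockSize);
        for (size_t i = 0; i < blockSize; ++i) out[oldSize + i] = block;
      }
    }

On top of that, the binary-search branch above short-circuits the case where a
filter keeps a complete block while no previous results exist:
`AD_FWD(inputTable).moveOrClone()` then moves an rvalue block wholesale (and
deep-copies an lvalue) without touching individual intervals.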
diff --git a/src/engine/idTable/CompressedExternalIdTable.h b/src/engine/idTable/CompressedExternalIdTable.h
index 27b78c6121..962d18b6aa 100644
--- a/src/engine/idTable/CompressedExternalIdTable.h
+++ b/src/engine/idTable/CompressedExternalIdTable.h
@@ -686,7 +686,7 @@ class CompressedExternalIdTableSorter
         auto curBlock = IdTableStatic<NumStaticCols>(
             this->numColumns_, this->writer_.allocator());
         curBlock.reserve(upper - i);
-        curBlock.insertAtEnd(block.begin() + i, block.begin() + upper);
+        curBlock.insertAtEnd(block, i, upper);
         co_yield std::move(curBlock).template toStatic<NumStaticCols>();
       }
     }
diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h
index c76ee1b9d6..fb28b4aa99 100644
--- a/src/engine/idTable/IdTable.h
+++ b/src/engine/idTable/IdTable.h
@@ -444,6 +444,11 @@ class IdTable {
     push_back(std::ranges::ref_view{newRow});
   }
 
+  // True iff we can make a copy (via the `clone` function below), because the
+  // underlying storage types are copyable.
+  static constexpr bool isCloneable =
+      std::is_copy_constructible_v<Storage> &&
+      std::is_copy_constructible_v<Allocator>;
   // Create a deep copy of this `IdTable` that owns its memory. In most cases
   // this behaves exactly like the copy constructor with the following
   // exception: If `this` is a view (because the `isView` template parameter is
@@ -451,8 +456,7 @@ class IdTable {
   // non-owning) view, but `clone` will create a mutable deep copy of the data
   // that the view points to
   IdTable clone() const
-      requires std::is_copy_constructible_v<Storage> &&
-               std::is_copy_constructible_v<Allocator> {
+      requires isCloneable {
     Storage storage;
     for (const auto& column : getColumns()) {
       storage.emplace_back(column.begin(), column.end(), getAllocator());
@@ -461,6 +465,14 @@ class IdTable {
         std::move(storage), numColumns_, numRows_, allocator_};
   }
 
+  // Move or clone returns a copied or moved IdTable depending on the value
+  // category of `*this`. The typical usage is
+  // `auto newTable = AD_FWD(oldTable).moveOrClone()`, which is equivalent to
+  // the pattern `auto newX = AD_FWD(oldX)` for types that are
+  // copy-constructible (which `IdTable` is not).
+  auto moveOrClone() const& requires isCloneable { return clone(); }
+  IdTable&& moveOrClone() && requires isCloneable { return std::move(*this); }
+
   // Overload of `clone` for `Storage` types that are not copy constructible.
   // It requires a preconstructed but empty argument of type `Storage` that
   // is then resized and filled with the appropriate contents.
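The two ref-qualified `moveOrClone` overloads are the standard idiom for
emulating `auto newX = AD_FWD(oldX)` on a move-only type. A self-contained
sketch of the pattern (illustrative; the `Table` below is a toy stand-in for
`IdTable`, and `AD_FWD(x)` is QLever's shorthand for
`std::forward<decltype(x)>(x)`):

    #include <utility>
    #include <vector>

    class Table {
      std::vector<int> data_;

     public:
      explicit Table(std::vector<int> data) : data_(std::move(data)) {}
      Table(Table&&) = default;      // movable ...
      Table(const Table&) = delete;  // ... but not copy-constructible
      Table clone() const { return Table{data_}; }

      // Called on an lvalue: deep copy. Called on an rvalue: cheap move.
      Table moveOrClone() const& { return clone(); }
      Table&& moveOrClone() && { return std::move(*this); }
    };

    // Works for both value categories, exactly like `auto t = AD_FWD(input);`
    // would for a copy-constructible type.
    template <typename T>
    Table copyOrMove(T&& input) {
      return std::forward<T>(input).moveOrClone();
    }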
@@ -663,29 +675,28 @@ class IdTable {
   // otherwise the behavior is undefined.
   void erase(const iterator& it) requires(!isView) { erase(it, it + 1); }
 
-  // Insert all the elements in the range `(beginIt, endIt]` at the end
-  // of this `IdTable`. `beginIt` and `endIt` must *not* point into this
-  // IdTable, else the behavior is undefined.
-  //
-  // TODO Insert can be done much more efficiently when `beginIt` and
-  // `endIt` are iterators to a different column-major `IdTable`. Implement
-  // this case.
-  void insertAtEnd(auto beginIt, auto endIt) {
-    for (; beginIt != endIt; ++beginIt) {
-      push_back(*beginIt);
-    }
-  }
-
   // Add all entries from the `table` at the end of this IdTable.
-  void insertAtEnd(const IdTable& table) {
+  // If `beginIdx` and/or `endIdx` are specified, then only the subrange
+  // `[beginIdx, endIdx)` from the input is taken.
+  // The input must be some kind of `IdTable`.
+  // TODO Can/should we constrain this function by a concept?
+  template <typename Table>
+  void insertAtEnd(const Table& table,
+                   std::optional<size_t> beginIdx = std::nullopt,
+                   std::optional<size_t> endIdx = std::nullopt) {
     AD_CORRECTNESS_CHECK(table.numColumns() == numColumns());
+    auto begin = beginIdx.value_or(0);
+    auto end = endIdx.value_or(table.size());
+    AD_CORRECTNESS_CHECK(begin <= end && end <= table.size());
+    auto numInserted = end - begin;
     auto oldSize = size();
-    resize(numRows() + table.numRows_);
-    std::ranges::for_each(ad_utility::integerRange(numColumns()),
-                          [this, &table, oldSize](size_t i) {
-                            std::ranges::copy(table.getColumn(i),
-                                              getColumn(i).begin() + oldSize);
-                          });
+    resize(numRows() + numInserted);
+    std::ranges::for_each(
+        ad_utility::integerRange(numColumns()),
+        [this, &table, oldSize, begin, numInserted](size_t i) {
+          std::ranges::copy(table.getColumn(i).subspan(begin, numInserted),
+                            getColumn(i).begin() + oldSize);
+        });
   }
 
   // Check whether two `IdTables` have the same content. Mostly used for unit
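The rewritten `insertAtEnd` performs one `resize` and one contiguous copy per
column instead of `numColumns()` scattered writes per row, which is both
cache-friendly and amenable to memmove-style optimization. The same logic on a
toy column-major table (illustrative sketch, made-up names):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using Column = std::vector<uint64_t>;
    // Invariant: all columns have the same size.
    using ColumnMajorTable = std::vector<Column>;

    // Append rows [beginIdx, endIdx) of `src` to `dst`, column by column.
    void insertAtEnd(ColumnMajorTable& dst, const ColumnMajorTable& src,
                     size_t beginIdx, size_t endIdx) {
      size_t numInserted = endIdx - beginIdx;
      for (size_t col = 0; col < dst.size(); ++col) {
        size_t oldSize = dst[col].size();
        dst[col].resize(oldSize + numInserted);  // one resize per column
        std::ranges::copy(src[col].begin() + beginIdx,
                          src[col].begin() + endIdx,
                          dst[col].begin() + oldSize);  // one bulk copy
      }
    }

The optional `beginIdx`/`endIdx` parameters are what allow the former
iterator-pair call sites, e.g. `t.insertAtEnd(src.begin() + i, src.begin() + j)`,
to migrate to the index form `t.insertAtEnd(src, i, j)` in the remaining hunks
of this patch.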
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index c1db24b264..938d8880b2 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -202,7 +202,7 @@ IndexImpl::buildOspWithPatterns(
       }
       queue.push(std::move(table));
     } else {
-      outputBufferTable.insertAtEnd(table.begin(), table.end());
+      outputBufferTable.insertAtEnd(table);
       if (outputBufferTable.size() >= bufferSize) {
         queue.push(std::move(outputBufferTable));
         outputBufferTable.clear();
@@ -674,30 +674,31 @@ auto IndexImpl::convertPartialToGlobalIds(
   auto getLookupTask = [&isQLeverInternalTriple, &writeQueue, &transformTriple,
                         &getWriteTask](Buffer triples,
                                        std::shared_ptr<IdMap> idMap) {
-    return
-        [&isQLeverInternalTriple, &writeQueue,
-         triples = std::make_shared<Buffer>(std::move(triples)),
-         idMap = std::move(idMap), &getWriteTask, &transformTriple]() mutable {
-          for (Buffer::row_reference triple : *triples) {
-            transformTriple(triple, *idMap);
-          }
-          auto [beginInternal, endInternal] = std::ranges::partition(
-              *triples, [&isQLeverInternalTriple](const auto& row) {
-                return !isQLeverInternalTriple(row);
-              });
-          IdTableStatic<NumColumnsIndexBuilding> internalTriples(
-              triples->getAllocator());
-          // TODO We could leave the partitioned complete block as is,
-          // and change the interface of the compressed sorters s.t. we can
-          // push only a part of a block. We then would safe the copy of the
-          // internal triples here, but I am not sure whether this is worth it.
-          internalTriples.insertAtEnd(beginInternal, endInternal);
-          triples->resize(beginInternal - triples->begin());
-
-          Buffers buffers{std::move(*triples), std::move(internalTriples)};
-
-          writeQueue.push(getWriteTask(std::move(buffers)));
-        };
+    return [&isQLeverInternalTriple, &writeQueue,
+            triples = std::make_shared<Buffer>(std::move(triples)),
+            idMap = std::move(idMap), &getWriteTask,
+            &transformTriple]() mutable {
+      for (Buffer::row_reference triple : *triples) {
+        transformTriple(triple, *idMap);
+      }
+      auto [beginInternal, endInternal] = std::ranges::partition(
+          *triples, [&isQLeverInternalTriple](const auto& row) {
+            return !isQLeverInternalTriple(row);
+          });
+      IdTableStatic<NumColumnsIndexBuilding> internalTriples(
+          triples->getAllocator());
+      // TODO We could leave the partitioned complete block as is,
+      // and change the interface of the compressed sorters s.t. we can
+      // push only a part of a block. We would then save the copy of the
+      // internal triples here, but I am not sure whether this is worth it.
+      internalTriples.insertAtEnd(*triples, beginInternal - triples->begin(),
+                                  endInternal - triples->begin());
+      triples->resize(beginInternal - triples->begin());
+
+      Buffers buffers{std::move(*triples), std::move(internalTriples)};
+
+      writeQueue.push(getWriteTask(std::move(buffers)));
+    };
   };
 
   std::atomic<size_t> nextPartialVocabulary = 0;
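Because the new `insertAtEnd` takes row indices instead of iterators, the call
site above converts the iterators returned by `std::ranges::partition` into
offsets. The same dance reduced to a plain vector (illustrative sketch; `int`
rows stand in for triples):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Partition `rows` into "external" (front) and "internal" (back) rows,
    // copy the internal ones out, and truncate, mirroring the lambda above.
    std::vector<int> splitOffInternal(std::vector<int>& rows) {
      auto isInternal = [](int row) { return row < 0; };
      auto [beginInternal, endInternal] = std::ranges::partition(
          rows, [&](int row) { return !isInternal(row); });
      // Iterator -> index, as in `beginInternal - triples->begin()` above.
      size_t beginIdx = beginInternal - rows.begin();
      size_t endIdx = endInternal - rows.begin();
      std::vector<int> internal(rows.begin() + beginIdx,
                                rows.begin() + endIdx);
      rows.resize(beginIdx);  // keep only the external rows
      return internal;
    }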
diff --git a/test/CompressedRelationsTest.cpp b/test/CompressedRelationsTest.cpp
index 6db70c83b4..e9ab85df14 100644
--- a/test/CompressedRelationsTest.cpp
+++ b/test/CompressedRelationsTest.cpp
@@ -334,7 +334,7 @@ void testCompressedRelations(const auto& inputsOriginalBeforeCopy,
     for (const auto& block :
          reader.lazyScan(scanSpec, blocks, additionalColumns,
                          cancellationHandle, locatedTriples)) {
-      table.insertAtEnd(block.begin(), block.end());
+      table.insertAtEnd(block);
     }
 
     checkThatTablesAreEqual(col1And2, table);
@@ -358,7 +358,7 @@ void testCompressedRelations(const auto& inputsOriginalBeforeCopy,
     for (const auto& block :
          reader.lazyScan(scanSpec, blocks, Permutation::ColumnIndices{},
                          cancellationHandle, locatedTriples)) {
-      tableWidthOne.insertAtEnd(block.begin(), block.end());
+      tableWidthOne.insertAtEnd(block);
     }
     checkThatTablesAreEqual(col3, tableWidthOne);
   };
diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp
index 663bc46fd0..99c665feef 100644
--- a/test/ExportQueryExecutionTreesTest.cpp
+++ b/test/ExportQueryExecutionTreesTest.cpp
@@ -317,8 +317,7 @@ std::vector<IdTable> convertToVector(
   for (const auto& [pair, range] : generator) {
     const auto& idTable = pair.idTable_;
     result.emplace_back(idTable.numColumns(), idTable.getAllocator());
-    result.back().insertAtEnd(idTable.begin() + *range.begin(),
-                              idTable.begin() + *(range.end() - 1) + 1);
+    result.back().insertAtEnd(idTable, *range.begin(), *(range.end() - 1) + 1);
   }
   return result;
 }
diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp
index e683503548..c01b7a57a9 100644
--- a/test/FilterTest.cpp
+++ b/test/FilterTest.cpp
@@ -7,6 +7,8 @@
 #include "engine/Filter.h"
 #include "engine/ValuesForTesting.h"
 #include "engine/sparqlExpressions/LiteralExpression.h"
+#include "engine/sparqlExpressions/NaryExpression.h"
+#include "engine/sparqlExpressions/RelationalExpressions.h"
 #include "util/IdTableHelpers.h"
 #include "util/IndexTestHelpers.h"
 
@@ -134,3 +136,40 @@ TEST(Filter,
       result->idTable(),
       makeIdTableFromVector({{true}, {true}, {true}, {true}, {true}}, asBool));
 }
+
+template <typename T, typename... Args>
+static constexpr auto make(Args&&... args) {
+  return std::make_unique<T>(AD_FWD(args)...);
+}
+
+// _____________________________________________________________________________
+TEST(Filter, lazyChildMaterializedResultBinaryFilter) {
+  QueryExecutionContext* qec = ad_utility::testing::getQec();
+  qec->getQueryTreeCache().clearAll();
+  std::vector<IdTable> idTables;
+  auto I = ad_utility::testing::IntId;
+  idTables.push_back(makeIdTableFromVector({{1}, {2}, {3}, {3}, {4}}, I));
+  idTables.push_back(makeIdTableFromVector({{4}, {5}}, I));
+  idTables.push_back(makeIdTableFromVector({{6}, {7}}, I));
+  idTables.push_back(makeIdTableFromVector({{8}, {8}}, I));
+
+  auto varX = Variable{"?x"};
+  using namespace sparqlExpression;
+  auto expr = makeUnaryNegateExpression(
+      make<LessThanExpression>(std::array<SparqlExpression::Ptr, 2>{
+          make<VariableExpression>(varX), make<IdExpression>(I(5))}));
+
+  ValuesForTesting values{
+      qec, std::move(idTables), {Variable{"?x"}}, false, {0}};
+  QueryExecutionTree subTree{
+      qec, std::make_shared<ValuesForTesting>(std::move(values))};
+  Filter filter{qec,
+                std::make_shared<QueryExecutionTree>(std::move(subTree)),
+                {std::move(expr), "!?x < 5"}};
+
+  auto result = filter.getResult(false, ComputationMode::FULLY_MATERIALIZED);
+  ASSERT_TRUE(result->isFullyMaterialized());
+
+  EXPECT_EQ(result->idTable(),
+            makeIdTableFromVector({{5}, {6}, {7}, {8}, {8}}, I));
+}
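What the new test exercises: the child result is sorted by `?x`, so the
expression `!(?x < 5)` is evaluated as a set of row intervals located by
binary search (the `SetOfIntervals` branch above) instead of a per-row
boolean. Sketched on a plain sorted column (illustrative; this is not the
actual `SetOfIntervals` API):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // On an ascending column, the rows with `!(x < 5)` form one half-open
    // interval [lowerBound(5), size()), found in O(log n).
    std::pair<size_t, size_t> matchingInterval(
        const std::vector<int64_t>& col) {
      auto first = std::ranges::lower_bound(col, int64_t{5});
      return {static_cast<size_t>(first - col.begin()), col.size()};
    }

On the concatenated test input (1, 2, 3, 3, 4, 4, 5, 6, 7, 8, 8) that interval
is [6, 11), i.e. exactly the expected rows 5, 6, 7, 8, 8.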
diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp
index 34b1ad3072..483f3bcb92 100644
--- a/test/IdTableTest.cpp
+++ b/test/IdTableTest.cpp
@@ -358,7 +358,7 @@ TEST(IdTable, insertAtEnd) {
     Table t2 = clone(init, std::move(additionalArgs.at(2))...);
 
     // Test inserting at the end
-    t2.insertAtEnd(t1.begin(), t1.end());
+    t2.insertAtEnd(t1);
     for (size_t i = 0; i < init.size(); i++) {
       ASSERT_EQ(init[i], t2[i]) << i;
     }
@@ -646,7 +646,7 @@ TEST(IdTableStaticTest, insert) {
   IdTableStatic<4> t2 = init.clone();
 
   // Test inserting at the end
-  t2.insertAtEnd(t1.begin(), t1.end());
+  t2.insertAtEnd(t1);
   for (size_t i = 0; i < init.size(); i++) {
     for (size_t j = 0; j < init.numColumns(); j++) {
       EXPECT_EQ(init(i, j), t2(i, j)) << i << ", " << j;
@@ -1136,6 +1136,20 @@ TEST(IdTable, addEmptyColumn) {
   EXPECT_EQ(table.getColumn(1).size(), 2);
 }
 
+// _____________________________________________________________________________
+TEST(IdTable, moveOrClone) {
+  IdTable table{1, ad_utility::makeUnlimitedAllocator<Id>()};
+  table.push_back({V(1)});
+  table.push_back({V(2)});
+
+  auto t2 = table.moveOrClone();
+  EXPECT_EQ(table, t2);
+  auto t3 = std::move(table).moveOrClone();
+  EXPECT_EQ(t3, t2);
+  // `table` was moved from.
+  EXPECT_TRUE(table.empty());
+}
+
 // Check that we can completely instantiate `IdTable`s with a different value
 // type and a different underlying storage.
diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp
index 7a0383390b..d316d12010 100644
--- a/test/engine/IndexScanTest.cpp
+++ b/test/engine/IndexScanTest.cpp
@@ -41,7 +41,7 @@ void testLazyScan(Permutation::IdTableGenerator partialLazyScanResult,
     if (lazyScanRes.empty()) {
       lazyScanRes.setNumColumns(block.numColumns());
     }
-    lazyScanRes.insertAtEnd(block.begin(), block.end());
+    lazyScanRes.insertAtEnd(block);
     ++numBlocks;
   }
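A note on the `moveOrClone` test: `EXPECT_TRUE(table.empty())` after
`std::move(table).moveOrClone()` relies on `IdTable`'s move operations leaving
the source empty, which is exactly what the test pins down. For standard
containers the moved-from state is only "valid but unspecified", so the
analogous stand-alone sketch (with `std::vector` as a stand-in) holds on
common implementations but is not a portable guarantee:

    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<int> table{1, 2};

      auto t2 = table;  // lvalue path of `moveOrClone`: deep copy
      assert(t2 == table);  // the source is untouched

      auto t3 = std::move(table);  // rvalue path: move
      assert(t3 == t2);
      // Empty on libstdc++ and libc++; `IdTable` turns this into a tested
      // guarantee.
      assert(table.empty());
    }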