Fix quadratic runtime of FILTER with lazy input and materialized result (#1634)

After #1478, a `FILTER` with lazy input and a materialized result had quadratic runtime because of repeated calls to `reserve`. This is now fixed by calling `resize` instead. In the special case of a single block that passes the filter completely, we simply move the block and avoid the copy. As a side refactoring, `IdTable::insertAtEnd` now inserts column by column instead of row by row.
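
For context, here is a minimal standalone sketch (not QLever code) of the difference. On common standard-library implementations, `reserve(n)` allocates exactly the requested capacity, so re-reserving to the exact new size before every appended block reallocates and copies all previously inserted elements once per block, which is quadratic overall; growing via `resize` instead goes through the container's geometric growth policy and stays amortized linear.

#include <cstddef>
#include <vector>

// Appends `numBlocks` blocks of `blockSize` elements each.
// With exactReserve == true, the extra reserve() call requests exactly the new
// size, so (after the first block) every block triggers a reallocation that
// copies all existing elements: O(numBlocks^2 * blockSize) copies in total.
// With exactReserve == false, only resize() is used; capacity then grows
// geometrically and the total copy work is amortized linear.
std::vector<int> appendBlocks(std::size_t numBlocks, std::size_t blockSize,
                              bool exactReserve) {
  std::vector<int> result;
  for (std::size_t b = 0; b < numBlocks; ++b) {
    if (exactReserve) {
      result.reserve(result.size() + blockSize);
    }
    result.resize(result.size() + blockSize);
  }
  return result;
}
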
joka921 authored Nov 26, 2024
1 parent 7bffcd5 commit edc2889
Showing 10 changed files with 151 additions and 73 deletions.
42 changes: 28 additions & 14 deletions src/engine/Filter.cpp
@@ -76,8 +76,8 @@ ProtoResult Filter::computeResult(bool requestLaziness) {
ad_utility::callFixedSize(
width, [this, &subRes, &result, &resultLocalVocab]<int WIDTH>() {
for (Result::IdTableVocabPair& pair : subRes->idTables()) {
computeFilterImpl<WIDTH>(result, pair.idTable_, pair.localVocab_,
subRes->sortedBy());
computeFilterImpl<WIDTH>(result, std::move(pair.idTable_),
pair.localVocab_, subRes->sortedBy());
resultLocalVocab.mergeWith(std::span{&pair.localVocab_, 1});
}
});
@@ -88,20 +88,24 @@ ProtoResult Filter::computeResult(bool requestLaziness) {
}

// _____________________________________________________________________________
template <ad_utility::SimilarTo<IdTable> Table>
IdTable Filter::filterIdTable(std::vector<ColumnIndex> sortedBy,
const IdTable& idTable,
Table&& idTable,
const LocalVocab& localVocab) const {
size_t width = idTable.numColumns();
IdTable result{width, getExecutionContext()->getAllocator()};
CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, result, idTable,
localVocab, std::move(sortedBy));

auto impl = [this, &result, &idTable, &localVocab, &sortedBy]<int WIDTH> {
return this->computeFilterImpl<WIDTH>(result, AD_FWD(idTable), localVocab,
std::move(sortedBy));
};
ad_utility::callFixedSize(width, impl);
return result;
}

// _____________________________________________________________________________
template <int WIDTH>
void Filter::computeFilterImpl(IdTable& dynamicResultTable,
const IdTable& inputTable,
template <int WIDTH, ad_utility::SimilarTo<IdTable> Table>
void Filter::computeFilterImpl(IdTable& dynamicResultTable, Table&& inputTable,
const LocalVocab& localVocab,
std::vector<ColumnIndex> sortedBy) const {
AD_CONTRACT_CHECK(inputTable.numColumns() == WIDTH || WIDTH == 0);
@@ -127,7 +131,8 @@ void Filter::computeFilterImpl(IdTable& dynamicResultTable,
// required to work around a bug in Clang 17, see
// https://github.com/llvm/llvm-project/issues/61267
auto computeResult =
[this, &resultTable = resultTable, &input,
[this, &resultTable = resultTable, &input, &inputTable,
&dynamicResultTable,
&evaluationContext]<sparqlExpression::SingleExpressionResult T>(
T&& singleResult) {
if constexpr (std::is_same_v<T, ad_utility::SetOfIntervals>) {
@@ -145,15 +150,20 @@ void Filter::computeFilterImpl(IdTable& dynamicResultTable,
size_t intervalEnd = std::min(interval.second, input.size());
return sum + (intervalEnd - intervalBegin);
});
resultTable.reserve(totalSize);
if (resultTable.empty() && totalSize == inputTable.size()) {
// The binary filter contains all elements of the input, and we have
// no previous results, so we can simply copy or move the complete
// table.
dynamicResultTable = AD_FWD(inputTable).moveOrClone();
return;
}
checkCancellation();
for (auto [intervalBegin, intervalEnd] : singleResult._intervals) {
intervalEnd = std::min(intervalEnd, input.size());
resultTable.insertAtEnd(input.cbegin() + intervalBegin,
input.cbegin() + intervalEnd);
resultTable.insertAtEnd(inputTable, intervalBegin, intervalEnd);
checkCancellation();
}
AD_CONTRACT_CHECK(resultTable.size() == totalSize);
AD_CORRECTNESS_CHECK(resultTable.size() == totalSize);
} else {
// In the general case, we generate all expression results and apply
// the `EffectiveBooleanValueGetter` to each.
@@ -186,7 +196,11 @@ void Filter::computeFilterImpl(IdTable& dynamicResultTable,
};
std::visit(computeResult, std::move(expressionResult));

dynamicResultTable = std::move(resultTable).toDynamic();
// Detect the case that we have directly written the `dynamicResultTable`
// in the binary search filter case.
if (dynamicResultTable.empty()) {
dynamicResultTable = std::move(resultTable).toDynamic();
}
checkCancellation();
}

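To illustrate the new fast path in `computeFilterImpl` above, here is a simplified standalone model (plain `std::vector` rows as a stand-in for an `IdTable` block; all names are hypothetical): the total number of rows selected by the intervals is computed first; if it equals the size of the input block and nothing has been written to the result yet, the block is handed over wholesale instead of being copied interval by interval. The real code forwards the input's value category, so an rvalue block is moved and a const lvalue is cloned via `moveOrClone`.

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

using Block = std::vector<int>;  // stand-in for one IdTable block
// Half-open row intervals [begin, end) selected by the filter.
using Intervals = std::vector<std::pair<std::size_t, std::size_t>>;

// Appends the selected rows of `input` to `result`. `BlockT&&` is a forwarding
// reference (mirroring `Table&&` in the diff): passing an rvalue enables the
// move in the fast path, passing an lvalue falls back to copying.
template <typename BlockT>
void filterBlock(BlockT&& input, const Intervals& intervals, Block& result) {
  std::size_t total = std::accumulate(
      intervals.begin(), intervals.end(), std::size_t{0},
      [&](std::size_t sum, const auto& iv) {
        return sum + (std::min(iv.second, input.size()) -
                      std::min(iv.first, input.size()));
      });
  if (result.empty() && total == input.size()) {
    // Fast path: the intervals cover the whole block and there is no previous
    // result, so take the block as a whole (move for rvalues, copy for lvalues).
    result = std::forward<BlockT>(input);
    return;
  }
  for (auto [begin, end] : intervals) {
    end = std::min(end, input.size());
    result.insert(result.end(), input.begin() + begin, input.begin() + end);
  }
}
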
8 changes: 4 additions & 4 deletions src/engine/Filter.h
@@ -61,13 +61,13 @@ class Filter : public Operation {
ProtoResult computeResult(bool requestLaziness) override;

// Perform the actual filter operation of the data provided.
template <int WIDTH>
void computeFilterImpl(IdTable& dynamicResultTable, const IdTable& input,
template <int WIDTH, ad_utility::SimilarTo<IdTable> Table>
void computeFilterImpl(IdTable& dynamicResultTable, Table&& input,
const LocalVocab& localVocab,
std::vector<ColumnIndex> sortedBy) const;

// Run `computeFilterImpl` on the provided IdTable
IdTable filterIdTable(std::vector<ColumnIndex> sortedBy,
const IdTable& idTable,
template <ad_utility::SimilarTo<IdTable> Table>
IdTable filterIdTable(std::vector<ColumnIndex> sortedBy, Table&& idTable,
const LocalVocab& localVocab) const;
};
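
The `ad_utility::SimilarTo<IdTable> Table` parameter above lets the same declaration accept `const IdTable&` as well as `IdTable&&`, so `Table&&` acts as a forwarding reference and the implementation can either read or consume its input. A rough guess at what such a concept checks (an assumption, not copied from the QLever sources):

#include <concepts>
#include <type_traits>

// "Similar" here means: the same type after stripping references and
// cv-qualifiers, so T may be deduced as IdTable, IdTable&, const IdTable&, ...
template <typename T, typename U>
concept SimilarTo = std::same_as<std::remove_cvref_t<T>, std::remove_cvref_t<U>>;

With that in place, `Table&& idTable` binds to both lvalue and rvalue tables, and `AD_FWD(idTable)` preserves the caller's value category all the way down to `moveOrClone`.
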
2 changes: 1 addition & 1 deletion src/engine/idTable/CompressedExternalIdTable.h
@@ -686,7 +686,7 @@ class CompressedExternalIdTableSorter
auto curBlock = IdTableStatic<NumStaticCols>(
this->numColumns_, this->writer_.allocator());
curBlock.reserve(upper - i);
curBlock.insertAtEnd(block.begin() + i, block.begin() + upper);
curBlock.insertAtEnd(block, i, upper);
co_yield std::move(curBlock).template toStatic<N>();
}
}
55 changes: 33 additions & 22 deletions src/engine/idTable/IdTable.h
@@ -444,15 +444,19 @@ class IdTable {
push_back(std::ranges::ref_view{newRow});
}

// True iff we can make a copy (via the `clone` function below), because the
// underlying storage types are copyable.
static constexpr bool isCloneable =
std::is_copy_constructible_v<Storage> &&
std::is_copy_constructible_v<ColumnStorage>;
// Create a deep copy of this `IdTable` that owns its memory. In most cases
// this behaves exactly like the copy constructor with the following
// exception: If `this` is a view (because the `isView` template parameter is
// `true`), then the copy constructor will also create a (const and
// non-owning) view, but `clone` will create a mutable deep copy of the data
// that the view points to
IdTable<T, NumColumns, ColumnStorage, IsView::False> clone() const
requires std::is_copy_constructible_v<Storage> &&
std::is_copy_constructible_v<ColumnStorage> {
requires isCloneable {
Storage storage;
for (const auto& column : getColumns()) {
storage.emplace_back(column.begin(), column.end(), getAllocator());
@@ -461,6 +465,14 @@
std::move(storage), numColumns_, numRows_, allocator_};
}

// Move or clone returns a copied or moved IdTable depending on the value
// category of `*this`. The typical usage is
// `auto newTable = AD_FWD(oldTable).moveOrClone()` which is equivalent to the
// pattern `auto newX = AD_FWD(oldX)` where the type is copy-constructible
// (which `IdTable` is not).
auto moveOrClone() const& requires isCloneable { return clone(); }
IdTable&& moveOrClone() && requires isCloneable { return std::move(*this); }

// Overload of `clone` for `Storage` types that are not copy constructible.
// It requires a preconstructed but empty argument of type `Storage` that
// is then resized and filled with the appropriate contents.
@@ -663,29 +675,28 @@ class IdTable {
// otherwise the behavior is undefined.
void erase(const iterator& it) requires(!isView) { erase(it, it + 1); }

// Insert all the elements in the range `(beginIt, endIt]` at the end
// of this `IdTable`. `beginIt` and `endIt` must *not* point into this
// IdTable, else the behavior is undefined.
//
// TODO<joka921> Insert can be done much more efficiently when `beginIt` and
// `endIt` are iterators to a different column-major `IdTable`. Implement
// this case.
void insertAtEnd(auto beginIt, auto endIt) {
for (; beginIt != endIt; ++beginIt) {
push_back(*beginIt);
}
}

// Add all entries from the `table` at the end of this IdTable.
void insertAtEnd(const IdTable& table) {
// If `beginIdx` and/or `endIdx` are specified, then only the subrange
// `[beginIdx, endIdx)` from the input is taken.
// The input must be some kind of `IdTable`.
// TODO<joka921> Can/should we constrain this function with a concept?
template <typename Table>
void insertAtEnd(const Table& table,
std::optional<size_t> beginIdx = std::nullopt,
std::optional<size_t> endIdx = std::nullopt) {
AD_CORRECTNESS_CHECK(table.numColumns() == numColumns());
auto begin = beginIdx.value_or(0);
auto end = endIdx.value_or(table.size());
AD_CORRECTNESS_CHECK(begin <= end && end <= table.size());
auto numInserted = end - begin;
auto oldSize = size();
resize(numRows() + table.numRows_);
std::ranges::for_each(ad_utility::integerRange(numColumns()),
[this, &table, oldSize](size_t i) {
std::ranges::copy(table.getColumn(i),
getColumn(i).begin() + oldSize);
});
resize(numRows() + numInserted);
std::ranges::for_each(
ad_utility::integerRange(numColumns()),
[this, &table, oldSize, begin, numInserted](size_t i) {
std::ranges::copy(table.getColumn(i).subspan(begin, numInserted),
getColumn(i).begin() + oldSize);
});
}

// Check whether two `IdTables` have the same content. Mostly used for unit
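
The rewritten `insertAtEnd` above resizes once and then copies the selected rows column by column instead of appending row by row. Since the table is stored column-major, each column is contiguous, so this replaces many small per-row writes with one bulk copy per column. A rough standalone sketch of the same idea on a toy column-major table (plain `std::vector` columns, not the real `IdTable` storage):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

using Id = std::uint64_t;

// A toy column-major table: columns_[c][r] holds row r of column c.
struct ToyTable {
  std::vector<std::vector<Id>> columns_;

  std::size_t numColumns() const { return columns_.size(); }
  std::size_t numRows() const { return columns_.empty() ? 0 : columns_[0].size(); }

  // Appends rows [beginIdx, endIdx) of `other`: one resize and one contiguous
  // copy per column, instead of one push_back per inserted row.
  void insertAtEnd(const ToyTable& other, std::size_t beginIdx, std::size_t endIdx) {
    assert(other.numColumns() == numColumns());
    assert(beginIdx <= endIdx && endIdx <= other.numRows());
    const std::size_t oldSize = numRows();
    const std::size_t numInserted = endIdx - beginIdx;
    for (std::size_t c = 0; c < numColumns(); ++c) {
      columns_[c].resize(oldSize + numInserted);
      std::copy(other.columns_[c].begin() + beginIdx,
                other.columns_[c].begin() + endIdx,
                columns_[c].begin() + oldSize);
    }
  }
};
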
51 changes: 26 additions & 25 deletions src/index/IndexImpl.cpp
@@ -202,7 +202,7 @@ IndexImpl::buildOspWithPatterns(
}
queue.push(std::move(table));
} else {
outputBufferTable.insertAtEnd(table.begin(), table.end());
outputBufferTable.insertAtEnd(table);
if (outputBufferTable.size() >= bufferSize) {
queue.push(std::move(outputBufferTable));
outputBufferTable.clear();
@@ -674,30 +674,31 @@ auto IndexImpl::convertPartialToGlobalIds(
auto getLookupTask = [&isQLeverInternalTriple, &writeQueue, &transformTriple,
&getWriteTask](Buffer triples,
std::shared_ptr<Map> idMap) {
return
[&isQLeverInternalTriple, &writeQueue,
triples = std::make_shared<Buffer>(std::move(triples)),
idMap = std::move(idMap), &getWriteTask, &transformTriple]() mutable {
for (Buffer::row_reference triple : *triples) {
transformTriple(triple, *idMap);
}
auto [beginInternal, endInternal] = std::ranges::partition(
*triples, [&isQLeverInternalTriple](const auto& row) {
return !isQLeverInternalTriple(row);
});
IdTableStatic<NumColumnsIndexBuilding> internalTriples(
triples->getAllocator());
// TODO<joka921> We could leave the partitioned complete block as is,
// and change the interface of the compressed sorters s.t. we can
// push only a part of a block. We would then save the copy of the
// internal triples here, but I am not sure whether this is worth it.
internalTriples.insertAtEnd(beginInternal, endInternal);
triples->resize(beginInternal - triples->begin());

Buffers buffers{std::move(*triples), std::move(internalTriples)};

writeQueue.push(getWriteTask(std::move(buffers)));
};
return [&isQLeverInternalTriple, &writeQueue,
triples = std::make_shared<Buffer>(std::move(triples)),
idMap = std::move(idMap), &getWriteTask,
&transformTriple]() mutable {
for (Buffer::row_reference triple : *triples) {
transformTriple(triple, *idMap);
}
auto [beginInternal, endInternal] = std::ranges::partition(
*triples, [&isQLeverInternalTriple](const auto& row) {
return !isQLeverInternalTriple(row);
});
IdTableStatic<NumColumnsIndexBuilding> internalTriples(
triples->getAllocator());
// TODO<joka921> We could leave the partitioned complete block as is,
// and change the interface of the compressed sorters s.t. we can
// push only a part of a block. We would then save the copy of the
// internal triples here, but I am not sure whether this is worth it.
internalTriples.insertAtEnd(*triples, beginInternal - triples->begin(),
endInternal - triples->begin());
triples->resize(beginInternal - triples->begin());

Buffers buffers{std::move(*triples), std::move(internalTriples)};

writeQueue.push(getWriteTask(std::move(buffers)));
};
};

std::atomic<size_t> nextPartialVocabulary = 0;
4 changes: 2 additions & 2 deletions test/CompressedRelationsTest.cpp
@@ -334,7 +334,7 @@ void testCompressedRelations(const auto& inputsOriginalBeforeCopy,
for (const auto& block :
reader.lazyScan(scanSpec, blocks, additionalColumns,
cancellationHandle, locatedTriples)) {
table.insertAtEnd(block.begin(), block.end());
table.insertAtEnd(block);
}
checkThatTablesAreEqual(col1And2, table);

@@ -358,7 +358,7 @@ void testCompressedRelations(const auto& inputsOriginalBeforeCopy,
for (const auto& block :
reader.lazyScan(scanSpec, blocks, Permutation::ColumnIndices{},
cancellationHandle, locatedTriples)) {
tableWidthOne.insertAtEnd(block.begin(), block.end());
tableWidthOne.insertAtEnd(block);
}
checkThatTablesAreEqual(col3, tableWidthOne);
};
3 changes: 1 addition & 2 deletions test/ExportQueryExecutionTreesTest.cpp
@@ -317,8 +317,7 @@ std::vector<IdTable> convertToVector(
for (const auto& [pair, range] : generator) {
const auto& idTable = pair.idTable_;
result.emplace_back(idTable.numColumns(), idTable.getAllocator());
result.back().insertAtEnd(idTable.begin() + *range.begin(),
idTable.begin() + *(range.end() - 1) + 1);
result.back().insertAtEnd(idTable, *range.begin(), *(range.end() - 1) + 1);
}
return result;
}
39 changes: 39 additions & 0 deletions test/FilterTest.cpp
@@ -7,6 +7,8 @@
#include "engine/Filter.h"
#include "engine/ValuesForTesting.h"
#include "engine/sparqlExpressions/LiteralExpression.h"
#include "engine/sparqlExpressions/NaryExpression.h"
#include "engine/sparqlExpressions/RelationalExpressions.h"
#include "util/IdTableHelpers.h"
#include "util/IndexTestHelpers.h"

@@ -134,3 +136,40 @@ TEST(Filter,
result->idTable(),
makeIdTableFromVector({{true}, {true}, {true}, {true}, {true}}, asBool));
}

template <typename Expr, typename... Args>
static constexpr auto make(Args&&... args) {
return std::make_unique<Expr>(AD_FWD(args)...);
}

// _____________________________________________________________________________
TEST(Filter, lazyChildMaterializedResultBinaryFilter) {
QueryExecutionContext* qec = ad_utility::testing::getQec();
qec->getQueryTreeCache().clearAll();
std::vector<IdTable> idTables;
auto I = ad_utility::testing::IntId;
idTables.push_back(makeIdTableFromVector({{1}, {2}, {3}, {3}, {4}}, I));
idTables.push_back(makeIdTableFromVector({{4}, {5}}, I));
idTables.push_back(makeIdTableFromVector({{6}, {7}}, I));
idTables.push_back(makeIdTableFromVector({{8}, {8}}, I));

auto varX = Variable{"?x"};
using namespace sparqlExpression;
auto expr = makeUnaryNegateExpression(
make<LessThanExpression>(std::array<SparqlExpression::Ptr, 2>{
make<VariableExpression>(varX), make<IdExpression>(I(5))}));

ValuesForTesting values{
qec, std::move(idTables), {Variable{"?x"}}, false, {0}};
QueryExecutionTree subTree{
qec, std::make_shared<ValuesForTesting>(std::move(values))};
Filter filter{qec,
std::make_shared<QueryExecutionTree>(std::move(subTree)),
{std::move(expr), "!?x < 5"}};

auto result = filter.getResult(false, ComputationMode::FULLY_MATERIALIZED);
ASSERT_TRUE(result->isFullyMaterialized());

EXPECT_EQ(result->idTable(),
makeIdTableFromVector({{5}, {6}, {7}, {8}, {8}}, I));
}
18 changes: 16 additions & 2 deletions test/IdTableTest.cpp
@@ -358,7 +358,7 @@ TEST(IdTable, insertAtEnd) {

Table t2 = clone(init, std::move(additionalArgs.at(2))...);
// Test inserting at the end
t2.insertAtEnd(t1.begin(), t1.end());
t2.insertAtEnd(t1);
for (size_t i = 0; i < init.size(); i++) {
ASSERT_EQ(init[i], t2[i]) << i;
}
@@ -646,7 +646,7 @@ TEST(IdTableStaticTest, insert) {
IdTableStatic<4> t2 = init.clone();

// Test inserting at the end
t2.insertAtEnd(t1.begin(), t1.end());
t2.insertAtEnd(t1);
for (size_t i = 0; i < init.size(); i++) {
for (size_t j = 0; j < init.numColumns(); j++) {
EXPECT_EQ(init(i, j), t2(i, j)) << i << ", " << j;
@@ -1136,6 +1136,20 @@ TEST(IdTable, addEmptyColumn) {
EXPECT_EQ(table.getColumn(1).size(), 2);
}

// _____________________________________________________________________________
TEST(IdTable, moveOrClone) {
IdTable table{1, ad_utility::makeUnlimitedAllocator<Id>()};
table.push_back({V(1)});
table.push_back({V(2)});

auto t2 = table.moveOrClone();
EXPECT_EQ(table, t2);
auto t3 = std::move(table).moveOrClone();
EXPECT_EQ(t3, t2);
// `table` was moved from.
EXPECT_TRUE(table.empty());
}

// Check that we can completely instantiate `IdTable`s with a different value
// type and a different underlying storage.

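The `moveOrClone` test above exercises ref-qualified overloads: the value category of the object picks between a deep copy and a move. A generic minimal sketch of that idiom (hypothetical `Widget` type, not from the repository; the pattern matters for `IdTable` because it is not copy-constructible, so a plain copy-then-forward is not available):

#include <string>
#include <utility>

// Ref-qualified overloads: calling moveOrClone() on an lvalue copies, calling
// it on an rvalue hands over the existing object without copying.
struct Widget {
  std::string data_;

  Widget clone() const { return Widget{data_}; }

  Widget moveOrClone() const& { return clone(); }
  Widget&& moveOrClone() && { return std::move(*this); }
};

// Usage:
//   Widget w{"payload"};
//   auto copy  = w.moveOrClone();             // deep copy, w unchanged
//   auto moved = std::move(w).moveOrClone();  // moves, w is left in a moved-from state
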
2 changes: 1 addition & 1 deletion test/engine/IndexScanTest.cpp
@@ -41,7 +41,7 @@ void testLazyScan(Permutation::IdTableGenerator partialLazyScanResult,
if (lazyScanRes.empty()) {
lazyScanRes.setNumColumns(block.numColumns());
}
lazyScanRes.insertAtEnd(block.begin(), block.end());
lazyScanRes.insertAtEnd(block);
++numBlocks;
}

