Skip to content

Commit

Permalink
Add InputRange... classes as a replacement for cppcoro::generator (#1678)
Browse files Browse the repository at this point in the history

The new classes (called `InputRange...`) and their requirements can be found in `src/util/iterators.h`. They can be used to replace our many uses of `cppcoro::generator`.

For an example, see `src/engine/Result.h`, where `using LazyResult = ad_utility::InputRangeTypeErased<IdTableVocabPair>` is defined as a replacement for `using Generator = cppcoro::generator<IdTableVocabPair>`, and the many places where now `LazyResult` is used instead of `Generator`.

Other replacements will take a little more work.
  • Loading branch information
joka921 authored Dec 16, 2024
1 parent e422852 commit 332d8e5
Show file tree
Hide file tree
Showing 26 changed files with 478 additions and 99 deletions.
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
add_subdirectory(sparqlExpressions)
add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp)
qlever_target_link_libraries(SortPerformanceEstimator)
qlever_target_link_libraries(SortPerformanceEstimator parser)
add_library(engine
Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp
IndexScan.cpp Join.cpp Sort.cpp
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Distinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const {

// _____________________________________________________________________________
template <size_t WIDTH>
Result::Generator Distinct::lazyDistinct(Result::Generator input,
Result::Generator Distinct::lazyDistinct(Result::LazyResult input,
bool yieldOnce) const {
IdTable aggregateTable{subtree_->getResultWidth(), allocator()};
LocalVocab aggregateVocab{};
Expand Down
3 changes: 2 additions & 1 deletion src/engine/Distinct.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ class Distinct : public Operation {
// if every `IdTable` from `input` should yield its own `IdTable` or if all
// of them should get aggregated into a single big `IdTable`.
template <size_t WIDTH>
Result::Generator lazyDistinct(Result::Generator input, bool yieldOnce) const;
Result::Generator lazyDistinct(Result::LazyResult input,
bool yieldOnce) const;

// Removes all duplicates from input with regards to the columns
// in keepIndices. The input needs to be sorted on the keep indices,
Expand Down
2 changes: 0 additions & 2 deletions src/engine/GroupBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,6 @@ uint64_t GroupBy::getSizeEstimateBeforeLimit() {
return _subtree->getMultiplicity(_subtree->getVariableColumn(var));
};

// TODO<joka921> Once we can use `std::views` this can be solved
// more elegantly.
float minMultiplicity = ql::ranges::min(
_groupByVariables | ql::views::transform(varToMultiplicity));
return _subtree->getSizeEstimate() / minMultiplicity;
Expand Down
6 changes: 3 additions & 3 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,13 +489,13 @@ void IndexScan::updateRuntimeInfoForLazyScan(const LazyScanMetadata& metadata) {
// resulting from the generator.
struct IndexScan::SharedGeneratorState {
// The generator that yields the tables to be joined with the index scan.
Result::Generator generator_;
Result::LazyResult generator_;
// The column index of the join column in the tables yielded by the generator.
ColumnIndex joinColumn_;
// Metadata and blocks of this index scan.
Permutation::MetadataAndBlocks metaBlocks_;
// The iterator of the generator that is currently being consumed.
std::optional<Result::Generator::iterator> iterator_ = std::nullopt;
std::optional<Result::LazyResult::iterator> iterator_ = std::nullopt;
// Values returned by the generator that have not been re-yielded yet.
// Typically we expect only 3 or fewer values to be prefetched (this is an
// implementation detail of `BlockZipperJoinImpl`).
Expand Down Expand Up @@ -648,7 +648,7 @@ Result::Generator IndexScan::createPrefilteredIndexScanSide(

// _____________________________________________________________________________
std::pair<Result::Generator, Result::Generator> IndexScan::prefilterTables(
Result::Generator input, ColumnIndex joinColumn) {
Result::LazyResult input, ColumnIndex joinColumn) {
AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0);
auto metaBlocks = getMetadataForScan();

Expand Down
2 changes: 1 addition & 1 deletion src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class IndexScan final : public Operation {
// there are undef values, the second generator represents the full index
// scan.
std::pair<Result::Generator, Result::Generator> prefilterTables(
Result::Generator input, ColumnIndex joinColumn);
Result::LazyResult input, ColumnIndex joinColumn);

private:
// Implementation detail that allows consuming a generator from two other
Expand Down
6 changes: 4 additions & 2 deletions src/engine/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ using LazyInputView =
// Convert a `generator<IdTableVocab>` to a `generator<IdTableAndFirstCol>` for
// more efficient access in the join columns below and apply the given
// permutation to each table.
LazyInputView convertGenerator(Result::Generator gen,
OptionalPermutation permutation = {}) {
CPP_template(typename Input)(
requires ad_utility::SameAsAny<Input, Result::Generator,
Result::LazyResult>) LazyInputView
convertGenerator(Input gen, OptionalPermutation permutation = {}) {
for (auto& [table, localVocab] : gen) {
applyPermutation(table, permutation);
// Make sure to actually move the table into the wrapper so that the tables
Expand Down
10 changes: 5 additions & 5 deletions src/engine/Result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ void Result::applyLimitOffset(
limitOffset);
limitTimeCallback(limitTimer.msecs(), idTable());
} else {
auto generator = [](Generator original, LimitOffsetClause limitOffset,
auto generator = [](LazyResult original, LimitOffsetClause limitOffset,
auto limitTimeCallback) -> Generator {
if (limitOffset._limit.value_or(1) == 0) {
co_return;
Expand Down Expand Up @@ -160,7 +160,7 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) {
auto limit = limitOffset._limit;
AD_CONTRACT_CHECK(!limit.has_value() || numRows <= limit.value());
} else {
auto generator = [](Generator original,
auto generator = [](LazyResult original,
LimitOffsetClause limitOffset) -> Generator {
auto limit = limitOffset._limit;
uint64_t elementCount = 0;
Expand Down Expand Up @@ -192,7 +192,7 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) {
AD_EXPENSIVE_CHECK(performCheck(
varColMap, std::get<IdTableSharedLocalVocabPair>(data_).idTable_));
} else {
auto generator = [](Generator original,
auto generator = [](LazyResult original,
[[maybe_unused]] VariableToColumnMap varColMap,
[[maybe_unused]] auto performCheck) -> Generator {
for (IdTableVocabPair& pair : original) {
Expand All @@ -212,7 +212,7 @@ void Result::runOnNewChunkComputed(
onNewChunk,
std::function<void(bool)> onGeneratorFinished) {
AD_CONTRACT_CHECK(!isFullyMaterialized());
auto generator = [](Generator original, auto onNewChunk,
auto generator = [](LazyResult original, auto onNewChunk,
auto onGeneratorFinished) -> Generator {
// Call this within a destructor to make sure it is also called when an
// operation stops iterating before reaching the end.
Expand Down Expand Up @@ -254,7 +254,7 @@ const IdTable& Result::idTable() const {
}

// _____________________________________________________________________________
Result::Generator& Result::idTables() const {
Result::LazyResult& Result::idTables() const {
AD_CONTRACT_CHECK(!isFullyMaterialized());
const auto& container = std::get<GenContainer>(data_);
AD_CONTRACT_CHECK(!container.consumed_->exchange(true));
Expand Down
13 changes: 10 additions & 3 deletions src/engine/Result.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,23 @@ class Result {
: idTable_{std::move(idTable)}, localVocab_{std::move(localVocab)} {}
};

// The current implementation of (most of the) lazy results. Will be replaced
// in the future to make QLever compatible with C++17 again.
using Generator = cppcoro::generator<IdTableVocabPair>;
// The lazy result type that is actually stored. It is type-erased and allows
// explicit conversion from the `Generator` above.
using LazyResult = ad_utility::InputRangeTypeErased<IdTableVocabPair>;

private:
// Needs to be mutable in order to be consumable from a const result.
struct GenContainer {
mutable Generator generator_;
mutable LazyResult generator_;
mutable std::unique_ptr<std::atomic_bool> consumed_ =
std::make_unique<std::atomic_bool>(false);
explicit GenContainer(Generator generator)
explicit GenContainer(LazyResult generator)
: generator_{std::move(generator)} {}
explicit GenContainer(Generator generator)
: generator_{Generator{std::move(generator)}} {}
};

using LocalVocabPtr = std::shared_ptr<const LocalVocab>;
Expand Down Expand Up @@ -155,7 +162,7 @@ class Result {

// Access to the underlying `IdTable`s. Throw an `ad_utility::Exception`
// if the underlying `data_` member holds the wrong variant.
Generator& idTables() const;
LazyResult& idTables() const;

// Const access to the columns by which the `idTable()` is sorted.
const std::vector<ColumnIndex>& sortedBy() const { return sortedBy_; }
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,8 +564,8 @@ void Service::precomputeSiblingResult(std::shared_ptr<Operation> left,
// Creates a `Result::Generator` from partially materialized result data.
auto partialResultGenerator =
[](std::vector<Result::IdTableVocabPair> pairs,
Result::Generator prevGenerator,
Result::Generator::iterator it) -> Result::Generator {
Result::LazyResult prevGenerator,
std::ranges::iterator_t<Result::LazyResult> it) -> Result::Generator {
for (auto& pair : pairs) {
co_yield pair;
}
Expand Down
2 changes: 1 addition & 1 deletion src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
std::vector<SparqlTriple*> matchingTriples;
using BasicPattern = parsedQuery::BasicGraphPattern;
namespace ad = ad_utility;
namespace stdv = std::views;
namespace stdv = ql::views;
for (BasicPattern* basicPattern :
_graphPatterns | stdv::transform(ad::getIf<BasicPattern>) |
stdv::filter(ad::toBool)) {
Expand Down
2 changes: 1 addition & 1 deletion src/util/BackgroundStxxlSorter.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class BackgroundStxxlSorter {
/// Transition from the input phase, where `push()` may be called, to the
/// output phase and return a generator that yields the sorted elements. This
/// function may be called exactly once.
[[nodiscard]] cppcoro::generator<value_type> sortedView() {
[[nodiscard]] auto sortedView() {
setupSort();
return bufferedAsyncView(outputGeneratorImpl(), _numElementsInRun);
}
Expand Down
5 changes: 3 additions & 2 deletions src/util/Generators.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ namespace ad_utility {
// returns false. If the `aggregator` returns false, the cached value is
// discarded. If the cached value is still present once the generator is fully
// consumed, `onFullyCached` is called with the cached value.
template <typename T>
template <typename InputRange,
typename T = std::ranges::range_value_t<InputRange>>
cppcoro::generator<T> wrapGeneratorWithCache(
cppcoro::generator<T> generator,
InputRange generator,
InvocableWithExactReturnType<bool, std::optional<T>&, const T&> auto
aggregator,
InvocableWithExactReturnType<void, T> auto onFullyCached) {
Expand Down
Loading

0 comments on commit 332d8e5

Please sign in to comment.