diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 5789f50023..be22a64d5d 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -1,6 +1,6 @@ add_subdirectory(sparqlExpressions) add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp) -qlever_target_link_libraries(SortPerformanceEstimator) +qlever_target_link_libraries(SortPerformanceEstimator parser) add_library(engine Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp IndexScan.cpp Join.cpp Sort.cpp diff --git a/src/engine/Distinct.cpp b/src/engine/Distinct.cpp index a3047569a1..2938bc30e6 100644 --- a/src/engine/Distinct.cpp +++ b/src/engine/Distinct.cpp @@ -35,7 +35,7 @@ VariableToColumnMap Distinct::computeVariableToColumnMap() const { // _____________________________________________________________________________ template -Result::Generator Distinct::lazyDistinct(Result::Generator input, +Result::Generator Distinct::lazyDistinct(Result::LazyResult input, bool yieldOnce) const { IdTable aggregateTable{subtree_->getResultWidth(), allocator()}; LocalVocab aggregateVocab{}; diff --git a/src/engine/Distinct.h b/src/engine/Distinct.h index 0fda43fd50..dba3e60b15 100644 --- a/src/engine/Distinct.h +++ b/src/engine/Distinct.h @@ -64,7 +64,8 @@ class Distinct : public Operation { // if every `IdTable` from `input` should yield it's own `IdTable` or if all // of them should get aggregated into a single big `IdTable`. template - Result::Generator lazyDistinct(Result::Generator input, bool yieldOnce) const; + Result::Generator lazyDistinct(Result::LazyResult input, + bool yieldOnce) const; // Removes all duplicates from input with regards to the columns // in keepIndices. 
The input needs to be sorted on the keep indices, diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 1ae50b1b79..9dde7353a3 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -177,8 +177,6 @@ uint64_t GroupBy::getSizeEstimateBeforeLimit() { return _subtree->getMultiplicity(_subtree->getVariableColumn(var)); }; - // TODO Once we can use `std::views` this can be solved - // more elegantly. float minMultiplicity = ql::ranges::min( _groupByVariables | ql::views::transform(varToMultiplicity)); return _subtree->getSizeEstimate() / minMultiplicity; diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 0cd735863d..faa9cda3c5 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -489,13 +489,13 @@ void IndexScan::updateRuntimeInfoForLazyScan(const LazyScanMetadata& metadata) { // resulting from the generator. struct IndexScan::SharedGeneratorState { // The generator that yields the tables to be joined with the index scan. - Result::Generator generator_; + Result::LazyResult generator_; // The column index of the join column in the tables yielded by the generator. ColumnIndex joinColumn_; // Metadata and blocks of this index scan. Permutation::MetadataAndBlocks metaBlocks_; // The iterator of the generator that is currently being consumed. - std::optional iterator_ = std::nullopt; + std::optional iterator_ = std::nullopt; // Values returned by the generator that have not been re-yielded yet. // Typically we expect only 3 or less values to be prefetched (this is an // implementation detail of `BlockZipperJoinImpl`). 
@@ -648,7 +648,7 @@ Result::Generator IndexScan::createPrefilteredIndexScanSide( // _____________________________________________________________________________ std::pair IndexScan::prefilterTables( - Result::Generator input, ColumnIndex joinColumn) { + Result::LazyResult input, ColumnIndex joinColumn) { AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0); auto metaBlocks = getMetadataForScan(); diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index d778260efe..72d377cfc3 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -106,7 +106,7 @@ class IndexScan final : public Operation { // there are undef values, the second generator represents the full index // scan. std::pair prefilterTables( - Result::Generator input, ColumnIndex joinColumn); + Result::LazyResult input, ColumnIndex joinColumn); private: // Implementation detail that allows to consume a generator from two other diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index b7b25a8e74..9956ff3e09 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -40,8 +40,10 @@ using LazyInputView = // Convert a `generator` to a `generator` for // more efficient access in the join columns below and apply the given // permutation to each table. 
-LazyInputView convertGenerator(Result::Generator gen, - OptionalPermutation permutation = {}) { +CPP_template(typename Input)( + requires ad_utility::SameAsAny) LazyInputView + convertGenerator(Input gen, OptionalPermutation permutation = {}) { for (auto& [table, localVocab] : gen) { applyPermutation(table, permutation); // Make sure to actually move the table into the wrapper so that the tables diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index a1cfa10583..3b476777bb 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -124,7 +124,7 @@ void Result::applyLimitOffset( limitOffset); limitTimeCallback(limitTimer.msecs(), idTable()); } else { - auto generator = [](Generator original, LimitOffsetClause limitOffset, + auto generator = [](LazyResult original, LimitOffsetClause limitOffset, auto limitTimeCallback) -> Generator { if (limitOffset._limit.value_or(1) == 0) { co_return; @@ -160,7 +160,7 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) { auto limit = limitOffset._limit; AD_CONTRACT_CHECK(!limit.has_value() || numRows <= limit.value()); } else { - auto generator = [](Generator original, + auto generator = [](LazyResult original, LimitOffsetClause limitOffset) -> Generator { auto limit = limitOffset._limit; uint64_t elementCount = 0; @@ -192,7 +192,7 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { AD_EXPENSIVE_CHECK(performCheck( varColMap, std::get(data_).idTable_)); } else { - auto generator = [](Generator original, + auto generator = [](LazyResult original, [[maybe_unused]] VariableToColumnMap varColMap, [[maybe_unused]] auto performCheck) -> Generator { for (IdTableVocabPair& pair : original) { @@ -212,7 +212,7 @@ void Result::runOnNewChunkComputed( onNewChunk, std::function onGeneratorFinished) { AD_CONTRACT_CHECK(!isFullyMaterialized()); - auto generator = [](Generator original, auto onNewChunk, + auto generator = [](LazyResult original, auto onNewChunk, auto 
onGeneratorFinished) -> Generator { // Call this within destructor to make sure it is also called when an // operation stops iterating before reaching the end. @@ -254,7 +254,7 @@ const IdTable& Result::idTable() const { } // _____________________________________________________________________________ -Result::Generator& Result::idTables() const { +Result::LazyResult& Result::idTables() const { AD_CONTRACT_CHECK(!isFullyMaterialized()); const auto& container = std::get(data_); AD_CONTRACT_CHECK(!container.consumed_->exchange(true)); diff --git a/src/engine/Result.h b/src/engine/Result.h index af23432961..10b7364a3e 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -33,16 +33,23 @@ class Result { : idTable_{std::move(idTable)}, localVocab_{std::move(localVocab)} {} }; + // The current implementation of (most of the) lazy results. Will be replaced + // in the future to make QLever compatible with C++17 again. using Generator = cppcoro::generator; + // The lazy result type that is actually stored. It is type-erased and allows + // explicit conversion from the `Generator` above. + using LazyResult = ad_utility::InputRangeTypeErased; private: // Needs to be mutable in order to be consumable from a const result. struct GenContainer { - mutable Generator generator_; + mutable LazyResult generator_; mutable std::unique_ptr consumed_ = std::make_unique(false); - explicit GenContainer(Generator generator) + explicit GenContainer(LazyResult generator) : generator_{std::move(generator)} {} + explicit GenContainer(Generator generator) + : generator_{Generator{std::move(generator)}} {} }; using LocalVocabPtr = std::shared_ptr; @@ -155,7 +162,7 @@ class Result { // Access to the underlying `IdTable`s. Throw an `ad_utility::Exception` // if the underlying `data_` member holds the wrong variant. - Generator& idTables() const; + LazyResult& idTables() const; // Const access to the columns by which the `idTable()` is sorted. 
const std::vector& sortedBy() const { return sortedBy_; } diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 81df6be64c..21338be71b 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -564,8 +564,8 @@ void Service::precomputeSiblingResult(std::shared_ptr left, // Creates a `Result::Generator` from partially materialized result data. auto partialResultGenerator = [](std::vector pairs, - Result::Generator prevGenerator, - Result::Generator::iterator it) -> Result::Generator { + Result::LazyResult prevGenerator, + std::ranges::iterator_t it) -> Result::Generator { for (auto& pair : pairs) { co_yield pair; } diff --git a/src/parser/ParsedQuery.cpp b/src/parser/ParsedQuery.cpp index 6cb844071b..c4326dfc92 100644 --- a/src/parser/ParsedQuery.cpp +++ b/src/parser/ParsedQuery.cpp @@ -265,7 +265,7 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable, std::vector matchingTriples; using BasicPattern = parsedQuery::BasicGraphPattern; namespace ad = ad_utility; - namespace stdv = std::views; + namespace stdv = ql::views; for (BasicPattern* basicPattern : _graphPatterns | stdv::transform(ad::getIf) | stdv::filter(ad::toBool)) { diff --git a/src/util/BackgroundStxxlSorter.h b/src/util/BackgroundStxxlSorter.h index d142f81a3c..8ab866bfff 100644 --- a/src/util/BackgroundStxxlSorter.h +++ b/src/util/BackgroundStxxlSorter.h @@ -102,7 +102,7 @@ class BackgroundStxxlSorter { /// Transition from the input phase, where `push()` may be called, to the /// output phase and return a generator that yields the sorted elements. This /// function may be called exactly once. 
- [[nodiscard]] cppcoro::generator sortedView() { + [[nodiscard]] auto sortedView() { setupSort(); return bufferedAsyncView(outputGeneratorImpl(), _numElementsInRun); } diff --git a/src/util/Generators.h b/src/util/Generators.h index 9157c48359..db232d0188 100644 --- a/src/util/Generators.h +++ b/src/util/Generators.h @@ -19,9 +19,10 @@ namespace ad_utility { // returns false. If the `aggregator` returns false, the cached value is // discarded. If the cached value is still present once the generator is fully // consumed, `onFullyCached` is called with the cached value. -template +template > cppcoro::generator wrapGeneratorWithCache( - cppcoro::generator generator, + InputRange generator, InvocableWithExactReturnType&, const T&> auto aggregator, InvocableWithExactReturnType auto onFullyCached) { diff --git a/src/util/Iterators.h b/src/util/Iterators.h index a837d6e39a..1337b63e27 100644 --- a/src/util/Iterators.h +++ b/src/util/Iterators.h @@ -10,6 +10,7 @@ #include #include "util/Enums.h" +#include "util/TypeTraits.h" namespace ad_utility { @@ -49,7 +50,7 @@ class IteratorForAccessOperator { using iterator_category = std::random_access_iterator_tag; using difference_type = int64_t; using index_type = uint64_t; - // It is possible to explicitly specify the `value_type` and `reference_type` + // It is possible to explicitly specify the `value_type` and `reference` // if they differ from the defaults. For an example, see the `IdTable` class // which uses a proxy type as its `reference`. using value_type = std::conditional_t< @@ -182,6 +183,221 @@ auto makeForwardingIterator(It iterator) { } } +// This CRTP-Mixin can be used to add iterators to a simple state-machine like +// class, s.t. it behaves like an `InputRange`. The derived class needs the +// following functions: `start()`, `isFinished()`, `get()` , `next()`. +// * `void start()` -> called when `begin()` is called to allow for deferred +// initialization. 
After calling `start()` either `get()` must return the first +// element, or `isFinished()` must return true ( for an empty range). +// * `bool isFinished()` -> has to return true if there are no more values, and +// calls to `get()` are thus impossible. +// * `reference get()` -> get the current value (typically as a reference). +// * `void next()` advance to the next value. After calling `next()` either +// `isFinished()` must be true, or `get()` must return the next value. +template +class InputRangeMixin { + public: + // Cast `this` to the derived class for easier access. + Derived& derived() { return static_cast(*this); } + const Derived& derived() const { return static_cast(*this); } + + // A simple sentinel which is returned by the call to `end()`. + struct Sentinel {}; + + // The iterator class. + class Iterator { + public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::int64_t; + using reference = decltype(std::declval().get()); + using value_type = std::remove_reference_t; + using pointer = value_type*; + InputRangeMixin* mixin_ = nullptr; + + public: + Iterator() = default; + explicit Iterator(InputRangeMixin* mixin) : mixin_{mixin} {} + Iterator& operator++() { + mixin_->derived().next(); + return *this; + } + + // Needed for the `range` concept. + void operator++(int) { (void)operator++(); } + + decltype(auto) operator*() { return mixin_->derived().get(); } + decltype(auto) operator*() const { return mixin_->derived().get(); } + decltype(auto) operator->() { return std::addressof(operator*()); } + decltype(auto) operator->() const { return std::addressof(operator*()); } + + // The comparison `it == end()` just queries `isFinished()` , so an empty + // `Sentinel` suffices. 
+ friend bool operator==(const Iterator& it, Sentinel) { + return it.mixin_->derived().isFinished(); + } + friend bool operator==(Sentinel s, const Iterator& it) { return it == s; } + friend bool operator!=(const Iterator& it, Sentinel s) { + return !(it == s); + } + friend bool operator!=(Sentinel s, const Iterator& it) { + return !(it == s); + } + }; + + public: + // The only functions needed to make this a proper range: `begin()` and + // `end()`. + Iterator begin() { + derived().start(); + return Iterator{this}; + } + Sentinel end() const { return {}; }; +}; + +// A similar mixin to the above, with slightly different characteristics: +// 1. It only requires a single function `std::optional get() +// override` +// 2. It uses simple inheritance with virtual functions, which allows for type +// erasure of different ranges with the same `ValueType`. +// 3. While the interface is simpler (see 1.+2.) each step in iterating is a +// little bit more complex, as the mixin has to store the value. This might be +// less efficient for very simple generators, because the compiler might not be able +// to optimize this mixin as well as the one above. +template +class InputRangeFromGet { + public: + using Storage = std::optional; + Storage storage_ = std::nullopt; + + private: + // The single virtual function which has to be overridden. `std::nullopt` + // means that there will be no more values. + virtual Storage get() = 0; + + public: + virtual ~InputRangeFromGet() = default; + + // Get the next value and store it. 
+ void getNextAndStore() { storage_ = get(); } + + struct Sentinel {}; + class Iterator { + public: + using iterator_category = std::input_iterator_tag; + using difference_type = std::int64_t; + using value_type = typename InputRangeFromGet::Storage::value_type; + using pointer = value_type*; + using reference = std::add_lvalue_reference_t; + using const_reference = std::add_const_t; + InputRangeFromGet* mixin_ = nullptr; + + public: + Iterator() = default; + explicit Iterator(InputRangeFromGet* mixin) : mixin_{mixin} {} + Iterator& operator++() { + mixin_->getNextAndStore(); + return *this; + } + + // Needed for the `range` concept. + void operator++(int) { (void)operator++(); } + + reference operator*() { return mixin_->storage_.value(); } + const_reference operator*() const { return mixin_->storage_.value(); } + decltype(auto) operator->() { return std::addressof(operator*()); } + decltype(auto) operator->() const { return std::addressof(operator*()); } + + friend bool operator==(const Iterator& it, Sentinel) { + return !it.mixin_->storage_.has_value(); + } + friend bool operator==(Sentinel s, const Iterator& it) { return it == s; } + friend bool operator!=(const Iterator& it, Sentinel s) { + return !(it == s); + } + friend bool operator!=(Sentinel s, const Iterator& it) { + return !(it == s); + } + }; + + Iterator begin() { + getNextAndStore(); + return Iterator{this}; + } + Sentinel end() const { return {}; }; +}; + +// This class takes an arbitrary input range, and turns it into a class that +// inherits from `InputRangeFromGet` (see above). While this adds a layer of +// indirection, it makes type erasure between input ranges with the same value +// type very simple. 
+template +class RangeToInputRangeFromGet + : public InputRangeFromGet> { + Range range_; + using Iterator = ql::ranges::iterator_t; + std::optional iterator_ = std::nullopt; + bool isDone() { return iterator_ == ql::ranges::end(range_); } + + public: + explicit RangeToInputRangeFromGet(Range range) : range_{std::move(range)} {} + + // As we use the `InputRangeFromGet`, we only have to override the + // single `get()` method. + std::optional> get() override { + if (!iterator_.has_value()) { + // For the very first value we have to call `begin()`. + iterator_ = ql::ranges::begin(range_); + if (isDone()) { + return std::nullopt; + } + } else { + // Not the first value, so we have to advance the iterator. + if (isDone()) { + return std::nullopt; + } + ++iterator_.value(); + } + + // We now have advanced the iterator to the next value, so we can return it + // if existing. + if (isDone()) { + return std::nullopt; + } + return std::move(*iterator_.value()); + } +}; + +// A simple type-erased input range (that is, one class for *any* input range +// with the given `ValueType`). It internally uses the `InputRangeFromGet` +// from above as an implementation detail. +template +class InputRangeTypeErased { + // Unique (and therefore owning) pointer to the virtual base class. + std::unique_ptr> impl_; + + public: + // Constructor for ranges that directly inherit from + // `InputRangeFromGet`. + template + requires std::is_base_of_v, Range> + explicit InputRangeTypeErased(Range range) + : impl_{std::make_unique(std::move(range))} {} + + // Constructor for all other ranges. We first pass them through the + // `RangeToInputRangeFromGet` class from above to make it compatible with the base + // class. 
+ template + requires(!std::is_base_of_v, Range> && + ql::ranges::range && + std::same_as, ValueType>) + explicit InputRangeTypeErased(Range range) + : impl_{std::make_unique>( + std::move(range))} {} + + decltype(auto) begin() { return impl_->begin(); } + decltype(auto) end() { return impl_->end(); } + using iterator = typename InputRangeFromGet::Iterator; +}; } // namespace ad_utility #endif // QLEVER_ITERATORS_H diff --git a/src/util/TypeTraits.h b/src/util/TypeTraits.h index 07703494f7..7d39e4aa70 100644 --- a/src/util/TypeTraits.h +++ b/src/util/TypeTraits.h @@ -12,6 +12,7 @@ #include #include +#include "backports/algorithm.h" #include "util/Forward.h" namespace ad_utility { @@ -135,7 +136,7 @@ concept SimilarToAny = (... || isSimilar); /// True iff `T` is the same as any of the `Ts...`. template -concept SameAsAny = (... || std::same_as); +concept SameAsAny = (... || ql::concepts::same_as); /* The implementation for `SimilarToAnyTypeIn` and `SameAsAnyTypeIn` (see below diff --git a/src/util/Views.h b/src/util/Views.h index 4666a28228..74e6eec14c 100644 --- a/src/util/Views.h +++ b/src/util/Views.h @@ -5,55 +5,20 @@ #pragma once #include -#include #include #include "backports/algorithm.h" #include "backports/concepts.h" #include "util/Generator.h" +#include "util/Iterators.h" #include "util/Log.h" namespace ad_utility { -/// Takes a input-iterable and yields the elements of that view (no visible -/// effect). The iteration over the input view is done on a separate thread with -/// a buffer size of `blockSize`. This might speed up the computation when the -/// values of the input view are expensive to compute. 
-template -cppcoro::generator bufferedAsyncView( - View view, uint64_t blockSize) { - using value_type = typename View::value_type; - auto it = view.begin(); - auto end = view.end(); - auto getNextBlock = [&it, &end, blockSize] { - std::vector buffer; - buffer.reserve(blockSize); - size_t i = 0; - while (i < blockSize && it != end) { - buffer.push_back(*it); - ++it; - ++i; - } - return buffer; - }; - - auto block = getNextBlock(); - auto future = std::async(std::launch::async, getNextBlock); - while (true) { - for (auto& element : block) { - co_yield element; - } - block = future.get(); - if (block.empty()) { - co_return; - } - future = std::async(std::launch::async, getNextBlock); - } -} - /// Takes a view and yields the elements of the same view, but skips over /// consecutive duplicates. -template +template > cppcoro::generator uniqueView(SortedView view) { size_t numInputs = 0; size_t numUnique = 0; @@ -115,7 +80,7 @@ cppcoro::generator uniqueBlockView( } // A view that owns its underlying storage. It is a replacement for -// `std::ranges::owning_view` which is not yet supported by `GCC 11` and +// `ranges::owning_view` which is not yet supported by `GCC 11` and // `range-v3`. The implementation is taken from libstdc++-13. The additional // optional `supportsConst` argument explicitly disables const iteration for // this view when set to false, see `OwningViewNoConst` below for details. @@ -225,7 +190,7 @@ CPP_concept can_ref_view = CPP_requires_ref(can_ref_view, Range); // implementations. template constexpr auto allView(Range&& range) { - if constexpr (std::ranges::view>) { + if constexpr (ql::ranges::view>) { return AD_FWD(range); } else if constexpr (detail::can_ref_view) { return ql::ranges::ref_view{AD_FWD(range)}; @@ -234,6 +199,69 @@ constexpr auto allView(Range&& range) { } } +namespace detail { +// The implementation of `bufferedAsyncView` (see below). It yields its result +// in blocks. 
+template +struct BufferedAsyncView : InputRangeMixin> { + View view_; + uint64_t blockSize_; + bool finished_ = false; + + explicit BufferedAsyncView(View view, uint64_t blockSize) + : view_{std::move(view)}, blockSize_{blockSize} { + AD_CONTRACT_CHECK(blockSize_ > 0); + } + + ql::ranges::iterator_t it_; + ql::ranges::sentinel_t end_ = ql::ranges::end(view_); + using value_type = ql::ranges::range_value_t; + std::future> future_; + + std::vector buffer_; + std::vector getNextBlock() { + std::vector buffer; + buffer.reserve(blockSize_); + size_t i = 0; + while (i < blockSize_ && it_ != end_) { + buffer.push_back(*it_); + ++it_; + ++i; + } + return buffer; + }; + + void start() { + it_ = view_.begin(); + buffer_ = getNextBlock(); + finished_ = buffer_.empty(); + future_ = + std::async(std::launch::async, [this]() { return getNextBlock(); }); + } + bool isFinished() { return finished_; } + auto& get() { return buffer_; } + const auto& get() const { return buffer_; } + + void next() { + buffer_ = future_.get(); + finished_ = buffer_.empty(); + future_ = + std::async(std::launch::async, [this]() { return getNextBlock(); }); + } +}; +} // namespace detail + +/// Takes a input-iterable and yields the elements of that view (no visible +/// effect). The iteration over the input view is done on a separate thread with +/// a buffer size of `blockSize`. This might speed up the computation when the +/// values of the input view are expensive to compute. +/// +template +auto bufferedAsyncView(View view, uint64_t blockSize) { + return ql::views::join( + allView(detail::BufferedAsyncView{std::move(view), blockSize})); +} + // Returns a view that contains all the values in `[0, upperBound)`, similar to // Python's `range` function. Avoids the common pitfall in `ql::views::iota` // that the count variable is only derived from the first argument. 
For example, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c260f34d4e..46a83d5b7a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,7 +7,7 @@ add_subdirectory(util) # general test utilities and all libraries that are specified as additional # arguments. function(linkTest basename) - qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) + qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main ${CMAKE_THREAD_LIBS_INIT}) endfunction() # Add the executable ${basename} that is compiled from the source file @@ -47,15 +47,12 @@ else () message(STATUS "The tests are split over multiple binaries") endif () -# Usage: `addAndLinkTest(basename, [additionalLibraries...]` -# Add a GTest/GMock test case that is called `basename` and compiled from a file called -# `basename.cpp`. All tests are linked against `gmock_main` and the threading library. -# additional libraries against which the test case has to be linked can be specified as -# additional arguments after the `basename` -function(addLinkAndDiscoverTest basename) + +# The implementation of `addLinkAndDiscoverTest` and `addLinkAndDiscoverTestNoLibs` below. +function(addLinkAndDiscoverTestImpl basename) if (SINGLE_TEST_BINARY) target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) - qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) + qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN} ) else () addTest(${basename}) linkAndDiscoverTest(${basename} ${ARGN}) @@ -63,6 +60,24 @@ function(addLinkAndDiscoverTest basename) endfunction() +# Usage: `addLinkAndDiscoverTest[NoLibs](basename, [additionalLibraries...])` +# Add a GTest/GMock test case that is called `basename` and compiled from a file called +# `basename.cpp`. All tests are linked against `gmock_main` and the threading library. 
+# additional libraries against which the test case has to be linked can be specified as +# additional arguments after the `basename` + +# This function links the test against `testUtil` (basically all of QLever). +function(addLinkAndDiscoverTest basename) + addLinkAndDiscoverTestImpl(${basename} ${ARGN} testUtil) +endfunction() + +# This function links only against Gtest + the explicitly specified libraries. +# It can be used for tests of standalone utils that don't require the rest of QLever. +function(addLinkAndDiscoverTestNoLibs basename) + addLinkAndDiscoverTestImpl(${basename} ${ARGN}) +endfunction() + + # Add a GTest/GMock test case that is called `basename` and compiled from a file called # `basename.cpp`. All tests are linked against `gmock_main` and the threading library. # In contrast to `addLinkAndDiscoverTest` this doesn't let ctest run all subtests individually, @@ -73,7 +88,7 @@ function(addLinkAndRunAsSingleTest basename) qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) else () addTest(${basename}) - linkTest(${basename} ${ARGN}) + linkTest(${basename} testUtil ${ARGN}) add_test(NAME ${basename} COMMAND ${basename}) endif () @@ -89,7 +104,7 @@ function(addLinkAndDiscoverTestSerial basename) qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) else () addTest(${basename}) - linkAndDiscoverTestSerial(${basename} ${ARGN}) + linkAndDiscoverTestSerial(${basename} testUtil ${ARGN}) endif () endfunction() @@ -241,14 +256,14 @@ addLinkAndDiscoverTest(MilestoneIdTest) addLinkAndDiscoverTest(VocabularyTest index) -addLinkAndDiscoverTest(IteratorTest) +addLinkAndDiscoverTestNoLibs(IteratorTest) # Stxxl currently always uses a file ./-stxxl.disk for all indices, which # makes it impossible to run the test cases for the Index class in parallel. 
# TODO fix this addLinkAndDiscoverTestSerial(BackgroundStxxlSorterTest ${STXXL_LIBRARIES}) -addLinkAndDiscoverTest(ViewsTest) +addLinkAndDiscoverTestNoLibs(ViewsTest) addLinkAndDiscoverTest(ForwardTest) diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp index e6586061ea..7df2617c3f 100644 --- a/test/FilterTest.cpp +++ b/test/FilterTest.cpp @@ -21,7 +21,7 @@ namespace { ValueId asBool(bool value) { return Id::makeFromBool(value); } // Convert a generator to a vector for easier comparison in assertions -std::vector toVector(Result::Generator generator) { +std::vector toVector(Result::LazyResult generator) { std::vector result; for (auto& pair : generator) { // IMPORTANT: The `LocalVocab` contained in the pair will be destroyed at diff --git a/test/IteratorTest.cpp b/test/IteratorTest.cpp index af7de3f7ea..9ee5118e96 100644 --- a/test/IteratorTest.cpp +++ b/test/IteratorTest.cpp @@ -8,6 +8,7 @@ #include #include "../src/util/Iterators.h" +#include "backports/algorithm.h" auto testIterator = [](const auto& input, auto begin, auto end) { auto it = begin; @@ -95,3 +96,112 @@ TEST(Iterator, makeForwardingIterator) { ASSERT_EQ(1u, vector.size()); ASSERT_TRUE(vector[0].empty()); } + +namespace { +template +// This function tests a view that behaves like `ql::views::iota`. +// The argument `makeIotaRange` is given a lower bound (size_t, `0` if not +// specified) and an upper bound (`optional`, unlimited (nullopt) if not +// specified) and must return a `ql::ranges::input_range` that yields the +// elements in the range `[lower, upper)`. +void testIota(MakeIotaRange makeIotaRange) { + size_t sum = 0; + // Test manual iteration. + for (auto s : makeIotaRange(0, 5)) { + sum += s; + } + EXPECT_EQ(sum, 10); + + // Check that the range is an input range, but fulfills none of the stricter + // categories. 
+ auto iota = makeIotaRange(); + using Iota = decltype(iota); + static_assert(ql::ranges::input_range); + static_assert(!ql::ranges::forward_range); + + // Test the interaction with the `ql::views` and `ql::ranges` machinery. + auto view = iota | ql::views::drop(3) | ql::views::take(7); + static_assert(ql::ranges::input_range); + sum = 0; + auto add = [&sum](auto val) { sum += val; }; + ql::ranges::for_each(view, add); + + // 42 == 3 + 4 + ... + 9 + EXPECT_EQ(sum, 42); +} +} // namespace + +// _____________________________________________________________________________ +TEST(Iterator, InputRangeMixin) { + using namespace ad_utility; + struct Iota : InputRangeMixin { + size_t value_ = 0; + std::optional upper_; + explicit Iota(size_t lower = 0, std::optional upper = {}) + : value_{lower}, upper_{upper} {} + void start() {} + bool isFinished() const { return value_ == upper_; } + size_t get() const { return value_; } + void next() { ++value_; } + }; + + auto makeIota = [](size_t lower = 0, std::optional upper = {}) { + return Iota{lower, upper}; + }; + testIota(makeIota); +} + +//_____________________________________________________________________________ +TEST(Iterator, InputRangeFromGet) { + using namespace ad_utility; + struct Iota : InputRangeFromGet { + size_t value_ = 0; + std::optional upper_; + explicit Iota(size_t lower = 0, std::optional upper = {}) + : value_{lower}, upper_{upper} {} + std::optional get() override { + if (value_ == upper_) { + return std::nullopt; + } + return value_++; + } + }; + auto makeIota = [](size_t lower = 0, std::optional upper = {}) { + return Iota{lower, upper}; + }; + testIota(makeIota); +} +//_____________________________________________________________________________ +TEST(Iterator, InputRangeTypeErased) { + using namespace ad_utility; + struct IotaImpl : InputRangeFromGet { + size_t value_ = 0; + std::optional upper_; + explicit IotaImpl(size_t lower = 0, std::optional upper = {}) + : value_{lower}, upper_{upper} {} + 
std::optional get() override { + if (value_ == upper_) { + return std::nullopt; + } + return value_++; + } + }; + + using Iota = InputRangeTypeErased; + auto makeIota = [](size_t lower = 0, std::optional upper = {}) { + return Iota{IotaImpl{lower, upper}}; + }; + testIota(makeIota); + + // We can also type-erase any input range with the correct value type, in + // particular ranges and views from the standard library. + auto makeIotaFromStdIota = [](size_t lower = 0, + std::optional upper = {}) { + if (!upper.has_value()) { + return Iota{ql::views::iota(lower)}; + } else { + return Iota{ql::views::iota(lower, upper.value())}; + } + }; + testIota(makeIotaFromStdIota); +} diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index 4ad1f1313c..ec9ab35c7f 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -24,10 +24,9 @@ using Status = RuntimeInformation::Status; namespace { // Helper function to perform actions at various stages of a generator -template +template > auto expectAtEachStageOfGenerator( - cppcoro::generator generator, - std::vector> functions, + Range generator, std::vector> functions, ad_utility::source_location l = ad_utility::source_location::current()) { auto locationTrace = generateLocationTrace(l); size_t index = 0; diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index b6fd694d94..4e0deb2a53 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -55,7 +55,7 @@ std::vector getAllSubSplits(const IdTable& idTable) { } // _____________________________________________________________________________ -void consumeGenerator(Result::Generator& generator) { +void consumeGenerator(Result::LazyResult& generator) { for ([[maybe_unused]] IdTableVocabPair& _ : generator) { } } diff --git a/test/ViewsTest.cpp b/test/ViewsTest.cpp index 24fc744afb..4c1a569b96 100644 --- a/test/ViewsTest.cpp +++ b/test/ViewsTest.cpp @@ -19,7 +19,7 @@ TEST(Views, BufferedAsyncView) { for (const auto& element : view) { result.push_back(element); } 
- ASSERT_EQ(result, inputVector); + EXPECT_THAT(result, ::testing::ContainerEq(inputVector)); }; uint64_t numElements = 1000; diff --git a/test/engine/DistinctTest.cpp b/test/engine/DistinctTest.cpp index c20d0ba5c6..5ce219b60f 100644 --- a/test/engine/DistinctTest.cpp +++ b/test/engine/DistinctTest.cpp @@ -14,7 +14,7 @@ using V = Variable; namespace { // Convert a generator to a vector for easier comparison in assertions -std::vector toVector(Result::Generator generator) { +std::vector toVector(Result::LazyResult generator) { std::vector result; for (auto& [table, vocab] : generator) { // IMPORTANT: The `vocab` will go out of scope here, but the tests don't use diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index 2c526787a3..62f01647a0 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -21,6 +21,7 @@ using ad_utility::source_location; namespace { using Tc = TripleComponent; using Var = Variable; +using LazyResult = Result::LazyResult; using IndexPair = std::pair; @@ -866,8 +867,8 @@ TEST_P(IndexScanWithLazyJoin, prefilterTablesDoesFilterCorrectly) { co_yield p3; }; - auto [joinSideResults, scanResults] = - consumeGenerators(scan.prefilterTables(makeJoinSide(this), 0)); + auto [joinSideResults, scanResults] = consumeGenerators( + scan.prefilterTables(LazyResult{makeJoinSide(this)}, 0)); ASSERT_EQ(scanResults.size(), 2); ASSERT_EQ(joinSideResults.size(), 3); @@ -910,8 +911,8 @@ TEST_P(IndexScanWithLazyJoin, co_yield p2; }; - auto [joinSideResults, scanResults] = - consumeGenerators(scan.prefilterTables(makeJoinSide(this), 0)); + auto [joinSideResults, scanResults] = consumeGenerators( + scan.prefilterTables(LazyResult{makeJoinSide(this)}, 0)); ASSERT_EQ(scanResults.size(), 1); ASSERT_EQ(joinSideResults.size(), 2); @@ -944,7 +945,7 @@ TEST_P(IndexScanWithLazyJoin, }; auto [joinSideResults, scanResults] = - consumeGenerators(scan.prefilterTables(makeJoinSide(), 0)); + 
consumeGenerators(scan.prefilterTables(LazyResult{makeJoinSide()}, 0)); ASSERT_EQ(scanResults.size(), 0); ASSERT_EQ(joinSideResults.size(), 0); @@ -973,8 +974,8 @@ TEST_P(IndexScanWithLazyJoin, prefilterTablesDoesNotFilterOnUndefined) { co_yield p7; }; - auto [_, scanResults] = - consumeGenerators(scan.prefilterTables(makeJoinSide(this), 0)); + auto [_, scanResults] = consumeGenerators( + scan.prefilterTables(LazyResult{makeJoinSide(this)}, 0)); ASSERT_EQ(scanResults.size(), 3); EXPECT_TRUE(scanResults.at(0).localVocab_.empty()); @@ -1005,7 +1006,7 @@ TEST_P(IndexScanWithLazyJoin, prefilterTablesDoesNotFilterWithSingleUndefined) { }; auto [_, scanResults] = - consumeGenerators(scan.prefilterTables(makeJoinSide(), 0)); + consumeGenerators(scan.prefilterTables(LazyResult{makeJoinSide()}, 0)); ASSERT_EQ(scanResults.size(), 3); EXPECT_TRUE(scanResults.at(0).localVocab_.empty()); @@ -1036,7 +1037,7 @@ TEST_P(IndexScanWithLazyJoin, prefilterTablesWorksWithSingleEmptyTable) { }; auto [_, scanResults] = - consumeGenerators(scan.prefilterTables(makeJoinSide(), 0)); + consumeGenerators(scan.prefilterTables(LazyResult{makeJoinSide()}, 0)); ASSERT_EQ(scanResults.size(), 0); } @@ -1048,7 +1049,7 @@ TEST_P(IndexScanWithLazyJoin, prefilterTablesWorksWithEmptyGenerator) { auto makeJoinSide = []() -> Result::Generator { co_return; }; auto [_, scanResults] = - consumeGenerators(scan.prefilterTables(makeJoinSide(), 0)); + consumeGenerators(scan.prefilterTables(LazyResult{makeJoinSide()}, 0)); ASSERT_EQ(scanResults.size(), 0); } @@ -1075,7 +1076,7 @@ TEST(IndexScan, prefilterTablesWithEmptyIndexScanReturnsEmptyGenerators) { }; auto [leftGenerator, rightGenerator] = - scan.prefilterTables(makeJoinSide(), 0); + scan.prefilterTables(Result::LazyResult{makeJoinSide()}, 0); EXPECT_EQ(leftGenerator.begin(), leftGenerator.end()); EXPECT_EQ(rightGenerator.begin(), rightGenerator.end()); diff --git a/test/util/IdTableHelpers.cpp b/test/util/IdTableHelpers.cpp index 55ac6209f9..34ad9414e7 
100644 --- a/test/util/IdTableHelpers.cpp +++ b/test/util/IdTableHelpers.cpp @@ -251,7 +251,7 @@ std::shared_ptr idTableToExecutionTree( // _____________________________________________________________________________ std::pair> aggregateTables( - Result::Generator generator, size_t numColumns) { + Result::LazyResult generator, size_t numColumns) { IdTable aggregateTable{numColumns, ad_utility::makeUnlimitedAllocator()}; std::vector localVocabs; for (auto& [idTable, localVocab] : generator) { diff --git a/test/util/IdTableHelpers.h b/test/util/IdTableHelpers.h index bc7035cd2f..40e2fe8213 100644 --- a/test/util/IdTableHelpers.h +++ b/test/util/IdTableHelpers.h @@ -260,4 +260,4 @@ std::shared_ptr idTableToExecutionTree( // Fully consume a given generator and store it in an `IdTable` and store the // local vocabs in a vector. std::pair> aggregateTables( - Result::Generator generator, size_t numColumns); + Result::LazyResult generator, size_t numColumns);