diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index f56123a42b..54bb1a2dd0 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -204,15 +204,25 @@ std::shared_ptr IndexScan::makeCopyWithAddedPrefilters( } // _____________________________________________________________________________ -Result::Generator IndexScan::chunkedIndexScan() const { +Result::Generator IndexScan::chunkedIndexScan() { auto optBlockSpan = getBlockMetadata(); if (!optBlockSpan.has_value()) { co_return; } const auto& blockSpan = optBlockSpan.value(); + size_t numBlocksAll = blockSpan.size(); // Note: Given a `PrefilterIndexPair` is available, the corresponding // prefiltering will be applied in `getLazyScan`. - for (IdTable& idTable : getLazyScan({blockSpan.begin(), blockSpan.end()})) { + auto innerGenerator = getLazyScan({blockSpan.begin(), blockSpan.end()}); + auto setDetails = ad_utility::makeOnDestructionDontThrowDuringStackUnwinding( + [ + + this, numBlocksAll, &innerGenerator]() { + auto details = innerGenerator.details(); + details.numBlocksAll_ = numBlocksAll; + updateRuntimeInfoForLazyScan(details); + }); + for (IdTable& idTable : innerGenerator) { co_yield {std::move(idTable), LocalVocab{}}; } } @@ -339,7 +349,14 @@ IndexScan::getBlockMetadata() const { // _____________________________________________________________________________ std::optional> IndexScan::getBlockMetadataOptionallyPrefiltered() const { - auto optBlockSpan = getBlockMetadata(); + auto optBlockSpan = + [&]() -> std::optional> { + if (prefilteredBlocks_.has_value()) { + return prefilteredBlocks_.value(); + } else { + return getBlockMetadata(); + } + }(); std::optional> optBlocks = std::nullopt; if (optBlockSpan.has_value()) { const auto& blockSpan = optBlockSpan.value(); @@ -389,6 +406,7 @@ std::optional IndexScan::getMetadataForScan() }; // _____________________________________________________________________________ +// TODO This can be removed now. std::array IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) { AD_CONTRACT_CHECK(s1.numVariables_ <= 3 && s2.numVariables_ <= 3); @@ -656,3 +674,57 @@ std::pair IndexScan::prefilterTables( return {createPrefilteredJoinSide(state), createPrefilteredIndexScanSide(state)}; } + +// _____________________________________________________________________________ +void IndexScan::setBlocksForJoinOfIndexScans(Operation* left, + Operation* right) { + auto& leftScan = dynamic_cast(*left); + auto& rightScan = dynamic_cast(*right); + + auto getBlocks = [](IndexScan& scan) { + auto metaBlocks = scan.getMetadataForScan(); + if (!metaBlocks.has_value()) { + return metaBlocks; + } + if (scan.prefilteredBlocks_.has_value()) { + metaBlocks.value().blockMetadata_ = scan.prefilteredBlocks_.value(); + } + return metaBlocks; + }; + + auto metaBlocks1 = getBlocks(leftScan); + auto metaBlocks2 = getBlocks(rightScan); + if (!metaBlocks1.has_value() || !metaBlocks2.has_value()) { + return; + } + auto [blocks1, blocks2] = CompressedRelationReader::getBlocksForJoin( + metaBlocks1.value(), metaBlocks2.value()); + leftScan.prefilteredBlocks_ = std::move(blocks1); + rightScan.prefilteredBlocks_ = std::move(blocks2); +} + +// _____________________________________________________________________________ +std::vector IndexScan::getIndexScansForSortVariables( + std::span variables) { + const auto& sorted = resultSortedOn(); + if (resultSortedOn().size() < variables.size()) { + return {}; + } + const auto& varColMap = getExternallyVisibleVariableColumns(); + for (size_t i = 0; i < variables.size(); ++i) { + auto it = varColMap.find(variables[i]); + if (it == varColMap.end() || + it->second.columnIndex_ != resultSortedOn().at(i)) { + return {}; + } + } + return {this}; +} + +// _____________________________________________________________________________ +void IndexScan::setPrefilteredBlocks( + std::vector prefilteredBlocks) { + prefilteredBlocks_ = std::move(prefilteredBlocks); + // TODO once the other PR is merged we have to assert that the result + // is never cached AD_CORRECTNESS_CHECK(!canBeStoredInCache()); +} diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index c10680f59e..fa19037e76 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -36,6 +36,9 @@ class IndexScan final : public Operation { std::vector additionalColumns_; std::vector additionalVariables_; + // TODO Comment + std::optional> prefilteredBlocks_; + public: IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, const SparqlTriple& triple, Graphs graphsToFilter = std::nullopt, @@ -108,6 +111,9 @@ class IndexScan final : public Operation { std::pair prefilterTables( Result::Generator input, ColumnIndex joinColumn); + // TODO Comment + static void setBlocksForJoinOfIndexScans(Operation* left, Operation* right); + private: // Implementation detail that allows to consume a generator from two other // cooperating generators. Needs to be forward declared as it is used by @@ -202,7 +208,7 @@ class IndexScan final : public Operation { PrefilterIndexPair prefilter) const; // Return the (lazy) `IdTable` for this `IndexScan` in chunks. - Result::Generator chunkedIndexScan() const; + Result::Generator chunkedIndexScan(); // Get the `IdTable` for this `IndexScan` in one piece. IdTable materializedIndexScan() const; @@ -234,4 +240,11 @@ class IndexScan final : public Operation { Permutation::IdTableGenerator getLazyScan( std::vector blocks) const; std::optional getMetadataForScan() const; + + // TODO Comment. + void setPrefilteredBlocks( + std::vector prefilteredBlocks); + + std::vector getIndexScansForSortVariables( + std::span variables) override; }; diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index d3c5370e16..9212b6d7d6 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -181,6 +181,16 @@ ProtoResult Join::computeResult(bool requestLaziness) { auto rightResIfCached = getCachedOrSmallResult(*_right); checkCancellation(); + // TODO Copy and move to separate function. + std::span joinVarSpan{&_joinVar, 1}; + auto leftIndexScans = _left->getIndexScansForSortVariables(joinVarSpan); + auto rightIndexScans = _right->getIndexScansForSortVariables(joinVarSpan); + for (auto* left : leftIndexScans) { + for (auto* right : rightIndexScans) { + IndexScan::setBlocksForJoinOfIndexScans(left, right); + } + } + auto leftIndexScan = std::dynamic_pointer_cast(_left->getRootOperation()); if (leftIndexScan && @@ -189,9 +199,6 @@ ProtoResult Join::computeResult(bool requestLaziness) { AD_CORRECTNESS_CHECK(rightResIfCached->isFullyMaterialized()); return computeResultForIndexScanAndIdTable( requestLaziness, std::move(rightResIfCached), leftIndexScan); - - } else if (!leftResIfCached) { - return computeResultForTwoIndexScans(requestLaziness); } } @@ -216,9 +223,10 @@ ProtoResult Join::computeResult(bool requestLaziness) { if (leftRes->isFullyMaterialized()) { return computeResultForIndexScanAndIdTable( requestLaziness, std::move(leftRes), rightIndexScan); + } else if (!leftIndexScan) { + return computeResultForIndexScanAndLazyOperation( + requestLaziness, std::move(leftRes), rightIndexScan); } - return computeResultForIndexScanAndLazyOperation( - requestLaziness, std::move(leftRes), rightIndexScan); } std::shared_ptr rightRes = @@ -647,47 +655,6 @@ void Join::addCombinedRowToIdTable(const ROW_A& rowA, const ROW_B& rowB, } } -// ______________________________________________________________________________________________________ -ProtoResult Join::computeResultForTwoIndexScans(bool requestLaziness) const { - return createResult( - requestLaziness, - [this](std::function yieldTable) { - auto leftScan = - std::dynamic_pointer_cast(_left->getRootOperation()); - auto rightScan = - std::dynamic_pointer_cast(_right->getRootOperation()); - AD_CORRECTNESS_CHECK(leftScan && rightScan); - // The join column already is the first column in both inputs, so we - // don't have to permute the inputs and results for the - // `AddCombinedRowToIdTable` class to work correctly. - AD_CORRECTNESS_CHECK(_leftJoinCol == 0 && _rightJoinCol == 0); - auto rowAdder = makeRowAdder(std::move(yieldTable)); - - ad_utility::Timer timer{ - ad_utility::timer::Timer::InitialStatus::Started}; - auto [leftBlocksInternal, rightBlocksInternal] = - IndexScan::lazyScanForJoinOfTwoScans(*leftScan, *rightScan); - runtimeInfo().addDetail("time-for-filtering-blocks", timer.msecs()); - - auto leftBlocks = convertGenerator(std::move(leftBlocksInternal)); - auto rightBlocks = convertGenerator(std::move(rightBlocksInternal)); - - ad_utility::zipperJoinForBlocksWithoutUndef(leftBlocks, rightBlocks, - std::less{}, rowAdder); - - leftScan->updateRuntimeInfoForLazyScan(leftBlocks.details()); - rightScan->updateRuntimeInfoForLazyScan(rightBlocks.details()); - - AD_CORRECTNESS_CHECK(leftBlocks.details().numBlocksRead_ <= - rightBlocks.details().numElementsRead_); - AD_CORRECTNESS_CHECK(rightBlocks.details().numBlocksRead_ <= - leftBlocks.details().numElementsRead_); - auto localVocab = std::move(rowAdder.localVocab()); - return Result::IdTableVocabPair{std::move(rowAdder).resultTable(), - std::move(localVocab)}; - }); -} - // ______________________________________________________________________________________________________ template ProtoResult Join::computeResultForIndexScanAndIdTable( @@ -834,3 +801,11 @@ ad_utility::AddCombinedRowToIdTable Join::makeRowAdder( 1, IdTable{getResultWidth(), allocator()}, cancellationHandle_, CHUNK_SIZE, std::move(callback)}; } +// _____________________________________________________________________________ +std::vector Join::getIndexScansForSortVariables( + std::span variables) { + auto result = _left->getIndexScansForSortVariables(variables); + auto right = _right->getIndexScansForSortVariables(variables); + result.insert(result.end(), right.begin(), right.end()); + return result; +} diff --git a/src/engine/Join.h b/src/engine/Join.h index 8c8978c8d3..823177cfd8 100644 --- a/src/engine/Join.h +++ b/src/engine/Join.h @@ -141,6 +141,10 @@ class Join : public Operation { static void hashJoin(const IdTable& dynA, ColumnIndex jc1, const IdTable& dynB, ColumnIndex jc2, IdTable* dynRes); + // TODO Comment. + std::vector getIndexScansForSortVariables( + std::span variables) override; + protected: virtual string getCacheKeyImpl() const override; @@ -149,11 +153,6 @@ class Join : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; - // A special implementation that is called when both children are - // `IndexScan`s. Uses the lazy scans to only retrieve the subset of the - // `IndexScan`s that is actually needed without fully materializing them. - ProtoResult computeResultForTwoIndexScans(bool requestLaziness) const; - // A special implementation that is called when exactly one of the children is // an `IndexScan` and the other one is a fully materialized result. The // argument `idTableIsRightInput` determines whether the `IndexScan` is the diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 3e06a9498e..6cebffc0b5 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -426,6 +426,12 @@ class Operation { RuntimeInformation::Status status = RuntimeInformation::Status::optimizedOut); + // TODO Comment. + virtual std::vector getIndexScansForSortVariables( + [[maybe_unused]] std::span variables) { + return {}; + } + private: // Create the runtime information in case the evaluation of this operation has // failed. diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index c9496fe958..784729fb67 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -225,3 +225,18 @@ QueryExecutionTree::getVariableAndInfoByColumnIndex(ColumnIndex colIdx) const { AD_CONTRACT_CHECK(it != varColMap.end()); return *it; } + +// _____________________________________________________________________________ +std::vector QueryExecutionTree::getIndexScansForSortVariables( + std::span variables) { + auto result = rootOperation_->getIndexScansForSortVariables(variables); + if (result.empty()) { + return result; + } + // TODO We have to disable the caching as soon as the PR for that is + // merged. + // rootOperation_->disableCaching(); + cachedResult_.reset(); + sizeEstimate_.reset(); + return result; +} diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index 0eac785f16..b93a6b1a1f 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -122,6 +122,10 @@ class QueryExecutionTree { return rootOperation_->collectWarnings(); } + // TODO Comment. + virtual std::vector getIndexScansForSortVariables( + std::span variables) final; + template void forAllDescendants(F f) { static_assert( diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h index 36cbf14af0..7f20527ad9 100644 --- a/src/index/CompressedRelation.h +++ b/src/index/CompressedRelation.h @@ -466,7 +466,7 @@ class CompressedRelationReader { // to be performed. struct ScanSpecAndBlocks { ScanSpecification scanSpec_; - const std::span blockMetadata_; + std::span blockMetadata_; }; // This struct additionally contains the first and last triple of the scan