From 4a159948af0b56472898e34392754141c68e07cf Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 12 Dec 2024 15:16:18 +0100 Subject: [PATCH] Make query planning of index scans fast again (#1674) Since #1619, the size estimate for an index scan always involved one or several copies of the block metadata, which incurred a significant query planning cost for most queries. Now, such a copy is only made for an index scan followed by a `FILTER`, and only the metadata of the blocks that remain after the `FILTER` is copied (in which case the two operations are expensive anyway). --- src/engine/IndexScan.cpp | 32 +++++++++++++++++--------------- src/engine/IndexScan.h | 9 ++++----- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index f56123a42b..5bf47dd4c8 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -339,24 +339,26 @@ IndexScan::getBlockMetadata() const { // _____________________________________________________________________________ std::optional> IndexScan::getBlockMetadataOptionallyPrefiltered() const { + // The code after this is expensive because it always copies the complete + // block metadata, so we do an early return of `nullopt` (which means "use all + // the blocks") if no prefilter is specified. 
+ if (!prefilter_.has_value()) { + return std::nullopt; + } auto optBlockSpan = getBlockMetadata(); - std::optional> optBlocks = std::nullopt; - if (optBlockSpan.has_value()) { - const auto& blockSpan = optBlockSpan.value(); - optBlocks = {blockSpan.begin(), blockSpan.end()}; - applyPrefilterIfPossible(optBlocks.value()); + if (!optBlockSpan.has_value()) { + return std::nullopt; } - return optBlocks; + return applyPrefilter(optBlockSpan.value()); } // _____________________________________________________________________________ -void IndexScan::applyPrefilterIfPossible( - std::vector& blocks) const { - if (prefilter_.has_value()) { - // Apply the prefilter on given blocks. - auto& [prefilterExpr, columnIndex] = prefilter_.value(); - blocks = prefilterExpr->evaluate(blocks, columnIndex); - } +std::vector IndexScan::applyPrefilter( + std::span blocks) const { + AD_CORRECTNESS_CHECK(prefilter_.has_value() && getLimit().isUnconstrained()); + // Apply the prefilter on given blocks. + auto& [prefilterExpr, columnIndex] = prefilter_.value(); + return prefilterExpr->evaluate(blocks, columnIndex); } // _____________________________________________________________________________ @@ -369,12 +371,12 @@ Permutation::IdTableGenerator IndexScan::getLazyScan( auto filteredBlocks = getLimit().isUnconstrained() ? std::optional(std::move(blocks)) : std::nullopt; - if (filteredBlocks.has_value()) { + if (filteredBlocks.has_value() && prefilter_.has_value()) { // Note: The prefilter expression applied with applyPrefilterIfPossible() // is not related to the prefilter procedure mentioned in the comment above. // If this IndexScan owns a pair, it can // be applied. 
- applyPrefilterIfPossible(filteredBlocks.value()); + filteredBlocks = applyPrefilter(filteredBlocks.value()); } return getScanPermutation().lazyScan(getScanSpecification(), filteredBlocks, additionalColumns(), cancellationHandle_, diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index c10680f59e..d778260efe 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -223,11 +223,10 @@ class IndexScan final : public Operation { std::optional> getBlockMetadataOptionallyPrefiltered() const; - // If `isUnconstrained()` yields true, return the blocks as given or the - // prefiltered blocks (if `prefilter_` has value). If `isUnconstrained()` is - // false, return `std::nullopt`. - void applyPrefilterIfPossible( - std::vector& blocks) const; + // Apply the `prefilter_` to the `blocks`. May only be called if the limit is + // unconstrained, and a `prefilter_` exists. + std::vector applyPrefilter( + std::span blocks) const; // Helper functions for the public `getLazyScanFor...` methods and // `chunkedIndexScan` (see above).