From 58a38aafbf8c6e323aa295633a28cc07e13344fd Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Thu, 19 Dec 2024 16:56:44 +0800 Subject: [PATCH 01/17] Storages: load RSResult only once Signed-off-by: Lloyd-Pottiger --- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 12 +- dbms/src/Storages/DeltaMerge/File/DMFile.h | 4 +- .../File/DMFileBlockInputStream.cpp | 38 +----- .../DeltaMerge/File/DMFileBlockInputStream.h | 16 ++- .../DeltaMerge/File/DMFilePackFilter.cpp | 83 ++++-------- .../DeltaMerge/File/DMFilePackFilter.h | 105 ++++------------ .../File/DMFilePackFilterResult.cpp | 83 ++++++++++++ .../DeltaMerge/File/DMFilePackFilterResult.h | 106 ++++++++++++++++ .../DeltaMerge/File/DMFilePackFilter_fwd.h | 4 + .../Storages/DeltaMerge/File/DMFileReader.cpp | 4 +- .../Storages/DeltaMerge/File/DMFileReader.h | 4 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 119 +++++++++--------- dbms/src/Storages/DeltaMerge/Segment.h | 17 +-- .../Storages/DeltaMerge/SegmentReadTask.cpp | 2 +- .../Storages/DeltaMerge/StableValueSpace.cpp | 106 +++++++++++----- .../Storages/DeltaMerge/StableValueSpace.h | 21 +++- .../DeltaMerge/tests/gtest_dm_file.cpp | 20 +-- .../DeltaMerge/tests/gtest_dm_segment.cpp | 11 +- .../tests/gtest_dm_vector_index.cpp | 55 ++++---- .../DeltaMerge/tests/gtest_segment_bitmap.cpp | 15 ++- .../tests/gtest_segment_test_basic.cpp | 2 +- .../gtest_skippable_block_input_stream.cpp | 1 + 22 files changed, 488 insertions(+), 340 deletions(-) create mode 100644 dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp create mode 100644 dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 64469af3f72..90aa65c10ca 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -38,21 +38,13 @@ ColumnFileBig::ColumnFileBig(const DMContext & dm_context, const DMFilePtr & fil void ColumnFileBig::calculateStat(const DMContext & dm_context) { - auto index_cache = dm_context.global_context.getMinMaxIndexCache(); - auto pack_filter = DMFilePackFilter::loadFrom( + dm_context, file, - index_cache, /*set_cache_if_miss*/ false, {segment_range}, EMPTY_RS_OPERATOR, - {}, - dm_context.global_context.getFileProvider(), - dm_context.getReadLimiter(), - dm_context.scan_context, - /*tracing_id*/ dm_context.tracing_id, - ReadTag::Internal); - + {}); std::tie(valid_rows, valid_bytes) = pack_filter.validRowsAndBytes(); } diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.h b/dbms/src/Storages/DeltaMerge/File/DMFile.h index 76c28975ddb..d6751f2f0d9 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.h @@ -210,6 +210,8 @@ class DMFile : private boost::noncopyable UInt32 metaVersion() const { return meta->metaVersion(); } + bool isColIndexExist(const ColId & col_id) const; + private: DMFile( UInt64 file_id_, @@ -293,8 +295,6 @@ class DMFile : private boost::noncopyable String colIndexCacheKey(const FileNameBase & file_name_base) const; String colMarkCacheKey(const FileNameBase & file_name_base) const; - bool isColIndexExist(const ColId & col_id) const; - String encryptionBasePath() const; EncryptionPath encryptionDataPath(const FileNameBase & file_name_base) const; EncryptionPath encryptionIndexPath(const FileNameBase & file_name_base) const; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index 1f4dff89f00..b3aee6221fb 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -19,7 +19,6 @@ #include #include - namespace DB::DM { @@ -58,19 +57,6 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle; - DMFilePackFilter pack_filter = DMFilePackFilter::loadFrom( - dmfile, - index_cache, - /*set_cache_if_miss*/ true, - rowkey_ranges, - rs_filter, - read_packs, - file_provider, - read_limiter, - scan_context, - tracing_id, - read_tag); - bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); if (!enable_read_thread || max_sharing_column_bytes_for_all <= 0) @@ -87,7 +73,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( enable_del_clean_read, is_fast_scan, max_data_version, - std::move(pack_filter), + *pack_filter, mark_cache, enable_column_cache, column_cache, @@ -140,18 +126,13 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn return build(dmfile, read_columns, rowkey_ranges, scan_context); }; - if (!rs_filter) - return fallback(); - - auto filter_with_ann = std::dynamic_pointer_cast(rs_filter); - if (!filter_with_ann) + if (!ann_query_info) return fallback(); if (!bitmap_filter.has_value()) return fallback(); Block header_layout = toEmptyBlock(read_columns); - auto ann_query_info = filter_with_ann->ann_query_info; // Copy out the vector column for later use. Copy is intentionally performed after the // fast check so that in fallback conditions we don't need unnecessary copies. @@ -181,19 +162,6 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn // All check passed. Let's read via vector index. - DMFilePackFilter pack_filter = DMFilePackFilter::loadFrom( - dmfile, - index_cache, - /*set_cache_if_miss*/ true, - rowkey_ranges, - rs_filter, - read_packs, - file_provider, - read_limiter, - scan_context, - tracing_id, - ReadTag::Query); - bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle; @@ -205,7 +173,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn enable_del_clean_read, is_fast_scan, max_data_version, - std::move(pack_filter), + *pack_filter, mark_cache, enable_column_cache, column_cache, diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h index 456999aa4c9..a2a89ae7f26 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h @@ -138,9 +138,9 @@ class DMFileBlockInputStreamBuilder return *this; } - DMFileBlockInputStreamBuilder & setRSOperator(const RSOperatorPtr & filter_) + DMFileBlockInputStreamBuilder setAnnQureyInfo(const ANNQueryInfoPtr & ann_query_info_) { - rs_filter = filter_; + ann_query_info = ann_query_info_; return *this; } @@ -180,6 +180,12 @@ class DMFileBlockInputStreamBuilder return *this; } + DMFileBlockInputStreamBuilder & setDMFilePackFilterResult(const DMFilePackFilterResultPtr & pack_filter_) + { + pack_filter = pack_filter_; + return *this; + } + /** * @note To really enable the long term cache, you also need to ensure * ColumnCacheLongTerm is initialized in the global context. @@ -217,8 +223,6 @@ class DMFileBlockInputStreamBuilder bool is_fast_scan = false; bool enable_del_clean_read = false; UInt64 max_data_version = std::numeric_limits::max(); - // Rough set filter - RSOperatorPtr rs_filter; // packs filter (filter by pack index) IdSetPtr read_packs; MarkCachePtr mark_cache; @@ -234,6 +238,10 @@ class DMFileBlockInputStreamBuilder String tracing_id; ReadTag read_tag = ReadTag::Internal; + DMFilePackFilterResultPtr pack_filter; + + ANNQueryInfoPtr ann_query_info = nullptr; + VectorIndexCachePtr vector_index_cache; // Note: Currently thie field is assigned only for Stable streams, not available for ColumnFileBig std::optional bitmap_filter; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index e465f11caad..3bbe0bd0967 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -1,5 +1,4 @@ - -// Copyright 2023 PingCAP, Inc. +// Copyright 2024 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,20 +17,20 @@ #include #include -#include namespace DB::DM { -void DMFilePackFilter::init(ReadTag read_tag) +DMFilePackFilterResult DMFilePackFilter::load(const DMContext & dm_context) { Stopwatch watch; SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); + DMFilePackFilterResult result(dm_context, dmfile, pack_count); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) { - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + tryLoadIndex(result.param, EXTRA_HANDLE_COLUMN_ID); std::vector handle_filters; for (auto & rowkey_range : rowkey_ranges) handle_filters.emplace_back(toFilter(rowkey_range)); @@ -64,16 +63,16 @@ void DMFilePackFilter::init(ReadTag read_tag) #endif for (size_t i = 0; i < pack_count; ++i) { - handle_res[i] = RSResult::None; + result.handle_res[i] = RSResult::None; } for (auto & handle_filter : handle_filters) { - auto res = handle_filter->roughCheck(0, pack_count, param); + auto res = handle_filter->roughCheck(0, pack_count, result.param); std::transform( - handle_res.begin(), - handle_res.end(), + result.handle_res.begin(), + result.handle_res.end(), res.begin(), - handle_res.begin(), + result.handle_res.begin(), [](RSResult a, RSResult b) { return a || b; }); } } @@ -81,18 +80,18 @@ void DMFilePackFilter::init(ReadTag read_tag) ProfileEvents::increment(ProfileEvents::DMFileFilterNoFilter, pack_count); /// Check packs by handle_res - pack_res = handle_res; - auto after_pk = countUsePack(); + result.pack_res = result.handle_res; + auto after_pk = result.countUsePack(); /// Check packs by read_packs if (read_packs) { for (size_t i = 0; i < pack_count; ++i) { - pack_res[i] = read_packs->contains(i) ? pack_res[i] : RSResult::None; + result.pack_res[i] = read_packs->contains(i) ? result.pack_res[i] : RSResult::None; } } - auto after_read_packs = countUsePack(); + auto after_read_packs = result.countUsePack(); ProfileEvents::increment(ProfileEvents::DMFileFilterAftPKAndPackSet, after_read_packs); /// Check packs by filter in where clause @@ -102,36 +101,30 @@ void DMFilePackFilter::init(ReadTag read_tag) ColIds ids = filter->getColumnIDs(); for (const auto & id : ids) { - tryLoadIndex(id); + tryLoadIndex(result.param, id); } - const auto check_results = filter->roughCheck(0, pack_count, param); + const auto check_results = filter->roughCheck(0, pack_count, result.param); std::transform( - pack_res.cbegin(), - pack_res.cend(), + result.pack_res.cbegin(), + result.pack_res.cend(), check_results.cbegin(), - pack_res.begin(), + result.pack_res.begin(), [](RSResult a, RSResult b) { return a && b; }); } else { // ColumnFileBig in DeltaValueSpace never pass a filter to DMFilePackFilter. // Assume its filter always return Some. - std::transform(pack_res.cbegin(), pack_res.cend(), pack_res.begin(), [](RSResult a) { + std::transform(result.pack_res.cbegin(), result.pack_res.cend(), result.pack_res.begin(), [](RSResult a) { return a && RSResult::Some; }); } - auto [none_count, some_count, all_count, all_null_count] = countPackRes(); + auto [none_count, some_count, all_count, all_null_count] = result.countPackRes(); auto after_filter = some_count + all_count + all_null_count; ProfileEvents::increment(ProfileEvents::DMFileFilterAftRoughSet, after_filter); - // In table scanning, DMFilePackFilter of a DMFile may be created several times: - // 1. When building MVCC bitmap (ReadTag::MVCC). - // 2. When building LM filter stream (ReadTag::LM). - // 3. When building stream of other columns (ReadTag::Query). - // Only need to count the filter result once. - // TODO: We can create DMFilePackFilter at the beginning and pass it to the stages described above. - if (read_tag == ReadTag::Query) + if (scan_context) { scan_context->rs_pack_filter_none += none_count; scan_context->rs_pack_filter_some += some_count; @@ -148,8 +141,7 @@ void DMFilePackFilter::init(ReadTag read_tag) LOG_DEBUG( log, "RSFilter exclude rate: {:.2f}, after_pk: {}, after_read_packs: {}, after_filter: {}, handle_ranges: {}" - ", read_packs: {}, pack_count: {}, none_count: {}, some_count: {}, all_count: {}, all_null_count: {}, " - "read_tag: {}", + ", read_packs: {}, pack_count: {}, none_count: {}, some_count: {}, all_count: {}, all_null_count: {}", ((after_read_packs == 0) ? std::numeric_limits::quiet_NaN() : filter_rate), after_pk, after_read_packs, @@ -160,33 +152,8 @@ void DMFilePackFilter::init(ReadTag read_tag) none_count, some_count, all_count, - all_null_count, - magic_enum::enum_name(read_tag)); -} - -std::tuple DMFilePackFilter::countPackRes() const -{ - UInt64 none_count = 0; - UInt64 some_count = 0; - UInt64 all_count = 0; - UInt64 all_null_count = 0; - for (auto res : pack_res) - { - if (res == RSResult::None || res == RSResult::NoneNull) - ++none_count; - else if (res == RSResult::Some || res == RSResult::SomeNull) - ++some_count; - else if (res == RSResult::All) - ++all_count; - else if (res == RSResult::AllNull) - ++all_null_count; - } - return {none_count, some_count, all_count, all_null_count}; -} - -UInt64 DMFilePackFilter::countUsePack() const -{ - return std::count_if(pack_res.cbegin(), pack_res.cend(), [](RSResult res) { return res.isUse(); }); + all_null_count); + return result; } void DMFilePackFilter::loadIndex( @@ -296,7 +263,7 @@ void DMFilePackFilter::loadIndex( indexes.emplace(col_id, RSIndex(type, minmax_index)); } -void DMFilePackFilter::tryLoadIndex(ColId col_id) +void DMFilePackFilter::tryLoadIndex(RSCheckParam & param, ColId col_id) { if (param.indexes.count(col_id)) return; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 6e21fa99dbb..2483df83c52 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -34,84 +36,35 @@ extern const Event DMFileFilterAftPKAndPackSet; extern const Event DMFileFilterAftRoughSet; } // namespace ProfileEvents -namespace DB -{ -namespace DM +namespace DB::DM { + class DMFilePackFilter { + friend class DMFilePackFilterResult; + public: // Empty `rowkey_ranges` means do not filter by rowkey_ranges - static DMFilePackFilter loadFrom( + static DMFilePackFilterResult loadFrom( + const DMContext & dm_context, const DMFilePtr & dmfile, - const MinMaxIndexCachePtr & index_cache, bool set_cache_if_miss, const RowKeyRanges & rowkey_ranges, const RSOperatorPtr & filter, - const IdSetPtr & read_packs, - const FileProviderPtr & file_provider, - const ReadLimiterPtr & read_limiter, - const ScanContextPtr & scan_context, - const String & tracing_id, - const ReadTag read_tag) + const IdSetPtr & read_packs) { - return DMFilePackFilter( + DMFilePackFilter pack_filter( dmfile, - index_cache, + dm_context.global_context.getMinMaxIndexCache(), set_cache_if_miss, rowkey_ranges, filter, read_packs, - file_provider, - read_limiter, - scan_context, - tracing_id, - read_tag); - } - - const RSResults & getHandleRes() const { return handle_res; } - const RSResults & getPackResConst() const { return pack_res; } - UInt64 countUsePack() const; - - Handle getMinHandle(size_t pack_id) - { - if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); - auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; - return minmax_index->getIntMinMax(pack_id).first; - } - - StringRef getMinStringHandle(size_t pack_id) - { - if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); - auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; - return minmax_index->getStringMinMax(pack_id).first; - } - - UInt64 getMaxVersion(size_t pack_id) - { - if (!param.indexes.count(VERSION_COLUMN_ID)) - tryLoadIndex(VERSION_COLUMN_ID); - auto & minmax_index = param.indexes.find(VERSION_COLUMN_ID)->second.minmax; - return minmax_index->getUInt64MinMax(pack_id).second; - } - - // Get valid rows and bytes after filter invalid packs by handle_range and filter - std::pair validRowsAndBytes() - { - size_t rows = 0; - size_t bytes = 0; - const auto & pack_stats = dmfile->getPackStats(); - for (size_t i = 0; i < pack_stats.size(); ++i) - { - if (pack_res[i].isUse()) - { - rows += pack_stats[i].rows; - bytes += pack_stats[i].bytes; - } - } - return {rows, bytes}; + dm_context.global_context.getFileProvider(), + dm_context.global_context.getReadLimiter(), + dm_context.scan_context, + dm_context.tracing_id); + return pack_filter.load(dm_context); } private: @@ -125,8 +78,7 @@ class DMFilePackFilter const FileProviderPtr & file_provider_, const ReadLimiterPtr & read_limiter_, const ScanContextPtr & scan_context_, - const String & tracing_id, - const ReadTag read_tag) + const String & tracing_id) : dmfile(dmfile_) , index_cache(index_cache_) , set_cache_if_miss(set_cache_if_miss_) @@ -134,15 +86,12 @@ class DMFilePackFilter , filter(filter_) , read_packs(read_packs_) , file_provider(file_provider_) - , handle_res(dmfile->getPacks(), RSResult::All) , scan_context(scan_context_) , log(Logger::get(tracing_id)) , read_limiter(read_limiter_) - { - init(read_tag); - } + {} - void init(ReadTag read_tag); + DMFilePackFilterResult load(const DMContext & dm_context); static void loadIndex( ColumnIndexes & indexes, @@ -154,13 +103,11 @@ class DMFilePackFilter const ReadLimiterPtr & read_limiter, const ScanContextPtr & scan_context); - void tryLoadIndex(ColId col_id); - - // None+NoneNull, Some+SomeNull, All, AllNull - std::tuple countPackRes() const; + void tryLoadIndex(RSCheckParam & param, ColId col_id); private: DMFilePtr dmfile; + MinMaxIndexCachePtr index_cache; bool set_cache_if_miss; RowKeyRanges rowkey_ranges; @@ -168,18 +115,10 @@ class DMFilePackFilter IdSetPtr read_packs; FileProviderPtr file_provider; - RSCheckParam param; - - // `handle_res` is the filter results of `rowkey_ranges`. - std::vector handle_res; - // `pack_res` is the filter results of `rowkey_ranges && filter && read_packs`. - std::vector pack_res; - const ScanContextPtr scan_context; LoggerPtr log; ReadLimiterPtr read_limiter; }; -} // namespace DM -} // namespace DB +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp new file mode 100644 index 00000000000..7f431338a75 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp @@ -0,0 +1,83 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +namespace DB::DM +{ + +UInt64 DMFilePackFilterResult::countUsePack() const +{ + return std::count_if(pack_res.begin(), pack_res.end(), [](RSResult res) { return res.isUse(); }); +} + +std::pair DMFilePackFilterResult::validRowsAndBytes() +{ + size_t rows = 0; + size_t bytes = 0; + const auto & pack_stats = dmfile->getPackStats(); + for (size_t i = 0; i < pack_stats.size(); ++i) + { + if (pack_res[i].isUse()) + { + rows += pack_stats[i].rows; + bytes += pack_stats[i].bytes; + } + } + return {rows, bytes}; +} + +std::tuple DMFilePackFilterResult::countPackRes() const +{ + UInt64 none_count = 0; + UInt64 some_count = 0; + UInt64 all_count = 0; + UInt64 all_null_count = 0; + for (auto res : pack_res) + { + if (res == RSResult::None || res == RSResult::NoneNull) + ++none_count; + else if (res == RSResult::Some || res == RSResult::SomeNull) + ++some_count; + else if (res == RSResult::All) + ++all_count; + else if (res == RSResult::AllNull) + ++all_null_count; + } + return {none_count, some_count, all_count, all_null_count}; +} + +void DMFilePackFilterResult::tryLoadIndex(ColId col_id) const +{ + if (param.indexes.count(col_id)) + return; + + if (!dmfile->isColIndexExist(col_id)) + return; + + Stopwatch watch; + DMFilePackFilter::loadIndex( + param.indexes, + dmfile, + dm_context.global_context.getFileProvider(), + dm_context.global_context.getMinMaxIndexCache(), + true, + col_id, + dm_context.global_context.getReadLimiter(), + dm_context.scan_context); +} + +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h new file mode 100644 index 00000000000..5ac4e2b8006 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -0,0 +1,106 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +namespace DB::DM +{ + +class DMFilePackFilterResult; +using DMFilePackFilterResultPtr = std::shared_ptr; +using DMFilePackFilterResults = std::vector; + +class DMFilePackFilterResult +{ + friend class DMFilePackFilter; + +public: + DMFilePackFilterResult(const DMContext & dm_context_, const DMFilePtr & dmfile_, size_t pack_count_) + : dm_context(dm_context_) + , dmfile(dmfile_) + , handle_res(pack_count_, RSResult::All) + {} + + const RSResults & getHandleRes() const { return handle_res; } + const RSResults & getPackResConst() const { return pack_res; } + RSResults & getPackRes() { return pack_res; } + UInt64 countUsePack() const; + + Handle getMinHandle(size_t pack_id) const + { + if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) + tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; + return minmax_index->getIntMinMax(pack_id).first; + } + + StringRef getMinStringHandle(size_t pack_id) const + { + if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) + tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; + return minmax_index->getStringMinMax(pack_id).first; + } + + UInt64 getMaxVersion(size_t pack_id) const + { + if (!param.indexes.count(VERSION_COLUMN_ID)) + tryLoadIndex(VERSION_COLUMN_ID); + auto & minmax_index = param.indexes.find(VERSION_COLUMN_ID)->second.minmax; + return minmax_index->getUInt64MinMax(pack_id).second; + } + + static DMFilePackFilterResultPtr emptyResult(const DMContext & dm_context, const DMFilePtr & dmfile) + { + return std::make_shared(dm_context, dmfile, 0); + } + + static DMFilePackFilterResults emptyResults(const DMContext & dm_context, const DMFiles & files) + { + DMFilePackFilterResults results; + results.reserve(files.size()); + for (const auto & file : files) + { + results.push_back(emptyResult(dm_context, file)); + } + return results; + } + + // Get valid rows and bytes after filter invalid packs by handle_range and filter + std::pair validRowsAndBytes(); + + // None+NoneNull, Some+SomeNull, All, AllNull + std::tuple countPackRes() const; + +private: + void tryLoadIndex(ColId col_id) const; + +private: + const DMContext & dm_context; + + DMFilePtr dmfile; + mutable RSCheckParam param; + + // `handle_res` is the filter results of `rowkey_ranges`. + std::vector handle_res; + // `pack_res` is the filter results of `rowkey_ranges && filter && read_packs`. + std::vector pack_res; +}; + +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter_fwd.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter_fwd.h index 60246ae83f4..f2064ab3f5e 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter_fwd.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter_fwd.h @@ -24,4 +24,8 @@ using IdSetPtr = std::shared_ptr; class DMFilePackFilter; +class DMFilePackFilterResult; +using DMFilePackFilterResultPtr = std::shared_ptr; +using DMFilePackFilterResults = std::vector; + } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index b26cf5b5321..125db7956c8 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -46,7 +46,7 @@ DMFileReader::DMFileReader( bool is_fast_scan_, UInt64 max_read_version_, // filters - DMFilePackFilter && pack_filter_, + const DMFilePackFilterResult & pack_filter_, // caches const MarkCachePtr & mark_cache_, bool enable_column_cache_, @@ -69,7 +69,7 @@ DMFileReader::DMFileReader( , is_fast_scan(is_fast_scan_) , enable_column_cache(enable_column_cache_ && column_cache_) , max_read_version(max_read_version_) - , pack_filter(std::move(pack_filter_)) + , pack_filter(pack_filter_) , mark_cache(mark_cache_) , column_cache(column_cache_) , scan_context(scan_context_) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h index 85c49951b9e..a2336d3f7f9 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h @@ -56,7 +56,7 @@ class DMFileReader // The the MVCC filter version. Used by clean read check. UInt64 max_read_version_, // filters - DMFilePackFilter && pack_filter_, + const DMFilePackFilterResult & pack_filter_, // caches const MarkCachePtr & mark_cache_, bool enable_column_cache_, @@ -184,7 +184,7 @@ class DMFileReader const UInt64 max_read_version; /// Filters - DMFilePackFilter pack_filter; + const DMFilePackFilterResult & pack_filter; /// Caches MarkCachePtr mark_cache; diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 237716abe81..79ce22e6b92 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -947,6 +947,21 @@ BlockInputStreamPtr Segment::getInputStream( expected_block_size, columns_to_read, segment_snap->stable->stable); + + // load DMilePackFilterResult for each DMFile + DMFilePackFilterResults pack_filter_results; + for (const auto & dmfile : segment_snap->stable->getDMFiles()) + { + auto result = std::make_shared(DMFilePackFilter::loadFrom( + dm_context, + dmfile, + /*set_cache_if_miss*/ true, + read_ranges, + filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + /*read_pack*/ {})); + pack_filter_results.emplace_back(std::move(result)); + } + switch (read_mode) { case ReadMode::Normal: @@ -955,7 +970,7 @@ BlockInputStreamPtr Segment::getInputStream( columns_to_read, segment_snap, read_ranges, - filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + pack_filter_results, start_ts, clipped_block_rows); case ReadMode::Fast: @@ -964,7 +979,7 @@ BlockInputStreamPtr Segment::getInputStream( columns_to_read, segment_snap, read_ranges, - filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + pack_filter_results, clipped_block_rows); case ReadMode::Raw: return getInputStreamModeRaw( // @@ -980,6 +995,7 @@ BlockInputStreamPtr Segment::getInputStream( segment_snap, read_ranges, filter, + pack_filter_results, start_ts, expected_block_size, clipped_block_rows); @@ -1002,7 +1018,7 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, bool need_row_id) @@ -1027,11 +1043,11 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( dm_context, *read_info.read_columns, real_ranges, - filter, start_ts, expected_block_size, false, - read_tag); + read_tag, + pack_filter_results); } else if (useCleanRead(segment_snap, columns_to_read)) { @@ -1041,11 +1057,11 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( dm_context, *read_info.read_columns, real_ranges, - filter, start_ts, expected_block_size, true, - read_tag); + read_tag, + pack_filter_results); } else { @@ -1053,13 +1069,13 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( dm_context, *read_info.read_columns, real_ranges, - filter, segment_snap->stable, read_info.getDeltaReader(need_row_id ? ReadTag::MVCC : ReadTag::Query), read_info.index_begin, read_info.index_end, expected_block_size, read_tag, + pack_filter_results, start_ts, need_row_id); } @@ -1086,7 +1102,7 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( const DMContext & dm_context, const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -1098,7 +1114,7 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( columns_to_read, segment_snap, read_ranges, - filter, + pack_filter_results, start_ts, expected_block_size); } @@ -1118,7 +1134,6 @@ BlockInputStreamPtr Segment::getInputStreamForDataExport( dm_context, *read_info.read_columns, data_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, read_info.getDeltaReader(ReadTag::Internal), read_info.index_begin, @@ -1153,7 +1168,7 @@ BlockInputStreamPtr Segment::getInputStreamModeFast( const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, size_t expected_block_size) { auto real_ranges = shrinkRowKeyRanges(read_ranges); @@ -1206,11 +1221,11 @@ BlockInputStreamPtr Segment::getInputStreamModeFast( dm_context, *new_columns_to_read, real_ranges, - filter, std::numeric_limits::max(), expected_block_size, enable_handle_clean_read, ReadTag::Query, + pack_filter_results, /* is_fast_scan */ true, enable_del_clean_read); @@ -1272,7 +1287,6 @@ BlockInputStreamPtr Segment::getInputStreamModeRaw( dm_context, *new_columns_to_read, data_ranges, - EMPTY_RS_OPERATOR, std::numeric_limits::max(), expected_block_size, /* enable_handle_clean_read */ false, @@ -1790,7 +1804,6 @@ std::optional Segment::getSplitPointSlow( dm_context, *pk_col_defs, rowkey_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, delta_reader, read_info.index_begin, @@ -1817,7 +1830,6 @@ std::optional Segment::getSplitPointSlow( dm_context, *pk_col_defs, rowkey_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, delta_reader, read_info.index_begin, @@ -2107,7 +2119,6 @@ std::optional Segment::prepareSplitPhysical( // dm_context, *read_info.read_columns, my_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, my_delta_reader, read_info.index_begin, @@ -2139,7 +2150,6 @@ std::optional Segment::prepareSplitPhysical( // dm_context, *read_info.read_columns, other_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, other_delta_reader, read_info.index_begin, @@ -2147,7 +2157,6 @@ std::optional Segment::prepareSplitPhysical( // dm_context.stable_pack_rows, ReadTag::Internal); - other_data = std::make_shared>(other_data, other_ranges, 0); other_data = std::make_shared>( other_data, @@ -2342,7 +2351,6 @@ StableValueSpacePtr Segment::prepareMerge( dm_context, *read_info.read_columns, rowkey_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, read_info.getDeltaReader(ReadTag::Internal), read_info.index_begin, @@ -2706,13 +2714,13 @@ SkippableBlockInputStreamPtr Segment::getPlacedStream( const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, const StableSnapshotPtr & stable_snap, const DeltaValueReaderPtr & delta_reader, const DeltaIndexIterator & delta_index_begin, const DeltaIndexIterator & delta_index_end, size_t expected_block_size, ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, bool need_row_id) { @@ -2723,11 +2731,11 @@ SkippableBlockInputStreamPtr Segment::getPlacedStream( dm_context, read_columns, rowkey_ranges, - filter, start_ts, expected_block_size, /* enable_handle_clean_read */ false, read_tag, + pack_filter_results, /* is_fast_scan */ false, /* enable_del_clean_read */ false); RowKeyRange rowkey_range = rowkey_ranges.size() == 1 @@ -2928,7 +2936,6 @@ bool Segment::placeUpsert( dm_context, {handle, getVersionColumnDefine()}, {place_handle_range}, - EMPTY_RS_OPERATOR, stable_snap, delta_reader, compacted_index->begin(), @@ -2981,7 +2988,6 @@ bool Segment::placeDelete( dm_context, {handle, getVersionColumnDefine()}, delete_ranges, - EMPTY_RS_OPERATOR, stable_snap, delta_reader, compacted_index->begin(), @@ -3019,7 +3025,6 @@ bool Segment::placeDelete( dm_context, {handle, getVersionColumnDefine()}, {place_handle_range}, - EMPTY_RS_OPERATOR, stable_snap, delta_reader, compacted_index->begin(), @@ -3041,7 +3046,7 @@ BitmapFilterPtr Segment::buildBitmapFilter( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -3052,13 +3057,19 @@ BitmapFilterPtr Segment::buildBitmapFilter( dm_context, segment_snap, read_ranges, - filter, + pack_filter_results, start_ts, expected_block_size); } else { - return buildBitmapFilterNormal(dm_context, segment_snap, read_ranges, filter, start_ts, expected_block_size); + return buildBitmapFilterNormal( + dm_context, + segment_snap, + read_ranges, + pack_filter_results, + start_ts, + expected_block_size); } } @@ -3066,7 +3077,7 @@ BitmapFilterPtr Segment::buildBitmapFilterNormal( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -3080,7 +3091,7 @@ BitmapFilterPtr Segment::buildBitmapFilterNormal( columns_to_read, segment_snap, read_ranges, - filter, + pack_filter_results, start_ts, expected_block_size, /*need_row_id*/ true); @@ -3116,9 +3127,7 @@ struct Range std::pair, std::vector> parseDMFilePackInfo( const DMFiles & dmfiles, - const DMContext & dm_context, - const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_result, UInt64 start_ts) { // Packs that all rows compliant with MVCC filter and RowKey filter requirements. @@ -3137,22 +3146,12 @@ std::pair, std::vector> parseDMFilePackInfo( size_t rows = 0; UInt32 preceded_rows = 0; - for (const auto & dmfile : dmfiles) + for (size_t i = 0; i < dmfiles.size(); ++i) { - DMFilePackFilter pack_filter = DMFilePackFilter::loadFrom( - dmfile, - dm_context.global_context.getMinMaxIndexCache(), - /*set_cache_if_miss*/ true, - read_ranges, - filter, - /*read_pack*/ {}, - dm_context.global_context.getFileProvider(), - dm_context.global_context.getReadLimiter(), - dm_context.scan_context, - dm_context.tracing_id, - ReadTag::MVCC); - const auto & pack_res = pack_filter.getPackResConst(); - const auto & handle_res = pack_filter.getHandleRes(); + const auto & dmfile = dmfiles[i]; + const auto & pack_filter = pack_filter_result[i]; + const auto & pack_res = pack_filter->getPackResConst(); + const auto & handle_res = pack_filter->getHandleRes(); const auto & pack_stats = dmfile->getPackStats(); auto some_packs_set = std::make_shared(); @@ -3167,7 +3166,7 @@ std::pair, std::vector> parseDMFilePackInfo( } if (handle_res[pack_id] == RSResult::Some || pack_stat.not_clean > 0 - || pack_filter.getMaxVersion(pack_id) > start_ts) + || pack_filter->getMaxVersion(pack_id) > start_ts) { // We need to read this pack to do RowKey or MVCC filter. some_packs_set->insert(pack_id); @@ -3202,7 +3201,7 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -3216,7 +3215,7 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( return elapse_ns / 1'000'000.0; }; - auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, dm_context, read_ranges, filter, start_ts); + auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, pack_filter_results, start_ts); if (skipped_ranges.size() == 1 && skipped_ranges[0].offset == 0 && skipped_ranges[0].rows == segment_snap->stable->getDMFilesRows()) @@ -3266,11 +3265,11 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( dm_context, columns_to_read, read_ranges, - filter, start_ts, expected_block_size, /*enable_handle_clean_read*/ false, ReadTag::MVCC, + pack_filter_results, /*is_fast_scan*/ false, /*enable_del_clean_read*/ false, /*read_packs*/ some_packs_sets, @@ -3304,6 +3303,7 @@ SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, ReadTag read_tag) @@ -3314,15 +3314,17 @@ SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( constexpr auto is_fast_scan = true; auto enable_del_clean_read = !hasColumn(columns_to_read, TAG_COLUMN_ID); - SkippableBlockInputStreamPtr stable_stream = segment_snap->stable->getInputStream( + auto ann_query_info = getANNQueryInfo(filter); + SkippableBlockInputStreamPtr stable_stream = segment_snap->stable->tryGetInputStreamWithVectorIndex( dm_context, columns_to_read, read_ranges, - filter, + ann_query_info, start_ts, expected_block_size, enable_handle_clean_read, read_tag, + pack_filter_results, is_fast_scan, enable_del_clean_read, /* read_packs */ {}, @@ -3339,7 +3341,6 @@ SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( columns_to_read_ptr, this->rowkey_range, read_tag); - auto ann_query_info = getANNQueryInfo(filter); SkippableBlockInputStreamPtr persisted_files_stream = ColumnFileSetWithVectorIndexInputStream::tryBuild( dm_context, persisted_files, @@ -3365,6 +3366,7 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & data_ranges, const PushDownFilterPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -3376,6 +3378,7 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( *filter_columns, data_ranges, filter->rs_operator, + pack_filter_results, start_ts, expected_block_size, ReadTag::LMFilter); @@ -3444,6 +3447,7 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( *rest_columns_to_read, data_ranges, filter->rs_operator, + pack_filter_results, start_ts, expected_block_size, ReadTag::Query); @@ -3481,6 +3485,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, const PushDownFilterPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t build_bitmap_filter_block_rows, size_t read_data_block_rows) @@ -3495,7 +3500,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( dm_context, segment_snap, real_ranges, - filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + pack_filter_results, start_ts, build_bitmap_filter_block_rows); @@ -3515,6 +3520,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( segment_snap, real_ranges, filter, + pack_filter_results, start_ts, read_data_block_rows); } @@ -3526,6 +3532,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( columns_to_read, real_ranges, filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + pack_filter_results, start_ts, read_data_block_rows, ReadTag::Query); diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index 22b50f8ad8c..b8ba5b3400f 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -239,7 +239,7 @@ class Segment const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, bool need_row_id = false); @@ -248,7 +248,7 @@ class Segment const DMContext & dm_context, const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter = {}, + const DMFilePackFilterResults & pack_filter_results = {}, UInt64 start_ts = std::numeric_limits::max(), size_t expected_block_size = DEFAULT_BLOCK_SIZE); @@ -270,7 +270,7 @@ class Segment const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, size_t expected_block_size = DEFAULT_BLOCK_SIZE); BlockInputStreamPtr getInputStreamModeRaw( @@ -684,13 +684,13 @@ class Segment const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, const StableSnapshotPtr & stable_snap, const DeltaValueReaderPtr & delta_reader, const DeltaIndexIterator & delta_index_begin, const DeltaIndexIterator & delta_index_end, size_t expected_block_size, ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results = {}, UInt64 start_ts = std::numeric_limits::max(), bool need_row_id = false); @@ -734,21 +734,21 @@ class Segment const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); BitmapFilterPtr buildBitmapFilterNormal( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); BitmapFilterPtr buildBitmapFilterStableOnly( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); SkippableBlockInputStreamPtr getConcatSkippableBlockInputStream( @@ -758,6 +758,7 @@ class Segment const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, ReadTag read_tag); @@ -767,6 +768,7 @@ class Segment const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, const PushDownFilterPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t build_bitmap_filter_block_rows, size_t read_data_block_rows); @@ -778,6 +780,7 @@ class Segment const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & data_ranges, const PushDownFilterPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp b/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp index 76edbcd84f4..28822cc26af 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp @@ -521,7 +521,7 @@ void SegmentReadTask::checkMemTableSet(const ColumnFileSetSnapshotPtr & mem_tabl void SegmentReadTask::checkMemTableSetReady() const { const auto & mem_table_snap = read_snapshot->delta->getMemTableSetSnapshot(); - for (auto & cf : mem_table_snap->getColumnFiles()) + for (const auto & cf : mem_table_snap->getColumnFiles()) { if (auto * in_mem_cf = cf->tryToInMemoryFile(); in_mem_cf) { diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 10b03785e1d..9d6d7b8a707 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -49,21 +49,15 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang } else if (dm_context != nullptr) { - auto index_cache = dm_context->global_context.getGlobalContext().getMinMaxIndexCache(); for (const auto & file : files_) { auto pack_filter = DMFilePackFilter::loadFrom( + *dm_context, file, - index_cache, /*set_cache_if_miss*/ true, {range}, EMPTY_RS_OPERATOR, - {}, - dm_context->global_context.getFileProvider(), - dm_context->getReadLimiter(), - dm_context->scan_context, - dm_context->tracing_id, - ReadTag::Internal); + {}); auto [file_valid_rows, file_valid_bytes] = pack_filter.validRowsAndBytes(); rows += file_valid_rows; bytes += file_valid_bytes; @@ -377,17 +371,12 @@ void StableValueSpace::calculateStableProperty( mvcc_stream->readSuffix(); } auto pack_filter = DMFilePackFilter::loadFrom( + context, file, - context.global_context.getMinMaxIndexCache(), /*set_cache_if_miss*/ false, {rowkey_range}, EMPTY_RS_OPERATOR, - {}, - context.global_context.getFileProvider(), - context.getReadLimiter(), - context.scan_context, - context.tracing_id, - ReadTag::Internal); + {}); const auto & pack_res = pack_filter.getPackResConst(); size_t new_pack_properties_index = 0; const bool use_new_pack_properties = pack_properties.property_size() == 0; @@ -461,11 +450,72 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( const DMContext & context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, UInt64 max_data_version, size_t expected_block_size, bool enable_handle_clean_read, ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results, + bool is_fast_scan, + bool enable_del_clean_read, + const std::vector & read_packs, + bool need_row_id) +{ + LOG_DEBUG( + log, + "start_ts: {}, enable_handle_clean_read: {}, is_fast_mode: {}, enable_del_clean_read: {}", + max_data_version, + enable_handle_clean_read, + is_fast_scan, + enable_del_clean_read); + SkippableBlockInputStreams streams; + std::vector rows; + streams.reserve(stable->files.size()); + rows.reserve(stable->files.size()); + + for (size_t i = 0; i < stable->files.size(); i++) + { + DMFileBlockInputStreamBuilder builder(context.global_context); + const auto & pack_filter_result = !pack_filter_results.empty() + ? pack_filter_results[i] + : DMFilePackFilterResult::emptyResult(context, stable->files[i]); + builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) + .enableColumnCacheLongTerm(context.pk_col_id) + .setDMFilePackFilterResult(pack_filter_result) + .setColumnCache(column_caches[i]) + .setTracingID(context.tracing_id) + .setRowsThreshold(expected_block_size) + .setReadPacks(read_packs.size() > i ? read_packs[i] : nullptr) + .setReadTag(read_tag); + + streams.push_back(builder.build(stable->files[i], read_columns, rowkey_ranges, context.scan_context)); + rows.push_back(stable->files[i]->getRows()); + } + if (need_row_id) + { + return std::make_shared>( + streams, + std::move(rows), + context.scan_context); + } + else + { + return std::make_shared>( + streams, + std::move(rows), + context.scan_context); + } +} + +SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVectorIndex( + const DMContext & context, + const ColumnDefines & read_columns, + const RowKeyRanges & rowkey_ranges, + const ANNQueryInfoPtr & ann_query_info, + UInt64 max_data_version, + size_t expected_block_size, + bool enable_handle_clean_read, + ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results, bool is_fast_scan, bool enable_del_clean_read, const std::vector & read_packs, @@ -489,9 +539,13 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( for (size_t i = 0; i < stable->files.size(); i++) { DMFileBlockInputStreamBuilder builder(context.global_context); + const auto & pack_filter_result = !pack_filter_results.empty() + ? pack_filter_results[i] + : DMFilePackFilterResult::emptyResult(context, stable->files[i]); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) .enableColumnCacheLongTerm(context.pk_col_id) - .setRSOperator(filter) + .setAnnQureyInfo(ann_query_info) + .setDMFilePackFilterResult(pack_filter_result) .setColumnCache(column_caches[i]) .setTracingID(context.tracing_id) .setRowsThreshold(expected_block_size) @@ -543,17 +597,12 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & for (auto & f : stable->files) { auto filter = DMFilePackFilter::loadFrom( + context, f, - context.global_context.getMinMaxIndexCache(), /*set_cache_if_miss*/ false, {range}, RSOperatorPtr{}, - IdSetPtr{}, - context.global_context.getFileProvider(), - context.getReadLimiter(), - context.scan_context, - context.tracing_id, - ReadTag::Internal); + IdSetPtr{}); const auto & pack_stats = f->getPackStats(); const auto & pack_res = filter.getPackResConst(); for (size_t i = 0; i < pack_stats.size(); ++i) @@ -589,17 +638,12 @@ StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, co { const auto & file = stable->files[file_idx]; auto filter = DMFilePackFilter::loadFrom( + context, file, - context.global_context.getMinMaxIndexCache(), /*set_cache_if_miss*/ false, {range}, RSOperatorPtr{}, - IdSetPtr{}, - context.global_context.getFileProvider(), - context.getReadLimiter(), - context.scan_context, - context.tracing_id, - ReadTag::Internal); + IdSetPtr{}); const auto & handle_filter_result = filter.getHandleRes(); if (file_idx == 0) { diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.h b/dbms/src/Storages/DeltaMerge/StableValueSpace.h index b6e07214f1a..8fdddbaeb48 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.h @@ -14,12 +14,16 @@ #pragma once +#include +#include #include #include #include #include #include #include +#include +#include #include #include #include @@ -224,11 +228,26 @@ class StableValueSpace : public std::enable_shared_from_this const DMContext & context, // const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, UInt64 max_data_version, size_t expected_block_size, bool enable_handle_clean_read, ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results = {}, + bool is_fast_scan = false, + bool enable_del_clean_read = false, + const std::vector & read_packs = {}, + bool need_row_id = false); + + SkippableBlockInputStreamPtr tryGetInputStreamWithVectorIndex( + const DMContext & context, + const ColumnDefines & read_columns, + const RowKeyRanges & rowkey_ranges, + const ANNQueryInfoPtr & ann_query_info, + UInt64 max_data_version, + size_t expected_block_size, + bool enable_handle_clean_read, + ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results, bool is_fast_scan = false, bool enable_del_clean_read = false, const std::vector & read_packs = {}, diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index 0672993e4f4..7235bab5c5c 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -1575,12 +1575,14 @@ try auto test_read_filter = [&](const HandleRange & range) { // Filtered by rough set filter auto filter = toRSFilter(i64_cd, range); + const auto read_ranges = RowKeyRanges{RowKeyRange::newAll(false, 1)}; + auto pack_result = std::make_shared( + DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {})); // Test read DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream - = builder.setColumnCache(column_cache) - .setRSOperator(filter) // Filtered by rough set filter - .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}, std::make_shared()); + auto stream = builder.setColumnCache(column_cache) + .setDMFilePackFilterResult(pack_result) + .build(dm_file, *cols, read_ranges, std::make_shared()); Int64 expect_first_pk = static_cast(std::floor(std::max(0, range.start) / span_per_part)) * span_per_part; Int64 expect_last_pk = std::min( @@ -1656,12 +1658,14 @@ try // (first range) Or (Unsupported) -> should NOT filter any chunk filters.emplace_back(createOr({one_part_filter, createUnsupported("test")}), num_rows_write); auto test_read_filter = [&](const DM::RSOperatorPtr & filter, const size_t num_rows_should_read) { + const auto read_ranges = RowKeyRanges{RowKeyRange::newAll(false, 1)}; + auto pack_result = std::make_shared( + DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {})); // Test read DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream - = builder.setColumnCache(column_cache) - .setRSOperator(filter) // Filtered by rough set filter - .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}, std::make_shared()); + auto stream = builder.setColumnCache(column_cache) + .setDMFilePackFilterResult(pack_result) + .build(dm_file, *cols, read_ranges, std::make_shared()); Int64 expect_first_pk = 0; Int64 expect_last_pk = num_rows_should_read; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 9e1441f96a8..6c7d5c8ed76 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1087,7 +1088,7 @@ try dmContext(), segment_snap, real_ranges, - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); // the bitmap only contains the overlapped packs of ColumnFileBig. So only 60 here. @@ -1107,6 +1108,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, + DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); @@ -1148,17 +1150,14 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, + DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); // Only the rows in [30, 50) and [80, 90) valid auto vec = createNumbers(30, 50); vec.append_range(createNumbers(80, 90)); - ASSERT_INPUTSTREAM_BLOCK_UR( - in, - Block({ - createColumn(vec), - })); + ASSERT_INPUTSTREAM_BLOCK_UR(in, Block({createColumn(vec)})); } } CATCH diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp index 89f7181f98d..dcc9f38d50a 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp @@ -240,7 +240,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.5})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -265,7 +265,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -290,7 +290,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -318,7 +318,7 @@ try bitmap_filter->set(/* start */ 2, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 3)) .tryBuildWithVectorIndex( dm_file, @@ -343,7 +343,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -368,7 +368,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -393,7 +393,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -428,7 +428,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -453,7 +453,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -491,7 +491,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -589,7 +589,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -615,7 +615,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -644,7 +644,7 @@ try bitmap_filter->set(/* start */ 2, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 3)) .tryBuildWithVectorIndex( dm_file, @@ -674,7 +674,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -700,7 +700,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -729,7 +729,7 @@ try bitmap_filter->set(/* start */ 2, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 3)) .tryBuildWithVectorIndex( dm_file, @@ -759,7 +759,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -784,7 +784,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -812,7 +812,7 @@ try bitmap_filter->set(/* start */ 2, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 3)) .tryBuildWithVectorIndex( dm_file, @@ -876,7 +876,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.5})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(5, true)) .tryBuildWithVectorIndex( dm_file, @@ -944,7 +944,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({5.0, 5.0, 5.5})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(6, true)) .tryBuildWithVectorIndex( dm_file, @@ -969,7 +969,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.0})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(6, true)) .tryBuildWithVectorIndex( dm_file, @@ -994,7 +994,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({0.0, 0.0, 0.0})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(6, true)) .tryBuildWithVectorIndex( dm_file, @@ -1022,7 +1022,7 @@ try bitmap_filter->set(/* start */ 5, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 6)) .tryBuildWithVectorIndex( dm_file, @@ -1093,7 +1093,7 @@ try bitmap_filter->set(0, 6); // 0~6 rows are valid, 6~9 rows are invalid due to pack filter. DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 9)) .tryBuildWithVectorIndex(dm_file, read_cols, row_key_ranges, std::make_shared()); ASSERT_INPUTSTREAM_COLS_UR( @@ -1107,7 +1107,7 @@ try // TopK=4 ann_query_info->set_top_k(4); builder = DMFileBlockInputStreamBuilder(dbContext()); - stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 9)) .tryBuildWithVectorIndex(dm_file, read_cols, row_key_ranges, std::make_shared()); ASSERT_INPUTSTREAM_COLS_UR( @@ -1136,7 +1136,7 @@ try bitmap_filter->set(3, 2); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 9)) .tryBuildWithVectorIndex(dm_file, read_cols, row_key_ranges, std::make_shared()); ASSERT_INPUTSTREAM_COLS_UR( @@ -1204,6 +1204,7 @@ class VectorIndexSegmentTestBase snapshot, {range}, std::make_shared(wrapWithANNQueryInfo({}, ann_query)), + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp index 226760ff9c4..d4453abd195 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp @@ -16,11 +16,14 @@ #include #include #include +#include #include #include #include #include #include + + using namespace std::chrono_literals; using namespace DB::tests; @@ -371,7 +374,7 @@ TEST_F(SegmentBitmapFilterTest, CleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -393,7 +396,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -413,7 +416,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), 1, DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -441,7 +444,7 @@ TEST_F(SegmentBitmapFilterTest, StableRange) *dm_context, snap, {buildRowKeyRange(10000, 50000)}, // [10000, 50000) - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -510,7 +513,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - nullptr, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 30); @@ -540,7 +543,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - nullptr, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 750); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp index 0f8c77f515d..f2c10e2a677 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -1025,7 +1025,7 @@ std::vector SegmentTestBasic::readSegment(PageIdU64 segment_id, bool need columns_to_read, snapshot, ranges.empty() ? RowKeyRanges{segment->getRowKeyRange()} : ranges, - nullptr, + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, need_row_id); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp index 549e335a951..01e38b58c1e 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp @@ -105,6 +105,7 @@ class SkippableBlockInputStreamTest : public SegmentTestBasic columns_to_read, read_ranges, EMPTY_RS_OPERATOR, + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, ReadTag::Internal); From f3d46f7cb8cec55fbe9c152ebe152dcb6249e396 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 11:13:38 +0800 Subject: [PATCH 02/17] fix Signed-off-by: Lloyd-Pottiger --- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 2 +- .../Storages/DeltaMerge/File/ColumnStream.cpp | 2 +- .../File/DMFileBlockInputStream.cpp | 20 +++++++-- .../DeltaMerge/File/DMFileBlockInputStream.h | 3 +- .../DeltaMerge/File/DMFilePackFilter.cpp | 6 +-- .../DeltaMerge/File/DMFilePackFilter.h | 4 +- .../File/DMFilePackFilterResult.cpp | 9 ++-- .../DeltaMerge/File/DMFilePackFilterResult.h | 45 +++++++++++++------ .../Storages/DeltaMerge/File/DMFileReader.cpp | 14 +++--- .../Storages/DeltaMerge/File/DMFileReader.h | 4 +- .../DMFileWithVectorIndexBlockInputStream.cpp | 2 +- dbms/src/Storages/DeltaMerge/Index/RSResult.h | 7 +-- dbms/src/Storages/DeltaMerge/Segment.cpp | 8 ++-- .../Storages/DeltaMerge/StableValueSpace.cpp | 26 +++++------ .../tests/gtest_dm_delta_merge_store.cpp | 4 +- .../DeltaMerge/tests/gtest_dm_file.cpp | 6 +-- .../DeltaMerge/tests/gtest_dm_segment.cpp | 6 +-- .../DeltaMerge/tests/gtest_segment_bitmap.cpp | 12 ++--- 18 files changed, 103 insertions(+), 77 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 90aa65c10ca..931247d2f9f 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -45,7 +45,7 @@ void ColumnFileBig::calculateStat(const DMContext & dm_context) {segment_range}, EMPTY_RS_OPERATOR, {}); - std::tie(valid_rows, valid_bytes) = pack_filter.validRowsAndBytes(); + std::tie(valid_rows, valid_bytes) = pack_filter->validRowsAndBytes(); } void ColumnFileBig::removeData(WriteBatches & wbs) const diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index f7c4c31f40a..9e2511b45fa 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -157,7 +157,7 @@ std::unique_ptr ColumnReadStream::buildColDataRe // Try to get the largest buffer size of reading continuous packs size_t buffer_size = 0; - const auto & pack_res = reader.pack_filter.getPackResConst(); + const auto & pack_res = reader.pack_filter->getPackResConst(); for (size_t i = 0; i < n_packs; /*empty*/) { if (!pack_res[i].isUse()) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index b3aee6221fb..80070240de3 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -65,6 +65,13 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( max_sharing_column_bytes_for_all = 0; } + // If pack_filter is not set, we will create a default one. + if (!pack_filter) + { + pack_filter + = std::make_shared(index_cache, file_provider, read_limiter, scan_context, dmfile); + } + DMFileReader reader( dmfile, read_columns, @@ -73,7 +80,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( enable_del_clean_read, is_fast_scan, max_data_version, - *pack_filter, + pack_filter, mark_cache, enable_column_cache, column_cache, @@ -165,6 +172,13 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle; + // If pack_filter is not set, we will create a default one. + if (!pack_filter) + { + pack_filter + = std::make_shared(index_cache, file_provider, read_limiter, scan_context, dmfile); + } + DMFileReader rest_columns_reader( dmfile, rest_columns, @@ -173,7 +187,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn enable_del_clean_read, is_fast_scan, max_data_version, - *pack_filter, + pack_filter, mark_cache, enable_column_cache, column_cache, @@ -185,7 +199,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn tracing_id, enable_read_thread, scan_context, - ReadTag::Query); + read_tag); if (column_cache_long_term && pk_col_id) // ColumnCacheLongTerm is only filled in Vector Search. diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h index a2a89ae7f26..23b30ace2c1 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h @@ -138,7 +138,7 @@ class DMFileBlockInputStreamBuilder return *this; } - DMFileBlockInputStreamBuilder setAnnQureyInfo(const ANNQueryInfoPtr & ann_query_info_) + DMFileBlockInputStreamBuilder & setAnnQureyInfo(const ANNQueryInfoPtr & ann_query_info_) { ann_query_info = ann_query_info_; return *this; @@ -162,6 +162,7 @@ class DMFileBlockInputStreamBuilder read_one_pack_every_time = true; return *this; } + DMFileBlockInputStreamBuilder & setRowsThreshold(size_t rows_threshold_per_read_) { rows_threshold_per_read = rows_threshold_per_read_; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 3bbe0bd0967..f93559dae8f 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -21,12 +21,12 @@ namespace DB::DM { -DMFilePackFilterResult DMFilePackFilter::load(const DMContext & dm_context) +DMFilePackFilterResultPtr DMFilePackFilter::load(const DMContext & dm_context) { Stopwatch watch; SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); - DMFilePackFilterResult result(dm_context, dmfile, pack_count); + DMFilePackFilterResult result(dm_context, dmfile); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) { @@ -153,7 +153,7 @@ DMFilePackFilterResult DMFilePackFilter::load(const DMContext & dm_context) some_count, all_count, all_null_count); - return result; + return std::make_shared(std::move(result)); } void DMFilePackFilter::loadIndex( diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 2483df83c52..af9ba2d8a66 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -45,7 +45,7 @@ class DMFilePackFilter public: // Empty `rowkey_ranges` means do not filter by rowkey_ranges - static DMFilePackFilterResult loadFrom( + static DMFilePackFilterResultPtr loadFrom( const DMContext & dm_context, const DMFilePtr & dmfile, bool set_cache_if_miss, @@ -91,7 +91,7 @@ class DMFilePackFilter , read_limiter(read_limiter_) {} - DMFilePackFilterResult load(const DMContext & dm_context); + DMFilePackFilterResultPtr load(const DMContext & dm_context); static void loadIndex( ColumnIndexes & indexes, diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp index 7f431338a75..2162d283733 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include @@ -72,12 +71,12 @@ void DMFilePackFilterResult::tryLoadIndex(ColId col_id) const DMFilePackFilter::loadIndex( param.indexes, dmfile, - dm_context.global_context.getFileProvider(), - dm_context.global_context.getMinMaxIndexCache(), + file_provider, + index_cache, true, col_id, - dm_context.global_context.getReadLimiter(), - dm_context.scan_context); + read_limiter, + scan_context); } } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index 5ac4e2b8006..eaccb7d3474 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -31,15 +32,33 @@ class DMFilePackFilterResult friend class DMFilePackFilter; public: - DMFilePackFilterResult(const DMContext & dm_context_, const DMFilePtr & dmfile_, size_t pack_count_) - : dm_context(dm_context_) + DMFilePackFilterResult(const DMContext & dm_context_, const DMFilePtr & dmfile_) + : index_cache(dm_context_.global_context.getMinMaxIndexCache()) + , file_provider(dm_context_.global_context.getFileProvider()) + , read_limiter(dm_context_.global_context.getReadLimiter()) + , scan_context(dm_context_.scan_context) , dmfile(dmfile_) - , handle_res(pack_count_, RSResult::All) + , handle_res(dmfile->getPacks(), RSResult::All) + , pack_res(dmfile->getPacks(), RSResult::Some) + {} + + DMFilePackFilterResult( + const MinMaxIndexCachePtr & index_cache_, + const FileProviderPtr & file_provider_, + const ReadLimiterPtr & read_limiter_, + const ScanContextPtr & scan_context, + const DMFilePtr & dmfile_) + : index_cache(index_cache_) + , file_provider(file_provider_) + , read_limiter(read_limiter_) + , scan_context(scan_context) + , dmfile(dmfile_) + , handle_res(dmfile->getPacks(), RSResult::All) + , pack_res(dmfile->getPacks(), RSResult::Some) {} const RSResults & getHandleRes() const { return handle_res; } const RSResults & getPackResConst() const { return pack_res; } - RSResults & getPackRes() { return pack_res; } UInt64 countUsePack() const; Handle getMinHandle(size_t pack_id) const @@ -66,19 +85,13 @@ class DMFilePackFilterResult return minmax_index->getUInt64MinMax(pack_id).second; } - static DMFilePackFilterResultPtr emptyResult(const DMContext & dm_context, const DMFilePtr & dmfile) - { - return std::make_shared(dm_context, dmfile, 0); - } - - static DMFilePackFilterResults emptyResults(const DMContext & dm_context, const DMFiles & files) + // Only for test + static DMFilePackFilterResults defaultResults(const DMContext & dm_context, const DMFiles & files) { DMFilePackFilterResults results; results.reserve(files.size()); for (const auto & file : files) - { - results.push_back(emptyResult(dm_context, file)); - } + results.push_back(std::make_shared(dm_context, file)); return results; } @@ -92,7 +105,11 @@ class DMFilePackFilterResult void tryLoadIndex(ColId col_id) const; private: - const DMContext & dm_context; + MinMaxIndexCachePtr index_cache; + FileProviderPtr file_provider; + ReadLimiterPtr read_limiter; + + const ScanContextPtr scan_context; DMFilePtr dmfile; mutable RSCheckParam param; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 125db7956c8..64bfe16f7f7 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -46,7 +46,7 @@ DMFileReader::DMFileReader( bool is_fast_scan_, UInt64 max_read_version_, // filters - const DMFilePackFilterResult & pack_filter_, + const DMFilePackFilterResultPtr & pack_filter_, // caches const MarkCachePtr & mark_cache_, bool enable_column_cache_, @@ -260,7 +260,7 @@ Block DMFileReader::readImpl(const ReadBlockInfo & read_info) }); const auto & pack_stats = dmfile->getPackStats(); const auto & pack_properties = dmfile->getPackProperties(); - const auto & handle_res = pack_filter.getHandleRes(); // alias of handle_res in pack_filter + const auto & handle_res = pack_filter->getHandleRes(); // alias of handle_res in pack_filter std::vector handle_column_clean_read_packs; std::vector del_column_clean_read_packs; std::vector version_column_clean_read_packs; @@ -311,7 +311,7 @@ Block DMFileReader::readImpl(const ReadBlockInfo & read_info) // If all handle in a pack are in the given range, no not_clean rows, and max version <= max_read_version, // we do not need to read handle column. if (handle_res[i] == RSResult::All && pack_stats[i].not_clean == 0 - && pack_filter.getMaxVersion(i) <= max_read_version) + && pack_filter->getMaxVersion(i) <= max_read_version) { handle_column_clean_read_packs.push_back(i); version_column_clean_read_packs.push_back(i); @@ -374,12 +374,12 @@ ColumnPtr DMFileReader::cleanRead( { if (is_common_handle) { - StringRef min_handle = pack_filter.getMinStringHandle(range.first); + StringRef min_handle = pack_filter->getMinStringHandle(range.first); return cd.type->createColumnConst(rows_count, Field(min_handle.data, min_handle.size)); } else { - Handle min_handle = pack_filter.getMinHandle(range.first); + Handle min_handle = pack_filter->getMinHandle(range.first); return cd.type->createColumnConst(rows_count, Field(min_handle)); } } @@ -706,7 +706,7 @@ void DMFileReader::addSkippedRows(UInt64 rows) void DMFileReader::initReadBlockInfos() { - const auto & pack_res = pack_filter.getPackResConst(); + const auto & pack_res = pack_filter->getPackResConst(); const auto & pack_stats = dmfile->getPackStats(); const size_t read_pack_limit = read_one_pack_every_time ? 1 : std::numeric_limits::max(); @@ -756,7 +756,7 @@ std::vector DMFileReader::splitReadBlockInfos( { const auto pack_end = read_info.start_pack_id + read_info.pack_count; const size_t start_row_offset = pack_offset[read_info.start_pack_id]; - const auto & pack_res = pack_filter.getPackResConst(); + const auto & pack_res = pack_filter->getPackResConst(); const auto & pack_stats = dmfile->getPackStats(); std::vector new_read_block_infos; new_read_block_infos.reserve(pack_end - read_info.start_pack_id); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h index a2336d3f7f9..bb477865c56 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h @@ -56,7 +56,7 @@ class DMFileReader // The the MVCC filter version. Used by clean read check. UInt64 max_read_version_, // filters - const DMFilePackFilterResult & pack_filter_, + const DMFilePackFilterResultPtr & pack_filter_, // caches const MarkCachePtr & mark_cache_, bool enable_column_cache_, @@ -184,7 +184,7 @@ class DMFileReader const UInt64 max_read_version; /// Filters - const DMFilePackFilterResult & pack_filter; + const DMFilePackFilterResultPtr pack_filter; /// Caches MarkCachePtr mark_cache; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp index 6f4ac3b10d2..dc4553455d4 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp @@ -151,7 +151,7 @@ void DMFileWithVectorIndexBlockInputStream::updateReadBlockInfos() read_block_infos.clear(); const auto & pack_stats = dmfile->getPackStats(); - const auto & pack_res = reader.pack_filter.getPackResConst(); + const auto & pack_res = reader.pack_filter->getPackResConst(); // Update valid_packs_before_search for (const auto res : pack_res) diff --git a/dbms/src/Storages/DeltaMerge/Index/RSResult.h b/dbms/src/Storages/DeltaMerge/Index/RSResult.h index e52ce7bbfb6..f76b0fbd96d 100644 --- a/dbms/src/Storages/DeltaMerge/Index/RSResult.h +++ b/dbms/src/Storages/DeltaMerge/Index/RSResult.h @@ -46,9 +46,6 @@ class RSResult static ValueResult logicalAnd(ValueResult v0, ValueResult v1) noexcept; static ValueResult logicalOr(ValueResult v0, ValueResult v1) noexcept; - // Deleting or privating constructors, so that cannot create invalid objects. - // Use the static member variables below. - RSResult() = delete; RSResult(ValueResult v_, bool has_null_) : v(v_) , has_null(has_null_) @@ -60,6 +57,10 @@ class RSResult bool has_null; public: + // Deleting constructors, so that cannot create invalid objects. + // Use the static member variables below. + RSResult() = delete; + bool isUse() const noexcept { return v != ValueResult::None; } bool allMatch() const noexcept { return *this == RSResult::All; } diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 79ce22e6b92..fc9c4b8902d 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -950,16 +950,17 @@ BlockInputStreamPtr Segment::getInputStream( // load DMilePackFilterResult for each DMFile DMFilePackFilterResults pack_filter_results; + pack_filter_results.reserve(segment_snap->stable->getDMFiles().size()); for (const auto & dmfile : segment_snap->stable->getDMFiles()) { - auto result = std::make_shared(DMFilePackFilter::loadFrom( + auto result = DMFilePackFilter::loadFrom( dm_context, dmfile, /*set_cache_if_miss*/ true, read_ranges, filter ? filter->rs_operator : EMPTY_RS_OPERATOR, - /*read_pack*/ {})); - pack_filter_results.emplace_back(std::move(result)); + /*read_pack*/ {}); + pack_filter_results.push_back(result); } switch (read_mode) @@ -3525,6 +3526,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( read_data_block_rows); } + std::cout << "getBitmapFilterInputStream" << std::endl; auto stream = getConcatSkippableBlockInputStream( bitmap_filter, segment_snap, diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 9d6d7b8a707..018f10c4eab 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -58,7 +58,7 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang {range}, EMPTY_RS_OPERATOR, {}); - auto [file_valid_rows, file_valid_bytes] = pack_filter.validRowsAndBytes(); + auto [file_valid_rows, file_valid_bytes] = pack_filter->validRowsAndBytes(); rows += file_valid_rows; bytes += file_valid_bytes; } @@ -377,12 +377,12 @@ void StableValueSpace::calculateStableProperty( {rowkey_range}, EMPTY_RS_OPERATOR, {}); - const auto & pack_res = pack_filter.getPackResConst(); + const auto & pack_res = pack_filter->getPackResConst(); size_t new_pack_properties_index = 0; const bool use_new_pack_properties = pack_properties.property_size() == 0; if (use_new_pack_properties) { - const size_t use_packs_count = pack_filter.countUsePack(); + const size_t use_packs_count = pack_filter->countUsePack(); RUNTIME_CHECK_MSG( static_cast(new_pack_properties.property_size()) == use_packs_count, @@ -472,15 +472,12 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( streams.reserve(stable->files.size()); rows.reserve(stable->files.size()); - for (size_t i = 0; i < stable->files.size(); i++) + for (size_t i = 0; i < stable->files.size(); ++i) { DMFileBlockInputStreamBuilder builder(context.global_context); - const auto & pack_filter_result = !pack_filter_results.empty() - ? pack_filter_results[i] - : DMFilePackFilterResult::emptyResult(context, stable->files[i]); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) .enableColumnCacheLongTerm(context.pk_col_id) - .setDMFilePackFilterResult(pack_filter_result) + .setDMFilePackFilterResult(!pack_filter_results.empty() ? pack_filter_results[i] : nullptr) .setColumnCache(column_caches[i]) .setTracingID(context.tracing_id) .setRowsThreshold(expected_block_size) @@ -536,16 +533,13 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe size_t last_rows = 0; - for (size_t i = 0; i < stable->files.size(); i++) + for (size_t i = 0; i < stable->files.size(); ++i) { DMFileBlockInputStreamBuilder builder(context.global_context); - const auto & pack_filter_result = !pack_filter_results.empty() - ? pack_filter_results[i] - : DMFilePackFilterResult::emptyResult(context, stable->files[i]); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) .enableColumnCacheLongTerm(context.pk_col_id) .setAnnQureyInfo(ann_query_info) - .setDMFilePackFilterResult(pack_filter_result) + .setDMFilePackFilterResult(!pack_filter_results.empty() ? pack_filter_results[i] : nullptr) .setColumnCache(column_caches[i]) .setTracingID(context.tracing_id) .setRowsThreshold(expected_block_size) @@ -553,7 +547,7 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe .setReadTag(read_tag); if (bitmap_filter) { - builder = builder.setBitmapFilter( + builder.setBitmapFilter( BitmapFilterView(bitmap_filter, last_rows, last_rows + stable->files[i]->getRows())); last_rows += stable->files[i]->getRows(); } @@ -604,7 +598,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & RSOperatorPtr{}, IdSetPtr{}); const auto & pack_stats = f->getPackStats(); - const auto & pack_res = filter.getPackResConst(); + const auto & pack_res = filter->getPackResConst(); for (size_t i = 0; i < pack_stats.size(); ++i) { if (pack_res[i].isUse()) @@ -644,7 +638,7 @@ StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, co {range}, RSOperatorPtr{}, IdSetPtr{}); - const auto & handle_filter_result = filter.getHandleRes(); + const auto & handle_filter_result = filter->getHandleRes(); if (file_idx == 0) { // TODO: this check may not be correct when support multiple files in a stable, let's just keep it now for simplicity diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 226a3bb4cea..94f3f94d45d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -2778,7 +2778,7 @@ Block createBlock(const ColumnDefine & cd, size_t begin, size_t end) } // namespace -TEST_F(DeltaMergeStoreTest, ReadLegacyStringData_CFTiny) +TEST_F(DeltaMergeStoreTest, ReadLegacyStringDataCFTiny) try { // Write legacy string data to CFTiny. @@ -2843,7 +2843,7 @@ try } CATCH -TEST_F(DeltaMergeStoreTest, ReadLegacyStringData_DMFile) +TEST_F(DeltaMergeStoreTest, ReadLegacyStringDataDMFile) try { // Write legacy string data to DMFile. diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index 7235bab5c5c..3f2919e5ef3 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -1576,8 +1576,7 @@ try // Filtered by rough set filter auto filter = toRSFilter(i64_cd, range); const auto read_ranges = RowKeyRanges{RowKeyRange::newAll(false, 1)}; - auto pack_result = std::make_shared( - DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {})); + auto pack_result = DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {}); // Test read DMFileBlockInputStreamBuilder builder(dbContext()); auto stream = builder.setColumnCache(column_cache) @@ -1659,8 +1658,7 @@ try filters.emplace_back(createOr({one_part_filter, createUnsupported("test")}), num_rows_write); auto test_read_filter = [&](const DM::RSOperatorPtr & filter, const size_t num_rows_should_read) { const auto read_ranges = RowKeyRanges{RowKeyRange::newAll(false, 1)}; - auto pack_result = std::make_shared( - DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {})); + auto pack_result = DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {}); // Test read DMFileBlockInputStreamBuilder builder(dbContext()); auto stream = builder.setColumnCache(column_cache) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 6c7d5c8ed76..5f3930af12e 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -1088,7 +1088,7 @@ try dmContext(), segment_snap, real_ranges, - DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); // the bitmap only contains the overlapped packs of ColumnFileBig. So only 60 here. @@ -1108,7 +1108,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, - DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); @@ -1150,7 +1150,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, - DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp index d4453abd195..00b030351ac 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp @@ -374,7 +374,7 @@ TEST_F(SegmentBitmapFilterTest, CleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -396,7 +396,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -416,7 +416,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), 1, DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -444,7 +444,7 @@ TEST_F(SegmentBitmapFilterTest, StableRange) *dm_context, snap, {buildRowKeyRange(10000, 50000)}, // [10000, 50000) - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -513,7 +513,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 30); @@ -543,7 +543,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 750); From 08b61f2d34c9667392c7fe00aeef01ec4b65e6d7 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 11:30:38 +0800 Subject: [PATCH 03/17] fix ut Signed-off-by: Lloyd-Pottiger --- .../tests/gtest_dm_delta_merge_store.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 94f3f94d45d..8a4b363a269 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -4172,7 +4172,14 @@ try return filter; }; - DB::registerFunctions(); + try + { + DB::registerFunctions(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. + } constexpr Int64 num_rows = 128; auto filter_all = create_filter(0); @@ -4295,7 +4302,14 @@ try return filter; }; - DB::registerFunctions(); + try + { + DB::registerFunctions(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. + } constexpr Int64 num_rows = 128; auto filter_all = create_filter(0); From 692bd0abadac37710a1069f25e042f8ae3761155 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 11:53:08 +0800 Subject: [PATCH 04/17] rename Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp | 2 +- dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h | 2 +- dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp | 4 ++-- .../DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp | 2 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 2 +- dbms/src/Storages/DeltaMerge/StableValueSpace.cpp | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index 9e2511b45fa..9a9f894bdc8 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -157,7 +157,7 @@ std::unique_ptr ColumnReadStream::buildColDataRe // Try to get the largest buffer size of reading continuous packs size_t buffer_size = 0; - const auto & pack_res = reader.pack_filter->getPackResConst(); + const auto & pack_res = reader.pack_filter->getPackRes(); for (size_t i = 0; i < n_packs; /*empty*/) { if (!pack_res[i].isUse()) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index eaccb7d3474..fb3fd953350 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -58,7 +58,7 @@ class DMFilePackFilterResult {} const RSResults & getHandleRes() const { return handle_res; } - const RSResults & getPackResConst() const { return pack_res; } + const RSResults & getPackRes() const { return pack_res; } UInt64 countUsePack() const; Handle getMinHandle(size_t pack_id) const diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 64bfe16f7f7..181b86b13f8 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -706,7 +706,7 @@ void DMFileReader::addSkippedRows(UInt64 rows) void DMFileReader::initReadBlockInfos() { - const auto & pack_res = pack_filter->getPackResConst(); + const auto & pack_res = pack_filter->getPackRes(); const auto & pack_stats = dmfile->getPackStats(); const size_t read_pack_limit = read_one_pack_every_time ? 1 : std::numeric_limits::max(); @@ -756,7 +756,7 @@ std::vector DMFileReader::splitReadBlockInfos( { const auto pack_end = read_info.start_pack_id + read_info.pack_count; const size_t start_row_offset = pack_offset[read_info.start_pack_id]; - const auto & pack_res = pack_filter->getPackResConst(); + const auto & pack_res = pack_filter->getPackRes(); const auto & pack_stats = dmfile->getPackStats(); std::vector new_read_block_infos; new_read_block_infos.reserve(pack_end - read_info.start_pack_id); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp index dc4553455d4..a7e65ab9461 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp @@ -151,7 +151,7 @@ void DMFileWithVectorIndexBlockInputStream::updateReadBlockInfos() read_block_infos.clear(); const auto & pack_stats = dmfile->getPackStats(); - const auto & pack_res = reader.pack_filter->getPackResConst(); + const auto & pack_res = reader.pack_filter->getPackRes(); // Update valid_packs_before_search for (const auto res : pack_res) diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index fc9c4b8902d..ba87ce541be 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3151,7 +3151,7 @@ std::pair, std::vector> parseDMFilePackInfo( { const auto & dmfile = dmfiles[i]; const auto & pack_filter = pack_filter_result[i]; - const auto & pack_res = pack_filter->getPackResConst(); + const auto & pack_res = pack_filter->getPackRes(); const auto & handle_res = pack_filter->getHandleRes(); const auto & pack_stats = dmfile->getPackStats(); diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 018f10c4eab..3a9347498fb 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -377,7 +377,7 @@ void StableValueSpace::calculateStableProperty( {rowkey_range}, EMPTY_RS_OPERATOR, {}); - const auto & pack_res = pack_filter->getPackResConst(); + const auto & pack_res = pack_filter->getPackRes(); size_t new_pack_properties_index = 0; const bool use_new_pack_properties = pack_properties.property_size() == 0; if (use_new_pack_properties) @@ -598,7 +598,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & RSOperatorPtr{}, IdSetPtr{}); const auto & pack_stats = f->getPackStats(); - const auto & pack_res = filter->getPackResConst(); + const auto & pack_res = filter->getPackRes(); for (size_t i = 0; i < pack_stats.size(); ++i) { if (pack_res[i].isUse()) From 4b01d9375af31c1d5397b312b0d8ef604ff6c5cd Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 14:23:43 +0800 Subject: [PATCH 05/17] fix ut Signed-off-by: Lloyd-Pottiger --- .../File/DMFileBlockInputStream.cpp | 26 +++++++++++++--- .../DeltaMerge/File/DMFilePackFilter.cpp | 4 +-- .../DeltaMerge/File/DMFilePackFilter.h | 31 +++++++++++++++++-- .../DeltaMerge/File/DMFilePackFilterResult.h | 23 +------------- dbms/src/Storages/DeltaMerge/Segment.cpp | 1 - .../DeltaMerge/tests/gtest_dm_segment.cpp | 6 ++-- .../tests/gtest_dm_vector_index.cpp | 16 +++++++++- .../DeltaMerge/tests/gtest_segment_bitmap.cpp | 29 ++++++++++++----- 8 files changed, 93 insertions(+), 43 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index 80070240de3..49ba97a93f3 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -68,8 +68,17 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( // If pack_filter is not set, we will create a default one. if (!pack_filter) { - pack_filter - = std::make_shared(index_cache, file_provider, read_limiter, scan_context, dmfile); + pack_filter = DMFilePackFilter::loadFrom( + index_cache, + file_provider, + read_limiter, + scan_context, + dmfile, + true, + rowkey_ranges, + EMPTY_RS_OPERATOR, + read_packs, + tracing_id); } DMFileReader reader( @@ -175,8 +184,17 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn // If pack_filter is not set, we will create a default one. if (!pack_filter) { - pack_filter - = std::make_shared(index_cache, file_provider, read_limiter, scan_context, dmfile); + pack_filter = DMFilePackFilter::loadFrom( + index_cache, + file_provider, + read_limiter, + scan_context, + dmfile, + true, + rowkey_ranges, + EMPTY_RS_OPERATOR, + read_packs, + tracing_id); } DMFileReader rest_columns_reader( diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index f93559dae8f..2ede65781be 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -21,12 +21,12 @@ namespace DB::DM { -DMFilePackFilterResultPtr DMFilePackFilter::load(const DMContext & dm_context) +DMFilePackFilterResultPtr DMFilePackFilter::load() { Stopwatch watch; SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); - DMFilePackFilterResult result(dm_context, dmfile); + DMFilePackFilterResult result(index_cache, file_provider, read_limiter, scan_context, dmfile); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) { diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index af9ba2d8a66..cf258ac0fe5 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -64,7 +65,33 @@ class DMFilePackFilter dm_context.global_context.getReadLimiter(), dm_context.scan_context, dm_context.tracing_id); - return pack_filter.load(dm_context); + return pack_filter.load(); + } + + static DMFilePackFilterResultPtr loadFrom( + const MinMaxIndexCachePtr & index_cache_, + const FileProviderPtr & file_provider_, + const ReadLimiterPtr & read_limiter_, + const ScanContextPtr & scan_context, + const DMFilePtr & dmfile, + bool set_cache_if_miss, + const RowKeyRanges & rowkey_ranges, + const RSOperatorPtr & filter, + const IdSetPtr & read_packs, + const String & tracing_id) + { + DMFilePackFilter pack_filter( + dmfile, + index_cache_, + set_cache_if_miss, + rowkey_ranges, + filter, + read_packs, + file_provider_, + read_limiter_, + scan_context, + tracing_id); + return pack_filter.load(); } private: @@ -91,7 +118,7 @@ class DMFilePackFilter , read_limiter(read_limiter_) {} - DMFilePackFilterResultPtr load(const DMContext & dm_context); + DMFilePackFilterResultPtr load(); static void loadIndex( ColumnIndexes & indexes, diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index fb3fd953350..8133deb4c7f 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -14,9 +14,8 @@ #pragma once -#include -#include #include +#include #include #include @@ -32,16 +31,6 @@ class DMFilePackFilterResult friend class DMFilePackFilter; public: - DMFilePackFilterResult(const DMContext & dm_context_, const DMFilePtr & dmfile_) - : index_cache(dm_context_.global_context.getMinMaxIndexCache()) - , file_provider(dm_context_.global_context.getFileProvider()) - , read_limiter(dm_context_.global_context.getReadLimiter()) - , scan_context(dm_context_.scan_context) - , dmfile(dmfile_) - , handle_res(dmfile->getPacks(), RSResult::All) - , pack_res(dmfile->getPacks(), RSResult::Some) - {} - DMFilePackFilterResult( const MinMaxIndexCachePtr & index_cache_, const FileProviderPtr & file_provider_, @@ -85,16 +74,6 @@ class DMFilePackFilterResult return minmax_index->getUInt64MinMax(pack_id).second; } - // Only for test - static DMFilePackFilterResults defaultResults(const DMContext & dm_context, const DMFiles & files) - { - DMFilePackFilterResults results; - results.reserve(files.size()); - for (const auto & file : files) - results.push_back(std::make_shared(dm_context, file)); - return results; - } - // Get valid rows and bytes after filter invalid packs by handle_range and filter std::pair validRowsAndBytes(); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index ba87ce541be..6080c076677 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3526,7 +3526,6 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( read_data_block_rows); } - std::cout << "getBitmapFilterInputStream" << std::endl; auto stream = getConcatSkippableBlockInputStream( bitmap_filter, segment_snap, diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 5f3930af12e..1d32a8c069b 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -1088,7 +1088,7 @@ try dmContext(), segment_snap, real_ranges, - DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); // the bitmap only contains the overlapped packs of ColumnFileBig. So only 60 here. @@ -1108,7 +1108,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, - DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); @@ -1150,7 +1150,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, - DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp index dcc9f38d50a..c1b17e94f07 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp @@ -1198,13 +1198,27 @@ class VectorIndexSegmentTestBase { auto range = buildRowKeyRange(begin, end); auto [segment, snapshot] = getSegmentForRead(segment_id); + // load DMilePackFilterResult for each DMFile + DMFilePackFilterResults pack_filter_results; + pack_filter_results.reserve(snapshot->stable->getDMFiles().size()); + for (const auto & dmfile : snapshot->stable->getDMFiles()) + { + auto result = DMFilePackFilter::loadFrom( + *dm_context, + dmfile, + /*set_cache_if_miss*/ true, + {range}, + EMPTY_RS_OPERATOR, + /*read_pack*/ {}); + pack_filter_results.push_back(result); + } auto stream = segment->getBitmapFilterInputStream( *dm_context, columns_to_read, snapshot, {range}, std::make_shared(wrapWithANNQueryInfo({}, ann_query)), - {}, + pack_filter_results, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp index 00b030351ac..24ec7be1bba 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -180,6 +180,18 @@ class SegmentBitmapFilterTest : public SegmentTestBasic ASSERT_TRUE(sequenceEqual(expected_handle.data(), handle->data(), test_case.expected_size)); } } + + auto loadPackFilterResults(const SegmentSnapshotPtr & snap, const RowKeyRanges & ranges) + { + DMFilePackFilterResults results; + results.reserve(snap->stable->getDMFiles().size()); + for (const auto & file : snap->stable->getDMFiles()) + { + auto pack_filter = DMFilePackFilter::loadFrom(*dm_context, file, true, ranges, EMPTY_RS_OPERATOR, {}); + results.push_back(pack_filter); + } + return results; + } }; TEST_F(SegmentBitmapFilterTest, InMemory1) @@ -374,7 +386,7 @@ TEST_F(SegmentBitmapFilterTest, CleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -396,7 +408,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -416,7 +428,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), 1, DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -440,11 +452,12 @@ TEST_F(SegmentBitmapFilterTest, StableRange) ASSERT_EQ(seg->getDelta()->getDeletes(), 0); ASSERT_EQ(seg->getStable()->getRows(), 50000); + auto ranges = std::vector{buildRowKeyRange(10000, 50000)}; // [10000, 50000) auto bitmap_filter = seg->buildBitmapFilterStableOnly( *dm_context, snap, - {buildRowKeyRange(10000, 50000)}, // [10000, 50000) - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + ranges, + loadPackFilterResults(snap, ranges), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -513,7 +526,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 30); @@ -543,7 +556,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 750); From f8decb802231d9bda6aebd2a890a40cb7b23533e Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Wed, 25 Dec 2024 13:33:32 +0800 Subject: [PATCH 06/17] small refine Signed-off-by: JaySon-Huang --- dbms/src/Storages/DeltaMerge/File/DMFile.cpp | 2 +- dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp index 47191e835d4..21048578a4b 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp @@ -219,7 +219,7 @@ size_t DMFile::colIndexSize(ColId id) const } else { - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Index of {} not exist", id); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Index is not exist, col_id={}", id); } } else diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp index 2162d283733..98d6e5da714 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp @@ -73,7 +73,7 @@ void DMFilePackFilterResult::tryLoadIndex(ColId col_id) const dmfile, file_provider, index_cache, - true, + /*set_cache_if_miss=*/true, col_id, read_limiter, scan_context); From fcb462b76f85c625fb6982cd831d1b3b1755e80f Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 15:34:00 +0800 Subject: [PATCH 07/17] refine Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp | 1 - dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp | 1 + .../Storages/DeltaMerge/File/DMFileBlockInputStream.cpp | 5 ++--- dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp | 3 +++ dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h | 8 +------- .../src/Storages/DeltaMerge/File/DMFilePackFilterResult.h | 3 +-- dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp | 1 + dbms/src/Storages/DeltaMerge/StableValueSpace.cpp | 2 -- dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp | 1 - 9 files changed, 9 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 931247d2f9f..da544bec985 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index 9a9f894bdc8..019caa74dac 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index 49ba97a93f3..d7ee14df85d 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -65,7 +64,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( max_sharing_column_bytes_for_all = 0; } - // If pack_filter is not set, we will create a default one. + // If pack_filter is not set, load from EMPTY_RS_OPERATOR. if (!pack_filter) { pack_filter = DMFilePackFilter::loadFrom( @@ -181,7 +180,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle; - // If pack_filter is not set, we will create a default one. + // If pack_filter is not set, load from EMPTY_RS_OPERATOR. if (!pack_filter) { pack_filter = DMFilePackFilter::loadFrom( diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 2ede65781be..e548483dc3e 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -13,7 +13,10 @@ // limitations under the License. #include +#include +#include #include +#include #include #include diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index cf258ac0fe5..5bf28484085 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -14,21 +14,15 @@ #pragma once -#include -#include -#include -#include #include #include #include #include #include -#include #include -#include #include #include -#include + namespace ProfileEvents { diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index 8133deb4c7f..4d86661ebcd 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -15,7 +15,6 @@ #pragma once #include -#include #include #include @@ -30,7 +29,6 @@ class DMFilePackFilterResult { friend class DMFilePackFilter; -public: DMFilePackFilterResult( const MinMaxIndexCachePtr & index_cache_, const FileProviderPtr & file_provider_, @@ -46,6 +44,7 @@ class DMFilePackFilterResult , pack_res(dmfile->getPacks(), RSResult::Some) {} +public: const RSResults & getHandleRes() const { return handle_res; } const RSResults & getPackRes() const { return pack_res; } UInt64 countUsePack() const; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 181b86b13f8..54acd1ca8dc 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 3a9347498fb..b6606b620a2 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -12,13 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 1d32a8c069b..3e0ede20007 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include From 89bc2b69962256ad515d832f3ddb8244ba435fe0 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Wed, 25 Dec 2024 14:07:56 +0800 Subject: [PATCH 08/17] address comments Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index 4d86661ebcd..7f3bdf67325 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -21,10 +21,6 @@ namespace DB::DM { -class DMFilePackFilterResult; -using DMFilePackFilterResultPtr = std::shared_ptr; -using DMFilePackFilterResults = std::vector; - class DMFilePackFilterResult { friend class DMFilePackFilter; From 3f681aa8140fe255c43e58f77035000df579b0dd Mon Sep 17 00:00:00 2001 From: JaySon Date: Wed, 25 Dec 2024 15:01:22 +0800 Subject: [PATCH 09/17] Remove shared_ptr to DMFile inside DMFilePackFilterResult (#22) * Remove useless var from DMFilePackFilterResult Signed-off-by: JaySon-Huang * Remove shared_ptr to DMFile inside DMFilePackFilterResult Signed-off-by: JaySon-Huang * fix Signed-off-by: JaySon-Huang --------- Signed-off-by: JaySon-Huang --- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 7 +-- .../DeltaMerge/File/DMFilePackFilter.cpp | 25 +++++++++- .../DeltaMerge/File/DMFilePackFilter.h | 7 +++ .../File/DMFilePackFilterResult.cpp | 22 ++------- .../DeltaMerge/File/DMFilePackFilterResult.h | 48 ++++++++++--------- .../Storages/DeltaMerge/File/DMFileReader.cpp | 6 +-- dbms/src/Storages/DeltaMerge/Segment.cpp | 9 ++-- .../Storages/DeltaMerge/StableValueSpace.cpp | 7 +-- 8 files changed, 75 insertions(+), 56 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index da544bec985..ec7d803b66f 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -37,14 +37,11 @@ ColumnFileBig::ColumnFileBig(const DMContext & dm_context, const DMFilePtr & fil void ColumnFileBig::calculateStat(const DMContext & dm_context) { - auto pack_filter = DMFilePackFilter::loadFrom( + std::tie(valid_rows, valid_bytes) = DMFilePackFilter::loadValidRowsAndBytes( dm_context, file, /*set_cache_if_miss*/ false, - {segment_range}, - EMPTY_RS_OPERATOR, - {}); - std::tie(valid_rows, valid_bytes) = pack_filter->validRowsAndBytes(); + {segment_range}); } void ColumnFileBig::removeData(WriteBatches & wbs) const diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index e548483dc3e..3f8ea38d174 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -24,12 +25,34 @@ namespace DB::DM { +std::pair DMFilePackFilter::loadValidRowsAndBytes( + const DMContext & dm_context, + const DMFilePtr & dmfile, + bool set_cache_if_miss, + const RowKeyRanges & rowkey_ranges) +{ + auto pack_filter = loadFrom(dm_context, dmfile, set_cache_if_miss, rowkey_ranges, EMPTY_RS_OPERATOR, {}); + + size_t rows = 0; + size_t bytes = 0; + const auto & pack_stats = dmfile->getPackStats(); + for (size_t i = 0; i < pack_stats.size(); ++i) + { + if (pack_filter->pack_res[i].isUse()) + { + rows += pack_stats[i].rows; + bytes += pack_stats[i].bytes; + } + } + return {rows, bytes}; +} + DMFilePackFilterResultPtr DMFilePackFilter::load() { Stopwatch watch; SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); - DMFilePackFilterResult result(index_cache, file_provider, read_limiter, scan_context, dmfile); + DMFilePackFilterResult result(index_cache, read_limiter, pack_count); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) { diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 5bf28484085..5099d4fae61 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -39,6 +39,13 @@ class DMFilePackFilter friend class DMFilePackFilterResult; public: + // Get valid rows and bytes after filter invalid packs by rowkey_ranges + static std::pair loadValidRowsAndBytes( + const DMContext & dm_context, + const DMFilePtr & dmfile, + bool set_cache_if_miss, + const RowKeyRanges & rowkey_ranges); + // Empty `rowkey_ranges` means do not filter by rowkey_ranges static DMFilePackFilterResultPtr loadFrom( const DMContext & dm_context, diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp index 98d6e5da714..b5778efeb86 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp @@ -23,22 +23,6 @@ UInt64 DMFilePackFilterResult::countUsePack() const return std::count_if(pack_res.begin(), pack_res.end(), [](RSResult res) { return res.isUse(); }); } -std::pair DMFilePackFilterResult::validRowsAndBytes() -{ - size_t rows = 0; - size_t bytes = 0; - const auto & pack_stats = dmfile->getPackStats(); - for (size_t i = 0; i < pack_stats.size(); ++i) - { - if (pack_res[i].isUse()) - { - rows += pack_stats[i].rows; - bytes += pack_stats[i].bytes; - } - } - return {rows, bytes}; -} - std::tuple DMFilePackFilterResult::countPackRes() const { UInt64 none_count = 0; @@ -59,7 +43,11 @@ std::tuple DMFilePackFilterResult::countPackRes( return {none_count, some_count, all_count, all_null_count}; } -void DMFilePackFilterResult::tryLoadIndex(ColId col_id) const +void DMFilePackFilterResult::tryLoadIndex( + const DMFilePtr & dmfile, + ColId col_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const { if (param.indexes.count(col_id)) return; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index 7f3bdf67325..dadf054fd88 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -27,17 +27,12 @@ class DMFilePackFilterResult DMFilePackFilterResult( const MinMaxIndexCachePtr & index_cache_, - const FileProviderPtr & file_provider_, const ReadLimiterPtr & read_limiter_, - const ScanContextPtr & scan_context, - const DMFilePtr & dmfile_) + size_t pack_count) : index_cache(index_cache_) - , file_provider(file_provider_) , read_limiter(read_limiter_) - , scan_context(scan_context) - , dmfile(dmfile_) - , handle_res(dmfile->getPacks(), RSResult::All) - , pack_res(dmfile->getPacks(), RSResult::Some) + , handle_res(pack_count, RSResult::All) + , pack_res(pack_count, RSResult::Some) {} public: @@ -45,47 +40,56 @@ class DMFilePackFilterResult const RSResults & getPackRes() const { return pack_res; } UInt64 countUsePack() const; - Handle getMinHandle(size_t pack_id) const + Handle getMinHandle( + const DMFilePtr & dmfile, + size_t pack_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const { if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + tryLoadIndex(dmfile, EXTRA_HANDLE_COLUMN_ID, file_provider, scan_context); auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; return minmax_index->getIntMinMax(pack_id).first; } - StringRef getMinStringHandle(size_t pack_id) const + StringRef getMinStringHandle( + const DMFilePtr & dmfile, + size_t pack_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const { if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + tryLoadIndex(dmfile, EXTRA_HANDLE_COLUMN_ID, file_provider, scan_context); auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; return minmax_index->getStringMinMax(pack_id).first; } - UInt64 getMaxVersion(size_t pack_id) const + UInt64 getMaxVersion( + const DMFilePtr & dmfile, + size_t pack_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const { if (!param.indexes.count(VERSION_COLUMN_ID)) - tryLoadIndex(VERSION_COLUMN_ID); + tryLoadIndex(dmfile, VERSION_COLUMN_ID, file_provider, scan_context); auto & minmax_index = param.indexes.find(VERSION_COLUMN_ID)->second.minmax; return minmax_index->getUInt64MinMax(pack_id).second; } - // Get valid rows and bytes after filter invalid packs by handle_range and filter - std::pair validRowsAndBytes(); - // None+NoneNull, Some+SomeNull, All, AllNull std::tuple countPackRes() const; private: - void tryLoadIndex(ColId col_id) const; + void tryLoadIndex( + const DMFilePtr & dmfile, + ColId col_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const; private: MinMaxIndexCachePtr index_cache; - FileProviderPtr file_provider; ReadLimiterPtr read_limiter; - const ScanContextPtr scan_context; - - DMFilePtr dmfile; mutable RSCheckParam param; // `handle_res` is the filter results of `rowkey_ranges`. diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 54acd1ca8dc..b0afccc6d91 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -312,7 +312,7 @@ Block DMFileReader::readImpl(const ReadBlockInfo & read_info) // If all handle in a pack are in the given range, no not_clean rows, and max version <= max_read_version, // we do not need to read handle column. if (handle_res[i] == RSResult::All && pack_stats[i].not_clean == 0 - && pack_filter->getMaxVersion(i) <= max_read_version) + && pack_filter->getMaxVersion(dmfile, i, file_provider, scan_context) <= max_read_version) { handle_column_clean_read_packs.push_back(i); version_column_clean_read_packs.push_back(i); @@ -375,12 +375,12 @@ ColumnPtr DMFileReader::cleanRead( { if (is_common_handle) { - StringRef min_handle = pack_filter->getMinStringHandle(range.first); + StringRef min_handle = pack_filter->getMinStringHandle(dmfile, range.first, file_provider, scan_context); return cd.type->createColumnConst(rows_count, Field(min_handle.data, min_handle.size)); } else { - Handle min_handle = pack_filter->getMinHandle(range.first); + Handle min_handle = pack_filter->getMinHandle(dmfile, range.first, file_provider, scan_context); return cd.type->createColumnConst(rows_count, Field(min_handle)); } } diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 6080c076677..0ecdcd8de68 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3129,7 +3129,8 @@ struct Range std::pair, std::vector> parseDMFilePackInfo( const DMFiles & dmfiles, const DMFilePackFilterResults & pack_filter_result, - UInt64 start_ts) + UInt64 start_ts, + const DMContext & dm_context) { // Packs that all rows compliant with MVCC filter and RowKey filter requirements. // For building bitmap filter, we don't need to read these packs, @@ -3147,6 +3148,8 @@ std::pair, std::vector> parseDMFilePackInfo( size_t rows = 0; UInt32 preceded_rows = 0; + auto file_provider = dm_context.global_context.getFileProvider(); + for (size_t i = 0; i < dmfiles.size(); ++i) { const auto & dmfile = dmfiles[i]; @@ -3167,7 +3170,7 @@ std::pair, std::vector> parseDMFilePackInfo( } if (handle_res[pack_id] == RSResult::Some || pack_stat.not_clean > 0 - || pack_filter->getMaxVersion(pack_id) > start_ts) + || pack_filter->getMaxVersion(dmfile, pack_id, file_provider, dm_context.scan_context) > start_ts) { // We need to read this pack to do RowKey or MVCC filter. some_packs_set->insert(pack_id); @@ -3216,7 +3219,7 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( return elapse_ns / 1'000'000.0; }; - auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, pack_filter_results, start_ts); + auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, pack_filter_results, start_ts, dm_context); if (skipped_ranges.size() == 1 && skipped_ranges[0].offset == 0 && skipped_ranges[0].rows == segment_snap->stable->getDMFilesRows()) diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index b6606b620a2..add74ba2024 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -49,14 +49,11 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang { for (const auto & file : files_) { - auto pack_filter = DMFilePackFilter::loadFrom( + auto [file_valid_rows, file_valid_bytes] = DMFilePackFilter::loadValidRowsAndBytes( *dm_context, file, /*set_cache_if_miss*/ true, - {range}, - EMPTY_RS_OPERATOR, - {}); - auto [file_valid_rows, file_valid_bytes] = pack_filter->validRowsAndBytes(); + {range}); rows += file_valid_rows; bytes += file_valid_bytes; } From 0a97f355973dc7cb1f5d1b77a6b410af953e9bdb Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Wed, 25 Dec 2024 15:11:16 +0800 Subject: [PATCH 10/17] Rename context -> dm_context Signed-off-by: JaySon-Huang --- .../Storages/DeltaMerge/StableValueSpace.cpp | 52 ++++++++++--------- .../Storages/DeltaMerge/StableValueSpace.h | 16 +++--- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index add74ba2024..5d44c600966 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -298,7 +298,7 @@ void StableValueSpace::recordRemovePacksPages(WriteBatches & wbs) const } void StableValueSpace::calculateStableProperty( - const DMContext & context, + const DMContext & dm_context, const RowKeyRange & rowkey_range, bool is_common_handle) { @@ -333,13 +333,13 @@ void StableValueSpace::calculateStableProperty( // // If we pass `segment_range` instead, // then the returned stream is a `SkippableBlockInputStream` which will complicate the implementation - DMFileBlockInputStreamBuilder builder(context.global_context); + DMFileBlockInputStreamBuilder builder(dm_context.global_context); BlockInputStreamPtr data_stream = builder .setRowsThreshold(std::numeric_limits::max()) // because we just read one pack at a time .onlyReadOnePackEveryTime() - .setTracingID(fmt::format("{}-calculateStableProperty", context.tracing_id)) - .build(file, read_columns, RowKeyRanges{rowkey_range}, context.scan_context); + .setTracingID(fmt::format("{}-calculateStableProperty", dm_context.tracing_id)) + .build(file, read_columns, RowKeyRanges{rowkey_range}, dm_context.scan_context); auto mvcc_stream = std::make_shared>( data_stream, read_columns, @@ -366,7 +366,7 @@ void StableValueSpace::calculateStableProperty( mvcc_stream->readSuffix(); } auto pack_filter = DMFilePackFilter::loadFrom( - context, + dm_context, file, /*set_cache_if_miss*/ false, {rowkey_range}, @@ -442,7 +442,7 @@ void StableValueSpace::drop(const FileProviderPtr & file_provider) } SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( - const DMContext & context, + const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, UInt64 max_data_version, @@ -457,7 +457,8 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( { LOG_DEBUG( log, - "start_ts: {}, enable_handle_clean_read: {}, is_fast_mode: {}, enable_del_clean_read: {}", + "StableVS getInputStream" + " start_ts={} enable_handle_clean_read={} is_fast_mode={} enable_del_clean_read={}", max_data_version, enable_handle_clean_read, is_fast_scan, @@ -469,17 +470,17 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( for (size_t i = 0; i < stable->files.size(); ++i) { - DMFileBlockInputStreamBuilder builder(context.global_context); + DMFileBlockInputStreamBuilder builder(dm_context.global_context); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) - .enableColumnCacheLongTerm(context.pk_col_id) + .enableColumnCacheLongTerm(dm_context.pk_col_id) .setDMFilePackFilterResult(!pack_filter_results.empty() ? pack_filter_results[i] : nullptr) .setColumnCache(column_caches[i]) - .setTracingID(context.tracing_id) + .setTracingID(dm_context.tracing_id) .setRowsThreshold(expected_block_size) .setReadPacks(read_packs.size() > i ? read_packs[i] : nullptr) .setReadTag(read_tag); - streams.push_back(builder.build(stable->files[i], read_columns, rowkey_ranges, context.scan_context)); + streams.push_back(builder.build(stable->files[i], read_columns, rowkey_ranges, dm_context.scan_context)); rows.push_back(stable->files[i]->getRows()); } if (need_row_id) @@ -487,19 +488,19 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( return std::make_shared>( streams, std::move(rows), - context.scan_context); + dm_context.scan_context); } else { return std::make_shared>( streams, std::move(rows), - context.scan_context); + dm_context.scan_context); } } SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVectorIndex( - const DMContext & context, + const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, const ANNQueryInfoPtr & ann_query_info, @@ -516,7 +517,8 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe { LOG_DEBUG( log, - "start_ts: {}, enable_handle_clean_read: {}, is_fast_mode: {}, enable_del_clean_read: {}", + "StableVS tryGetInputStreamWithVectorIndex" + " start_ts={} enable_handle_clean_read={} is_fast_mode={} enable_del_clean_read={}", max_data_version, enable_handle_clean_read, is_fast_scan, @@ -530,13 +532,13 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe for (size_t i = 0; i < stable->files.size(); ++i) { - DMFileBlockInputStreamBuilder builder(context.global_context); + DMFileBlockInputStreamBuilder builder(dm_context.global_context); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) - .enableColumnCacheLongTerm(context.pk_col_id) + .enableColumnCacheLongTerm(dm_context.pk_col_id) .setAnnQureyInfo(ann_query_info) .setDMFilePackFilterResult(!pack_filter_results.empty() ? pack_filter_results[i] : nullptr) .setColumnCache(column_caches[i]) - .setTracingID(context.tracing_id) + .setTracingID(dm_context.tracing_id) .setRowsThreshold(expected_block_size) .setReadPacks(read_packs.size() > i ? read_packs[i] : nullptr) .setReadTag(read_tag); @@ -551,7 +553,7 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe stable->files[i], read_columns, rowkey_ranges, - context.scan_context)); + dm_context.scan_context)); rows.push_back(stable->files[i]->getRows()); } if (need_row_id) @@ -559,18 +561,18 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe return std::make_shared>( streams, std::move(rows), - context.scan_context); + dm_context.scan_context); } else { return std::make_shared>( streams, std::move(rows), - context.scan_context); + dm_context.scan_context); } } -RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & context, const RowKeyRange & range) +RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & dm_context, const RowKeyRange & range) const { // Avoid unnecessary reading IO @@ -586,7 +588,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & for (auto & f : stable->files) { auto filter = DMFilePackFilter::loadFrom( - context, + dm_context, f, /*set_cache_if_miss*/ false, {range}, @@ -616,7 +618,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & } StableValueSpace::Snapshot::AtLeastRowsAndBytesResult // -StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, const RowKeyRange & range) const +StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & dm_context, const RowKeyRange & range) const { AtLeastRowsAndBytesResult ret{}; @@ -627,7 +629,7 @@ StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, co { const auto & file = stable->files[file_idx]; auto filter = DMFilePackFilter::loadFrom( - context, + dm_context, file, /*set_cache_if_miss*/ false, {range}, diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.h b/dbms/src/Storages/DeltaMerge/StableValueSpace.h index 8fdddbaeb48..04b87ba0a88 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.h @@ -46,8 +46,8 @@ class StableValueSpace : public std::enable_shared_from_this , log(Logger::get()) {} - static StableValueSpacePtr restore(DMContext & context, PageIdU64 id); - static StableValueSpacePtr restore(DMContext & context, ReadBuffer & buf, PageIdU64 id); + static StableValueSpacePtr restore(DMContext & dm_context, PageIdU64 id); + static StableValueSpacePtr restore(DMContext & dm_context, ReadBuffer & buf, PageIdU64 id); static StableValueSpacePtr createFromCheckpoint( // const LoggerPtr & parent_log, @@ -112,7 +112,7 @@ class StableValueSpace : public std::enable_shared_from_this */ size_t getDMFilesBytes() const; - void enableDMFilesGC(DMContext & context); + void enableDMFilesGC(DMContext & dm_context); void recordRemovePacksPages(WriteBatches & wbs) const; @@ -139,7 +139,7 @@ class StableValueSpace : public std::enable_shared_from_this const StableProperty & getStableProperty() const { return property; } - void calculateStableProperty(const DMContext & context, const RowKeyRange & rowkey_range, bool is_common_handle); + void calculateStableProperty(const DMContext & dm_context, const RowKeyRange & rowkey_range, bool is_common_handle); struct Snapshot; using SnapshotPtr = std::shared_ptr; @@ -225,7 +225,7 @@ class StableValueSpace : public std::enable_shared_from_this } SkippableBlockInputStreamPtr getInputStream( - const DMContext & context, // + const DMContext & dm_context, // const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, UInt64 max_data_version, @@ -239,7 +239,7 @@ class StableValueSpace : public std::enable_shared_from_this bool need_row_id = false); SkippableBlockInputStreamPtr tryGetInputStreamWithVectorIndex( - const DMContext & context, + const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, const ANNQueryInfoPtr & ann_query_info, @@ -254,7 +254,7 @@ class StableValueSpace : public std::enable_shared_from_this bool need_row_id = false, BitmapFilterPtr bitmap_filter = nullptr); - RowsAndBytes getApproxRowsAndBytes(const DMContext & context, const RowKeyRange & range) const; + RowsAndBytes getApproxRowsAndBytes(const DMContext & dm_context, const RowKeyRange & range) const; struct AtLeastRowsAndBytesResult { @@ -268,7 +268,7 @@ class StableValueSpace : public std::enable_shared_from_this * Get the rows and bytes calculated from packs that is **fully contained** by the given range. * If the pack is partially intersected, then it is not counted. */ - AtLeastRowsAndBytesResult getAtLeastRowsAndBytes(const DMContext & context, const RowKeyRange & range) const; + AtLeastRowsAndBytesResult getAtLeastRowsAndBytes(const DMContext & dm_context, const RowKeyRange & range) const; private: LoggerPtr log; From 154130b44e839f16deea895b2227d2ed3b21f63f Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Wed, 25 Dec 2024 15:41:39 +0800 Subject: [PATCH 11/17] Merge the validRowsAndBytes method Signed-off-by: JaySon-Huang --- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 4 +- .../DeltaMerge/File/DMFilePackFilter.cpp | 12 +++--- .../DeltaMerge/File/DMFilePackFilter.h | 10 ++++- .../Storages/DeltaMerge/StableValueSpace.cpp | 39 ++++++------------- 4 files changed, 28 insertions(+), 37 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index ec7d803b66f..aef83c19bca 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -37,11 +37,13 @@ ColumnFileBig::ColumnFileBig(const DMContext & dm_context, const DMFilePtr & fil void ColumnFileBig::calculateStat(const DMContext & dm_context) { - std::tie(valid_rows, valid_bytes) = DMFilePackFilter::loadValidRowsAndBytes( + auto m = DMFilePackFilter::loadValidRowsAndBytes( dm_context, file, /*set_cache_if_miss*/ false, {segment_range}); + valid_rows = m.match_rows; + valid_bytes = m.match_bytes; } void ColumnFileBig::removeData(WriteBatches & wbs) const diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 3f8ea38d174..d18c79108d8 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -25,7 +25,7 @@ namespace DB::DM { -std::pair DMFilePackFilter::loadValidRowsAndBytes( +DMFilePackFilter::MatchDetails DMFilePackFilter::loadValidRowsAndBytes( const DMContext & dm_context, const DMFilePtr & dmfile, bool set_cache_if_miss, @@ -33,18 +33,18 @@ std::pair DMFilePackFilter::loadValidRowsAndBytes( { auto pack_filter = loadFrom(dm_context, dmfile, set_cache_if_miss, rowkey_ranges, EMPTY_RS_OPERATOR, {}); - size_t rows = 0; - size_t bytes = 0; + MatchDetails res; const auto & pack_stats = dmfile->getPackStats(); for (size_t i = 0; i < pack_stats.size(); ++i) { if (pack_filter->pack_res[i].isUse()) { - rows += pack_stats[i].rows; - bytes += pack_stats[i].bytes; + res.match_packs += 1; + res.match_rows += pack_stats[i].rows; + res.match_bytes += pack_stats[i].bytes; } } - return {rows, bytes}; + return res; } DMFilePackFilterResultPtr DMFilePackFilter::load() diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 5099d4fae61..375608cfac6 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -39,8 +39,14 @@ class DMFilePackFilter friend class DMFilePackFilterResult; public: - // Get valid rows and bytes after filter invalid packs by rowkey_ranges - static std::pair loadValidRowsAndBytes( + struct MatchDetails + { + size_t match_packs = 0; + size_t match_rows = 0; + size_t match_bytes = 0; + }; + // Get approximate valid rows and bytes after filter invalid packs by rowkey_ranges + static MatchDetails loadValidRowsAndBytes( const DMContext & dm_context, const DMFilePtr & dmfile, bool set_cache_if_miss, diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 5d44c600966..b4913ea1fbe 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -49,13 +49,13 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang { for (const auto & file : files_) { - auto [file_valid_rows, file_valid_bytes] = DMFilePackFilter::loadValidRowsAndBytes( + auto match = DMFilePackFilter::loadValidRowsAndBytes( *dm_context, file, /*set_cache_if_miss*/ true, {range}); - rows += file_valid_rows; - bytes += file_valid_bytes; + rows += match.match_rows; + bytes += match.match_bytes; } } @@ -585,26 +585,13 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & // Usually, this method will be called for some "cold" key ranges. // Loading the index into cache may pollute the cache and make the hot index cache invalid. // So don't refill the cache if the index does not exist. + constexpr bool set_cache_if_miss = false; for (auto & f : stable->files) { - auto filter = DMFilePackFilter::loadFrom( - dm_context, - f, - /*set_cache_if_miss*/ false, - {range}, - RSOperatorPtr{}, - IdSetPtr{}); - const auto & pack_stats = f->getPackStats(); - const auto & pack_res = filter->getPackRes(); - for (size_t i = 0; i < pack_stats.size(); ++i) - { - if (pack_res[i].isUse()) - { - ++match_packs; - total_match_rows += pack_stats[i].rows; - total_match_bytes += pack_stats[i].bytes; - } - } + auto match = DMFilePackFilter::loadValidRowsAndBytes(dm_context, f, set_cache_if_miss, {range}); + match_packs += match.match_packs; + total_match_rows += match.match_rows; + total_match_bytes += match.match_bytes; } if (!total_match_rows || !match_packs) return {0, 0}; @@ -625,16 +612,12 @@ StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & dm_context, // Usually, this method will be called for some "cold" key ranges. // Loading the index into cache may pollute the cache and make the hot index cache invalid. // So don't refill the cache if the index does not exist. + constexpr bool set_cache_if_miss = false; for (size_t file_idx = 0; file_idx < stable->files.size(); ++file_idx) { const auto & file = stable->files[file_idx]; - auto filter = DMFilePackFilter::loadFrom( - dm_context, - file, - /*set_cache_if_miss*/ false, - {range}, - RSOperatorPtr{}, - IdSetPtr{}); + auto filter + = DMFilePackFilter::loadFrom(dm_context, file, set_cache_if_miss, {range}, RSOperatorPtr{}, IdSetPtr{}); const auto & handle_filter_result = filter->getHandleRes(); if (file_idx == 0) { From 63fa5fe1f8dc6d2bd2bd187446c9b355d16acb61 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Thu, 19 Dec 2024 16:56:44 +0800 Subject: [PATCH 12/17] Storages: load RSResult only once Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/Segment.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 0ecdcd8de68..9fc58768391 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3149,7 +3149,6 @@ std::pair, std::vector> parseDMFilePackInfo( UInt32 preceded_rows = 0; auto file_provider = dm_context.global_context.getFileProvider(); - for (size_t i = 0; i < dmfiles.size(); ++i) { const auto & dmfile = dmfiles[i]; @@ -3220,7 +3219,6 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( }; auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, pack_filter_results, start_ts, dm_context); - if (skipped_ranges.size() == 1 && skipped_ranges[0].offset == 0 && skipped_ranges[0].rows == segment_snap->stable->getDMFilesRows()) { From b70fa360d6a3763d054e3a09e52a581f5f19e7cd Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 17:10:00 +0800 Subject: [PATCH 13/17] Storages: move ann_query_info to PushDownExecutor Signed-off-by: Lloyd-Pottiger --- dbms/src/Debug/MockStorage.cpp | 4 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 2 +- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 2 +- .../Flash/Coprocessor/InterpreterUtils.cpp | 4 +- .../src/Flash/tests/gtest_filter_executor.cpp | 2 +- .../src/Operators/DMSegmentThreadSourceOp.cpp | 6 +- dbms/src/Operators/DMSegmentThreadSourceOp.h | 4 +- ...olumnFileSetWithVectorIndexInputStream.cpp | 1 - .../ConcatSkippableBlockInputStream.cpp | 22 +-- .../ConcatSkippableBlockInputStream.h | 2 +- .../DeltaMerge/DMSegmentThreadInputStream.h | 4 +- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 14 +- .../src/Storages/DeltaMerge/DeltaMergeStore.h | 8 +- ...ushDownFilter.cpp => PushDownExecutor.cpp} | 34 +++- .../{PushDownFilter.h => PushDownExecutor.h} | 26 ++- .../Storages/DeltaMerge/Filter/RSOperator.cpp | 32 +--- .../Storages/DeltaMerge/Filter/RSOperator.h | 6 - .../DeltaMerge/Filter/WithANNQueryInfo.h | 65 ------- .../DeltaMerge/Remote/RNSegmentInputStream.h | 2 +- .../Remote/RNWorkerPrepareStreams.h | 10 +- .../Storages/DeltaMerge/Remote/RNWorkers.cpp | 2 +- .../Storages/DeltaMerge/Remote/RNWorkers.h | 2 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 160 +++++++++++++----- dbms/src/Storages/DeltaMerge/Segment.h | 23 ++- .../Storages/DeltaMerge/SegmentReadTask.cpp | 14 +- .../src/Storages/DeltaMerge/SegmentReadTask.h | 6 +- .../DeltaMerge/SegmentReadTaskPool.cpp | 2 +- .../Storages/DeltaMerge/SegmentReadTaskPool.h | 6 +- .../tests/gtest_dm_delta_merge_store.cpp | 12 +- ...test_dm_delta_merge_store_vector_index.cpp | 46 ++--- .../tests/gtest_dm_minmax_index.cpp | 2 +- .../tests/gtest_dm_vector_index.cpp | 6 +- .../tests/gtest_dm_vector_index_utils.h | 2 +- .../gtest_skippable_block_input_stream.cpp | 2 - .../tests/gtest_kvstore_fast_add_peer.cpp | 2 +- dbms/src/Storages/StorageDeltaMerge.cpp | 8 +- dbms/src/Storages/StorageDeltaMerge.h | 2 +- .../Storages/StorageDisaggregatedRemote.cpp | 17 +- .../tests/gtests_parse_push_down_filter.cpp | 85 +++++----- .../TiDB/Schema/tests/gtest_schema_sync.cpp | 4 +- 40 files changed, 336 insertions(+), 317 deletions(-) rename dbms/src/Storages/DeltaMerge/Filter/{PushDownFilter.cpp => PushDownExecutor.cpp} (86%) rename dbms/src/Storages/DeltaMerge/Filter/{PushDownFilter.h => PushDownExecutor.h} (79%) delete mode 100644 dbms/src/Storages/DeltaMerge/Filter/WithANNQueryInfo.h diff --git a/dbms/src/Debug/MockStorage.cpp b/dbms/src/Debug/MockStorage.cpp index 428e8c1f086..6e211406669 100644 --- a/dbms/src/Debug/MockStorage.cpp +++ b/dbms/src/Debug/MockStorage.cpp @@ -211,7 +211,7 @@ BlockInputStreamPtr MockStorage::getStreamFromDeltaMerge( rf_max_wait_time_ms, context.getTimezoneInfo()); auto [before_where, filter_column_name, project_after_where] - = analyzer->buildPushDownFilter(filter_conditions->conditions); + = analyzer->buildPushDownExecutor(filter_conditions->conditions); BlockInputStreams ins = storage->read( column_names, query_info, @@ -273,7 +273,7 @@ void MockStorage::buildExecFromDeltaMerge( rf_max_wait_time_ms, context.getTimezoneInfo()); // Not using `auto [before_where, filter_column_name, project_after_where]` just to make the compiler happy. - auto build_ret = analyzer->buildPushDownFilter(filter_conditions->conditions); + auto build_ret = analyzer->buildPushDownExecutor(filter_conditions->conditions); storage->read( exec_context_, group_builder, diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index f419a693ff0..f2e68b5e620 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -972,7 +972,7 @@ String DAGExpressionAnalyzer::buildFilterColumn( return filter_column_name; } -std::tuple DAGExpressionAnalyzer::buildPushDownFilter( +std::tuple DAGExpressionAnalyzer::buildPushDownExecutor( const google::protobuf::RepeatedPtrField & conditions, bool null_as_false) { diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 8ef4dbc0b78..bdc9c9dcbe3 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -148,7 +148,7 @@ class DAGExpressionAnalyzer : private boost::noncopyable const google::protobuf::RepeatedPtrField & conditions, bool null_as_false = false); - std::tuple buildPushDownFilter( + std::tuple buildPushDownExecutor( const google::protobuf::RepeatedPtrField & conditions, bool null_as_false = false); diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index 91a7ca5db9b..993c560f10c 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -443,7 +443,7 @@ void executePushedDownFilter( DAGPipeline & pipeline) { auto [before_where, filter_column_name, project_after_where] - = analyzer.buildPushDownFilter(filter_conditions.conditions, true); + = analyzer.buildPushDownExecutor(filter_conditions.conditions, true); for (auto & stream : pipeline.streams) { @@ -464,7 +464,7 @@ void executePushedDownFilter( LoggerPtr log) { auto [before_where, filter_column_name, project_after_where] - = analyzer.buildPushDownFilter(filter_conditions.conditions, true); + = analyzer.buildPushDownExecutor(filter_conditions.conditions, true); auto input_header = group_builder.getCurrentHeader(); for (size_t i = 0; i < group_builder.concurrency(); ++i) diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index 244a0fb8163..5fc2d1da541 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -282,7 +282,7 @@ try } CATCH -TEST_F(FilterExecutorTestRunner, PushDownFilter) +TEST_F(FilterExecutorTestRunner, PushDownExecutor) try { context.mockStorage()->setUseDeltaMerge(true); diff --git a/dbms/src/Operators/DMSegmentThreadSourceOp.cpp b/dbms/src/Operators/DMSegmentThreadSourceOp.cpp index 49e6ca0e19c..c6283d2f8d6 100644 --- a/dbms/src/Operators/DMSegmentThreadSourceOp.cpp +++ b/dbms/src/Operators/DMSegmentThreadSourceOp.cpp @@ -30,7 +30,7 @@ DMSegmentThreadSourceOp::DMSegmentThreadSourceOp( const DM::SegmentReadTaskPoolPtr & task_pool_, DM::AfterSegmentRead after_segment_read_, const DM::ColumnDefines & columns_to_read_, - const DM::PushDownFilterPtr & filter_, + const DM::PushDownExecutorPtr & executor_, UInt64 start_ts_, size_t expected_block_size_, DM::ReadMode read_mode_, @@ -40,7 +40,7 @@ DMSegmentThreadSourceOp::DMSegmentThreadSourceOp( , task_pool(task_pool_) , after_segment_read(after_segment_read_) , columns_to_read(columns_to_read_) - , filter(filter_) + , executor(executor_) , start_ts(start_ts_) , expected_block_size(expected_block_size_) , read_mode(read_mode_) @@ -100,7 +100,7 @@ OperatorStatus DMSegmentThreadSourceOp::executeIOImpl() columns_to_read, task->read_snapshot, task->ranges, - filter, + executor, start_ts, block_size); LOG_TRACE(log, "Start to read segment, segment={}", cur_segment->simpleInfo()); diff --git a/dbms/src/Operators/DMSegmentThreadSourceOp.h b/dbms/src/Operators/DMSegmentThreadSourceOp.h index 0dc0fda15ec..fa1cbc21676 100644 --- a/dbms/src/Operators/DMSegmentThreadSourceOp.h +++ b/dbms/src/Operators/DMSegmentThreadSourceOp.h @@ -34,7 +34,7 @@ class DMSegmentThreadSourceOp : public SourceOp const DM::SegmentReadTaskPoolPtr & task_pool_, DM::AfterSegmentRead after_segment_read_, const DM::ColumnDefines & columns_to_read_, - const DM::PushDownFilterPtr & filter_, + const DM::PushDownExecutorPtr & executor_, UInt64 start_ts_, size_t expected_block_size_, DM::ReadMode read_mode_, @@ -56,7 +56,7 @@ class DMSegmentThreadSourceOp : public SourceOp DM::SegmentReadTaskPoolPtr task_pool; DM::AfterSegmentRead after_segment_read; DM::ColumnDefines columns_to_read; - DM::PushDownFilterPtr filter; + DM::PushDownExecutorPtr executor; const UInt64 start_ts; const size_t expected_block_size; const DM::ReadMode read_mode; diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSetWithVectorIndexInputStream.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSetWithVectorIndexInputStream.cpp index 825477d5e67..054753cf2cb 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSetWithVectorIndexInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileSetWithVectorIndexInputStream.cpp @@ -15,7 +15,6 @@ #include #include #include -#include namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.cpp index 6cdc5e2a32f..f4b0f4133e8 100644 --- a/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.cpp @@ -266,13 +266,12 @@ Block ConcatVectorIndexBlockInputStream::read() return block; } -SkippableBlockInputStreamPtr ConcatVectorIndexBlockInputStream::build( +std::tuple ConcatVectorIndexBlockInputStream::build( const BitmapFilterPtr & bitmap_filter, std::shared_ptr> stream, const ANNQueryInfoPtr & ann_query_info) { - if (!ann_query_info) - return stream; + assert(ann_query_info != nullptr); bool has_vector_index_stream = false; std::vector index_streams; index_streams.reserve(stream->children.size()); @@ -287,13 +286,16 @@ SkippableBlockInputStreamPtr ConcatVectorIndexBlockInputStream::build( index_streams.push_back(nullptr); } if (!has_vector_index_stream) - return stream; - - return std::make_shared( - bitmap_filter, - stream, - std::move(index_streams), - ann_query_info->top_k()); + return {stream, false}; + + return { + std::make_shared( + bitmap_filter, + stream, + std::move(index_streams), + ann_query_info->top_k()), + true, + }; } } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h b/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h index 48d988fe11e..5ad92d5f28e 100644 --- a/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h @@ -77,7 +77,7 @@ class ConcatVectorIndexBlockInputStream : public SkippableBlockInputStream , bitmap_filter(bitmap_filter_) {} - static SkippableBlockInputStreamPtr build( + static std::tuple build( const BitmapFilterPtr & bitmap_filter, std::shared_ptr> stream, const ANNQueryInfoPtr & ann_query_info); diff --git a/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h b/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h index 685f6b37e82..755fd7e3258 100644 --- a/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h +++ b/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h @@ -42,7 +42,7 @@ class DMSegmentThreadInputStream : public IProfilingBlockInputStream const SegmentReadTaskPoolPtr & task_pool_, AfterSegmentRead after_segment_read_, const ColumnDefines & columns_to_read_, - const PushDownFilterPtr & filter_, + const PushDownExecutorPtr & filter_, UInt64 start_ts_, size_t expected_block_size_, ReadMode read_mode_, @@ -127,7 +127,7 @@ class DMSegmentThreadInputStream : public IProfilingBlockInputStream SegmentReadTaskPoolPtr task_pool; AfterSegmentRead after_segment_read; ColumnDefines columns_to_read; - PushDownFilterPtr filter; + PushDownExecutorPtr filter; Block header; const UInt64 start_ts; const size_t expected_block_size; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 0916b4d8f15..17a3cc29df6 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include @@ -1228,12 +1228,12 @@ ReadMode DeltaMergeStore::getReadMode( const Context & db_context, bool is_fast_scan, bool keep_order, - const PushDownFilterPtr & filter) + const PushDownExecutorPtr & filter) { auto read_mode = getReadModeImpl(db_context, is_fast_scan, keep_order); RUNTIME_CHECK_MSG( !filter || !filter->before_where || read_mode == ReadMode::Bitmap, - "Push down filters needs bitmap, push down filters is empty: {}, read mode: {}", + "Push down executor needs bitmap, push down executor is empty: {}, read mode: {}", filter == nullptr || filter->before_where == nullptr, magic_enum::enum_name(read_mode)); return read_mode; @@ -1246,7 +1246,7 @@ BlockInputStreams DeltaMergeStore::read( const RowKeyRanges & sorted_ranges, size_t num_streams, UInt64 start_ts, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & filter, const RuntimeFilteList & runtime_filter_list, int rf_max_wait_time_ms, const String & tracing_id, @@ -1332,7 +1332,7 @@ BlockInputStreams DeltaMergeStore::read( LOG_INFO( tracing_logger, "Read create stream done, keep_order={} dt_enable_read_thread={} enable_read_thread={} " - "is_fast_scan={} is_push_down_filter_empty={} pool_id={} num_streams={} columns_to_read={} " + "is_fast_scan={} is_push_down_executor_empty={} pool_id={} num_streams={} columns_to_read={} " "final_columns_to_read={}", keep_order, db_context.getSettingsRef().dt_enable_read_thread, @@ -1356,7 +1356,7 @@ void DeltaMergeStore::read( const RowKeyRanges & sorted_ranges, size_t num_streams, UInt64 start_ts, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & filter, const RuntimeFilteList & runtime_filter_list, int rf_max_wait_time_ms, const String & tracing_id, @@ -1452,7 +1452,7 @@ void DeltaMergeStore::read( LOG_INFO( tracing_logger, "Read create PipelineExec done, keep_order={} dt_enable_read_thread={} enable_read_thread={} " - "is_fast_scan={} is_push_down_filter_empty={} pool_id={} num_streams={} columns_to_read={} " + "is_fast_scan={} is_push_down_executor_empty={} pool_id={} num_streams={} columns_to_read={} " "final_columns_to_read={}", keep_order, db_context.getSettingsRef().dt_enable_read_thread, diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h index 1ec73eb377b..d72041946df 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.h @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -460,7 +460,7 @@ class DeltaMergeStore const RowKeyRanges & sorted_ranges, size_t num_streams, UInt64 start_ts, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & filter, const RuntimeFilteList & runtime_filter_list, int rf_max_wait_time_ms, const String & tracing_id, @@ -485,7 +485,7 @@ class DeltaMergeStore const RowKeyRanges & sorted_ranges, size_t num_streams, UInt64 start_ts, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & filter, const RuntimeFilteList & runtime_filter_list, int rf_max_wait_time_ms, const String & tracing_id, @@ -588,7 +588,7 @@ class DeltaMergeStore const Context & db_context, bool is_fast_scan, bool keep_order, - const PushDownFilterPtr & filter); + const PushDownExecutorPtr & filter); // Get a snap of local_index_infos for checking. // Note that this is just a shallow copy of `local_index_infos`, do not diff --git a/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.cpp b/dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.cpp similarity index 86% rename from dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.cpp rename to dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.cpp index b2f6fc3ca7c..ce579acfc6b 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.cpp @@ -18,24 +18,35 @@ #include #include #include -#include +#include #include #include namespace DB::DM { -PushDownFilterPtr PushDownFilter::build( +PushDownExecutorPtr PushDownExecutor::build( const RSOperatorPtr & rs_operator, + const ANNQueryInfoPtr & ann_query_info, const TiDB::ColumnInfos & table_scan_column_info, const google::protobuf::RepeatedPtrField & pushed_down_filters, const ColumnDefines & columns_to_read, const Context & context, const LoggerPtr & tracing_logger) { + // check if the ann_query_info is valid + auto valid_ann_query_info = ann_query_info; + bool is_valid_ann_query = ann_query_info->top_k() != std::numeric_limits::max(); + bool is_matching_ann_query = std::any_of( + columns_to_read.begin(), + columns_to_read.end(), + [cid = ann_query_info->column_id()](const ColumnDefine & cd) -> bool { return cd.id == cid; }); + if (!is_valid_ann_query || !is_matching_ann_query) + valid_ann_query_info = nullptr; + if (pushed_down_filters.empty()) { LOG_DEBUG(tracing_logger, "Push down filter is empty"); - return std::make_shared(rs_operator); + return std::make_shared(rs_operator, valid_ann_query_info); } std::unordered_map columns_to_read_map; for (const auto & column : columns_to_read) @@ -120,7 +131,7 @@ PushDownFilterPtr PushDownFilter::build( } // build filter expression actions - auto [before_where, filter_column_name, project_after_where] = analyzer->buildPushDownFilter(pushed_down_filters); + auto [before_where, filter_column_name, project_after_where] = analyzer->buildPushDownExecutor(pushed_down_filters); LOG_DEBUG(tracing_logger, "Push down filter: {}", before_where->dumpActions()); // record current column defines @@ -145,8 +156,9 @@ PushDownFilterPtr PushDownFilter::build( } } - return std::make_shared( + return std::make_shared( rs_operator, + valid_ann_query_info, before_where, project_after_where, filter_columns, @@ -155,7 +167,7 @@ PushDownFilterPtr PushDownFilter::build( columns_after_cast); } -PushDownFilterPtr PushDownFilter::build( +PushDownExecutorPtr PushDownExecutor::build( const SelectQueryInfo & query_info, const ColumnDefines & columns_to_read, const ColumnDefines & table_column_defines, @@ -174,6 +186,10 @@ PushDownFilterPtr PushDownFilter::build( table_column_defines, context.getSettingsRef().dt_enable_rough_set_filter, tracing_logger); + // build ann_query_info + ANNQueryInfoPtr ann_query_info = nullptr; + if (dag_query->ann_query_info.query_type() != tipb::ANNQueryType::InvalidQueryType) + ann_query_info = std::make_shared(dag_query->ann_query_info); // build push down filter const auto & pushed_down_filters = dag_query->pushed_down_filters; if (unlikely(context.getSettingsRef().force_push_down_all_filters_to_scan) && !dag_query->filters.empty()) @@ -182,16 +198,18 @@ PushDownFilterPtr PushDownFilter::build( pushed_down_filters.begin(), pushed_down_filters.end()}; merged_filters.MergeFrom(dag_query->filters); - return PushDownFilter::build( + return PushDownExecutor::build( rs_operator, + ann_query_info, columns_to_read_info, merged_filters, columns_to_read, context, tracing_logger); } - return PushDownFilter::build( + return PushDownExecutor::build( rs_operator, + ann_query_info, columns_to_read_info, pushed_down_filters, columns_to_read, diff --git a/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.h b/dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.h similarity index 79% rename from dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.h rename to dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.h index eb680b6c45e..827c7437e1e 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.h +++ b/dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.h @@ -26,15 +26,16 @@ struct SelectQueryInfo; namespace DB::DM { -class PushDownFilter; -using PushDownFilterPtr = std::shared_ptr; -inline static const PushDownFilterPtr EMPTY_FILTER{}; +class PushDownExecutor; +using PushDownExecutorPtr = std::shared_ptr; +inline static const PushDownExecutorPtr EMPTY_FILTER{}; -class PushDownFilter +class PushDownExecutor { public: - PushDownFilter( + PushDownExecutor( const RSOperatorPtr & rs_operator_, + const ANNQueryInfoPtr & ann_query_info_, const ExpressionActionsPtr & beofre_where_, const ExpressionActionsPtr & project_after_where_, const ColumnDefinesPtr & filter_columns_, @@ -48,15 +49,22 @@ class PushDownFilter , filter_columns(filter_columns_) , extra_cast(extra_cast_) , columns_after_cast(columns_after_cast_) + , ann_query_info(ann_query_info_) {} - explicit PushDownFilter(const RSOperatorPtr & rs_operator_) + explicit PushDownExecutor(const RSOperatorPtr & rs_operator_, const ANNQueryInfoPtr & ann_query_info_ = nullptr) : rs_operator(rs_operator_) + , ann_query_info(ann_query_info_) + {} + + explicit PushDownExecutor(const ANNQueryInfoPtr & ann_query_info_) + : ann_query_info(ann_query_info_) {} // Use by StorageDisaggregated. - static PushDownFilterPtr build( + static PushDownExecutorPtr build( const DM::RSOperatorPtr & rs_operator, + const ANNQueryInfoPtr & ann_query_info, const TiDB::ColumnInfos & table_scan_column_info, const google::protobuf::RepeatedPtrField & pushed_down_filters, const ColumnDefines & columns_to_read, @@ -64,7 +72,7 @@ class PushDownFilter const LoggerPtr & tracing_logger); // Use by StorageDeltaMerge. - static DM::PushDownFilterPtr build( + static DM::PushDownExecutorPtr build( const SelectQueryInfo & query_info, const ColumnDefines & columns_to_read, const ColumnDefines & table_column_defines, @@ -87,6 +95,8 @@ class PushDownFilter const ExpressionActionsPtr extra_cast; // If the extra_cast is not null, the types of the columns may be changed const ColumnDefinesPtr columns_after_cast; + // The ANNQueryInfo contains the information of the ANN index + const ANNQueryInfoPtr ann_query_info; }; } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Filter/RSOperator.cpp b/dbms/src/Storages/DeltaMerge/Filter/RSOperator.cpp index 8970f3aab52..5b839694d23 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/RSOperator.cpp +++ b/dbms/src/Storages/DeltaMerge/Filter/RSOperator.cpp @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -86,36 +85,7 @@ RSOperatorPtr RSOperator::build( if (likely(rs_operator != DM::EMPTY_RS_OPERATOR)) LOG_DEBUG(tracing_logger, "Rough set filter: {}", rs_operator->toDebugString()); - ANNQueryInfoPtr ann_query_info = nullptr; - if (dag_query->ann_query_info.query_type() != tipb::ANNQueryType::InvalidQueryType) - ann_query_info = std::make_shared(dag_query->ann_query_info); - if (!ann_query_info) - return rs_operator; - - bool is_valid_ann_query = ann_query_info->top_k() != std::numeric_limits::max(); - bool is_matching_ann_query = std::any_of( - table_column_defines.begin(), - table_column_defines.end(), - [cid = ann_query_info->column_id()](const ColumnDefine & cd) -> bool { return cd.id == cid; }); - if (!is_valid_ann_query || !is_matching_ann_query) - return rs_operator; - - return wrapWithANNQueryInfo(rs_operator, ann_query_info); -} - -RSOperatorPtr wrapWithANNQueryInfo(const RSOperatorPtr & op, const ANNQueryInfoPtr & ann_query_info) -{ - return std::make_shared(op, ann_query_info); -} - -ANNQueryInfoPtr getANNQueryInfo(const RSOperatorPtr & op) -{ - if (op == nullptr) - return nullptr; - auto with_ann = std::dynamic_pointer_cast(op); - if (with_ann == nullptr) - return nullptr; - return with_ann->ann_query_info; + return rs_operator; } } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Filter/RSOperator.h b/dbms/src/Storages/DeltaMerge/Filter/RSOperator.h index a376965bbf9..3accf33e9b2 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/RSOperator.h +++ b/dbms/src/Storages/DeltaMerge/Filter/RSOperator.h @@ -164,10 +164,4 @@ RSOperatorPtr createIsNull(const Attr & attr); // RSOperatorPtr createUnsupported(const String & reason); -// Wrap with a ANNQueryInfo -RSOperatorPtr wrapWithANNQueryInfo(const RSOperatorPtr & op, const ANNQueryInfoPtr & ann_query_info); - -// Get ANNQueryInfo from RSOperator -ANNQueryInfoPtr getANNQueryInfo(const RSOperatorPtr & op); - } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Filter/WithANNQueryInfo.h b/dbms/src/Storages/DeltaMerge/Filter/WithANNQueryInfo.h deleted file mode 100644 index df721a93edd..00000000000 --- a/dbms/src/Storages/DeltaMerge/Filter/WithANNQueryInfo.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2024 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -namespace DB::DM -{ - -// TODO(vector-index): find a more elegant way for passing ANNQueryInfo down for -// building `DMFileWithVectorIndexBlockInputStream` -class WithANNQueryInfo : public RSOperator -{ -public: - const RSOperatorPtr child; - const ANNQueryInfoPtr ann_query_info; - - explicit WithANNQueryInfo(const RSOperatorPtr & child_, const ANNQueryInfoPtr & ann_query_info_) - : child(child_) - , ann_query_info(ann_query_info_) - { - RUNTIME_CHECK(ann_query_info != nullptr); - } - - String name() override { return "ann"; } - - String toDebugString() override - { - if (child) - return child->toDebugString(); - else - return ""; - } - - ColIds getColumnIDs() override - { - if (child) - return child->getColumnIDs(); - else - return {}; - } - - RSResults roughCheck(size_t start_pack, size_t pack_count, const RSCheckParam & param) override - { - if (child) - return child->roughCheck(start_pack, pack_count, param); - else - return RSResults(pack_count, RSResult::Some); - } -}; - -} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Remote/RNSegmentInputStream.h b/dbms/src/Storages/DeltaMerge/Remote/RNSegmentInputStream.h index d6af96784f6..69b919bbc6f 100644 --- a/dbms/src/Storages/DeltaMerge/Remote/RNSegmentInputStream.h +++ b/dbms/src/Storages/DeltaMerge/Remote/RNSegmentInputStream.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/dbms/src/Storages/DeltaMerge/Remote/RNWorkerPrepareStreams.h b/dbms/src/Storages/DeltaMerge/Remote/RNWorkerPrepareStreams.h index b38f598de1a..72c95df70be 100644 --- a/dbms/src/Storages/DeltaMerge/Remote/RNWorkerPrepareStreams.h +++ b/dbms/src/Storages/DeltaMerge/Remote/RNWorkerPrepareStreams.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -42,7 +42,7 @@ class RNWorkerPrepareStreams task->initInputStream( *columns_to_read, start_ts, - push_down_filter, + push_down_executor, read_mode, settings.max_block_size, settings.dt_enable_delta_index_error_fallback); @@ -54,7 +54,7 @@ class RNWorkerPrepareStreams public: const ColumnDefinesPtr columns_to_read; const UInt64 start_ts; - const PushDownFilterPtr push_down_filter; + const PushDownExecutorPtr push_down_executor; const ReadMode read_mode; public: @@ -66,7 +66,7 @@ class RNWorkerPrepareStreams const size_t concurrency; const ColumnDefinesPtr & columns_to_read; const UInt64 start_ts; - const PushDownFilterPtr & push_down_filter; + const PushDownExecutorPtr & push_down_executor; const ReadMode read_mode; }; @@ -83,7 +83,7 @@ class RNWorkerPrepareStreams options.concurrency) , columns_to_read(options.columns_to_read) , start_ts(options.start_ts) - , push_down_filter(options.push_down_filter) + , push_down_executor(options.push_down_executor) , read_mode(options.read_mode) {} diff --git a/dbms/src/Storages/DeltaMerge/Remote/RNWorkers.cpp b/dbms/src/Storages/DeltaMerge/Remote/RNWorkers.cpp index 975dae7db25..48bb9c150a8 100644 --- a/dbms/src/Storages/DeltaMerge/Remote/RNWorkers.cpp +++ b/dbms/src/Storages/DeltaMerge/Remote/RNWorkers.cpp @@ -61,7 +61,7 @@ RNWorkers::RNWorkers( .concurrency = prepare_streams_concurrency, .columns_to_read = options.columns_to_read, .start_ts = options.start_ts, - .push_down_filter = options.push_down_filter, + .push_down_executor = options.push_down_executor, .read_mode = options.read_mode, }); diff --git a/dbms/src/Storages/DeltaMerge/Remote/RNWorkers.h b/dbms/src/Storages/DeltaMerge/Remote/RNWorkers.h index 69a44b52d81..28ba8b95dc3 100644 --- a/dbms/src/Storages/DeltaMerge/Remote/RNWorkers.h +++ b/dbms/src/Storages/DeltaMerge/Remote/RNWorkers.h @@ -47,7 +47,7 @@ class RNWorkers : private boost::noncopyable const LoggerPtr log; const ColumnDefinesPtr & columns_to_read; const UInt64 start_ts; - const PushDownFilterPtr & push_down_filter; + const PushDownExecutorPtr & push_down_executor; const ReadMode read_mode; }; diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 9fc58768391..e0cc0219efa 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -936,7 +936,7 @@ BlockInputStreamPtr Segment::getInputStream( const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & executor, UInt64 start_ts, size_t expected_block_size) { @@ -958,7 +958,7 @@ BlockInputStreamPtr Segment::getInputStream( dmfile, /*set_cache_if_miss*/ true, read_ranges, - filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + executor ? executor->rs_operator : EMPTY_RS_OPERATOR, /*read_pack*/ {}); pack_filter_results.push_back(result); } @@ -995,7 +995,7 @@ BlockInputStreamPtr Segment::getInputStream( columns_to_read, segment_snap, read_ranges, - filter, + executor, pack_filter_results, start_ts, expected_block_size, @@ -3299,12 +3299,66 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( } SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( + const SegmentSnapshotPtr & segment_snap, + const DMContext & dm_context, + const ColumnDefines & columns_to_read, + const RowKeyRanges & read_ranges, + const DMFilePackFilterResults & pack_filter_results, + UInt64 start_ts, + size_t expected_block_size, + ReadTag read_tag) +{ + static constexpr bool NeedRowID = false; + // set `is_fast_scan` to true to try to enable clean read + auto enable_handle_clean_read = !hasColumn(columns_to_read, EXTRA_HANDLE_COLUMN_ID); + constexpr auto is_fast_scan = true; + auto enable_del_clean_read = !hasColumn(columns_to_read, TAG_COLUMN_ID); + + SkippableBlockInputStreamPtr stable_stream = segment_snap->stable->getInputStream( + dm_context, + columns_to_read, + read_ranges, + start_ts, + expected_block_size, + enable_handle_clean_read, + read_tag, + pack_filter_results, + is_fast_scan, + enable_del_clean_read, + /* read_packs */ {}, + NeedRowID); + + auto columns_to_read_ptr = std::make_shared(columns_to_read); + + auto memtable = segment_snap->delta->getMemTableSetSnapshot(); + auto persisted_files = segment_snap->delta->getPersistedFileSetSnapshot(); + SkippableBlockInputStreamPtr mem_table_stream = std::make_shared( + dm_context, + memtable, + columns_to_read_ptr, + this->rowkey_range, + read_tag); + SkippableBlockInputStreamPtr persisted_files_stream = std::make_shared( + dm_context, + persisted_files, + columns_to_read_ptr, + this->rowkey_range, + read_tag); + + auto stream = std::dynamic_pointer_cast>(stable_stream); + assert(stream != nullptr); + stream->appendChild(persisted_files_stream, persisted_files->getRows()); + stream->appendChild(mem_table_stream, memtable->getRows()); + return stream; +} + +std::tuple Segment::getConcatVectorIndexBlockInputStream( BitmapFilterPtr bitmap_filter, const SegmentSnapshotPtr & segment_snap, const DMContext & dm_context, const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const ANNQueryInfoPtr & ann_query_info, const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, @@ -3316,7 +3370,6 @@ SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( constexpr auto is_fast_scan = true; auto enable_del_clean_read = !hasColumn(columns_to_read, TAG_COLUMN_ID); - auto ann_query_info = getANNQueryInfo(filter); SkippableBlockInputStreamPtr stable_stream = segment_snap->stable->tryGetInputStreamWithVectorIndex( dm_context, columns_to_read, @@ -3367,19 +3420,17 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & data_ranges, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & executor, const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { - const auto & filter_columns = filter->filter_columns; + const auto & filter_columns = executor->filter_columns; BlockInputStreamPtr filter_column_stream = getConcatSkippableBlockInputStream( - bitmap_filter, segment_snap, dm_context, *filter_columns, data_ranges, - filter->rs_operator, pack_filter_results, start_ts, expected_block_size, @@ -3394,29 +3445,31 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( filter_columns->size()); BlockInputStreamPtr stream = std::make_shared(*filter_columns, filter_column_stream, bitmap_filter); - if (filter->extra_cast) + if (executor->extra_cast) { - stream = std::make_shared(stream, filter->extra_cast, dm_context.tracing_id); + stream = std::make_shared(stream, executor->extra_cast, dm_context.tracing_id); stream->setExtraInfo("cast after tableScan"); } stream = std::make_shared( stream, - filter->before_where, - filter->filter_column_name, + executor->before_where, + executor->filter_column_name, dm_context.tracing_id); stream->setExtraInfo("push down filter"); - stream - = std::make_shared(stream, filter->project_after_where, dm_context.tracing_id); + stream = std::make_shared( + stream, + executor->project_after_where, + dm_context.tracing_id); stream->setExtraInfo("project after where"); return stream; } // construct extra cast stream if needed - if (filter->extra_cast) + if (executor->extra_cast) { filter_column_stream = std::make_shared( filter_column_stream, - filter->extra_cast, + executor->extra_cast, dm_context.tracing_id); filter_column_stream->setExtraInfo("cast after tableScan"); } @@ -3424,8 +3477,8 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( // construct filter stream filter_column_stream = std::make_shared( filter_column_stream, - filter->before_where, - filter->filter_column_name, + executor->before_where, + executor->filter_column_name, dm_context.tracing_id); filter_column_stream->setExtraInfo("push down filter"); @@ -3443,12 +3496,10 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( // construct stream for the rest columns auto rest_column_stream = getConcatSkippableBlockInputStream( - bitmap_filter, segment_snap, dm_context, *rest_columns_to_read, data_ranges, - filter->rs_operator, pack_filter_results, start_ts, expected_block_size, @@ -3457,7 +3508,7 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( // construct late materialization stream return std::make_shared( columns_to_read, - filter->filter_column_name, + executor->filter_column_name, filter_column_stream, rest_column_stream, bitmap_filter, @@ -3486,7 +3537,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & executor, const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t build_bitmap_filter_block_rows, @@ -3512,7 +3563,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( segment_snap->stable->clearColumnCaches(); } - if (filter && filter->before_where) + if (executor && executor->before_where) { // if has filter conditions pushed down, use late materialization return getLateMaterializationStream( @@ -3521,34 +3572,51 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( columns_to_read, segment_snap, real_ranges, - filter, + executor, pack_filter_results, start_ts, read_data_block_rows); } - auto stream = getConcatSkippableBlockInputStream( - bitmap_filter, - segment_snap, - dm_context, - columns_to_read, - real_ranges, - filter ? filter->rs_operator : EMPTY_RS_OPERATOR, - pack_filter_results, - start_ts, - read_data_block_rows, - ReadTag::Query); - if (auto * vector_index_stream = dynamic_cast(stream.get()); - vector_index_stream) + SkippableBlockInputStreamPtr stream; + if (executor && executor->ann_query_info) { - // For vector search, there are more likely to return small blocks from different - // sub-streams. Squash blocks to reduce the number of blocks thus improve the - // performance of upper layer. - return std::make_shared( - stream, - /*min_block_size_rows=*/read_data_block_rows, - /*min_block_size_bytes=*/0, - dm_context.tracing_id); + // If has ANN index query, use ANN index query + bool is_vector = false; + std::tie(stream, is_vector) = getConcatVectorIndexBlockInputStream( + bitmap_filter, + segment_snap, + dm_context, + columns_to_read, + real_ranges, + executor->ann_query_info, + pack_filter_results, + start_ts, + read_data_block_rows, + ReadTag::Query); + if (is_vector) + { + // For vector search, there are more likely to return small blocks from different + // sub-streams. Squash blocks to reduce the number of blocks thus improve the + // performance of upper layer. + return std::make_shared( + stream, + /*min_block_size_rows=*/read_data_block_rows, + /*min_block_size_bytes=*/0, + dm_context.tracing_id); + } + } + else + { + stream = getConcatSkippableBlockInputStream( + segment_snap, + dm_context, + columns_to_read, + real_ranges, + pack_filter_results, + start_ts, + read_data_block_rows, + ReadTag::Query); } return std::make_shared(columns_to_read, stream, bitmap_filter); } diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index b8ba5b3400f..f32e43e2add 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -41,8 +41,8 @@ class StableValueSpace; using StableValueSpacePtr = std::shared_ptr; class DeltaValueSpace; using DeltaValueSpacePtr = std::shared_ptr; -class PushDownFilter; -using PushDownFilterPtr = std::shared_ptr; +class PushDownExecutor; +using PushDownExecutorPtr = std::shared_ptr; enum class ReadMode; @@ -230,7 +230,7 @@ class Segment const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & executor, UInt64 start_ts, size_t expected_block_size); @@ -751,13 +751,22 @@ class Segment const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); - SkippableBlockInputStreamPtr getConcatSkippableBlockInputStream( + std::tuple getConcatVectorIndexBlockInputStream( BitmapFilterPtr bitmap_filter, const SegmentSnapshotPtr & segment_snap, const DMContext & dm_context, const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const ANNQueryInfoPtr & ann_query_info, + const DMFilePackFilterResults & pack_filter_results, + UInt64 start_ts, + size_t expected_block_size, + ReadTag read_tag); + SkippableBlockInputStreamPtr getConcatSkippableBlockInputStream( + const SegmentSnapshotPtr & segment_snap, + const DMContext & dm_context, + const ColumnDefines & columns_to_read, + const RowKeyRanges & read_ranges, const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, @@ -767,7 +776,7 @@ class Segment const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & executor, const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t build_bitmap_filter_block_rows, @@ -779,7 +788,7 @@ class Segment const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & data_ranges, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & executor, const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp b/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp index 28822cc26af..afe4a2b076e 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp @@ -260,7 +260,7 @@ void SegmentReadTask::initColumnFileDataProvider(const Remote::RNLocalPageCacheG void SegmentReadTask::initInputStream( const ColumnDefines & columns_to_read, UInt64 start_ts, - const PushDownFilterPtr & push_down_filter, + const PushDownExecutorPtr & push_down_executor, ReadMode read_mode, size_t expected_block_size, bool enable_delta_index_error_fallback) @@ -268,7 +268,7 @@ void SegmentReadTask::initInputStream( if (likely(doInitInputStreamWithErrorFallback( columns_to_read, start_ts, - push_down_filter, + push_down_executor, read_mode, expected_block_size, enable_delta_index_error_fallback))) @@ -283,20 +283,20 @@ void SegmentReadTask::initInputStream( { cache->setDeltaIndex(read_snapshot->delta->getSharedDeltaIndex()); } - doInitInputStream(columns_to_read, start_ts, push_down_filter, read_mode, expected_block_size); + doInitInputStream(columns_to_read, start_ts, push_down_executor, read_mode, expected_block_size); } bool SegmentReadTask::doInitInputStreamWithErrorFallback( const ColumnDefines & columns_to_read, UInt64 start_ts, - const PushDownFilterPtr & push_down_filter, + const PushDownExecutorPtr & push_down_executor, ReadMode read_mode, size_t expected_block_size, bool enable_delta_index_error_fallback) { try { - doInitInputStream(columns_to_read, start_ts, push_down_filter, read_mode, expected_block_size); + doInitInputStream(columns_to_read, start_ts, push_down_executor, read_mode, expected_block_size); return true; } catch (const Exception & e) @@ -316,7 +316,7 @@ bool SegmentReadTask::doInitInputStreamWithErrorFallback( void SegmentReadTask::doInitInputStream( const ColumnDefines & columns_to_read, UInt64 start_ts, - const PushDownFilterPtr & push_down_filter, + const PushDownExecutorPtr & push_down_executor, ReadMode read_mode, size_t expected_block_size) { @@ -333,7 +333,7 @@ void SegmentReadTask::doInitInputStream( columns_to_read, read_snapshot, ranges, - push_down_filter, + push_down_executor, start_ts, expected_block_size); } diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTask.h b/dbms/src/Storages/DeltaMerge/SegmentReadTask.h index e7e6f1d9ab6..af4d3cc7268 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTask.h +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTask.h @@ -103,7 +103,7 @@ struct SegmentReadTask void initInputStream( const ColumnDefines & columns_to_read, UInt64 start_ts, - const PushDownFilterPtr & push_down_filter, + const PushDownExecutorPtr & push_down_executor, ReadMode read_mode, size_t expected_block_size, bool enable_delta_index_error_fallback); @@ -140,7 +140,7 @@ struct SegmentReadTask bool doInitInputStreamWithErrorFallback( const ColumnDefines & columns_to_read, UInt64 start_ts, - const PushDownFilterPtr & push_down_filter, + const PushDownExecutorPtr & push_down_executor, ReadMode read_mode, size_t expected_block_size, bool enable_delta_index_error_fallback); @@ -148,7 +148,7 @@ struct SegmentReadTask void doInitInputStream( const ColumnDefines & columns_to_read, UInt64 start_ts, - const PushDownFilterPtr & push_down_filter, + const PushDownExecutorPtr & push_down_executor, ReadMode read_mode, size_t expected_block_size); diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp index b5506f45c42..eb00d24ec5e 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.cpp @@ -117,7 +117,7 @@ BlockInputStreamPtr SegmentReadTaskPool::buildInputStream(SegmentReadTaskPtr & t SegmentReadTaskPool::SegmentReadTaskPool( int extra_table_id_index_, const ColumnDefines & columns_to_read_, - const PushDownFilterPtr & filter_, + const PushDownExecutorPtr & filter_, uint64_t start_ts_, size_t expected_block_size_, ReadMode read_mode_, diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h index 8432342fc88..76dda96f18d 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTaskPool.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -111,7 +111,7 @@ class SegmentReadTaskPool SegmentReadTaskPool( int extra_table_id_index_, const ColumnDefines & columns_to_read_, - const PushDownFilterPtr & filter_, + const PushDownExecutorPtr & filter_, uint64_t start_ts_, size_t expected_block_size_, ReadMode read_mode_, @@ -214,7 +214,7 @@ class SegmentReadTaskPool const int extra_table_id_index; ColumnDefines columns_to_read; - PushDownFilterPtr filter; + PushDownExecutorPtr filter; const uint64_t start_ts; const size_t expected_block_size; const ReadMode read_mode; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 8a4b363a269..6cc96bdc63d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -65,7 +65,7 @@ extern const char proactive_flush_force_set_type[]; namespace DB::tests { -DM::PushDownFilterPtr generatePushDownFilter( +DM::PushDownExecutorPtr generatePushDownExecutor( Context & ctx, const String & table_info_json, const String & query, @@ -582,7 +582,7 @@ try {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, /* num_streams= */ 1, /* start_ts= */ std::numeric_limits::max(), - std::make_shared(filter), + std::make_shared(filter), std::vector{}, 0, TRACING_NAME, @@ -4108,7 +4108,7 @@ try return block; }; - auto check = [&](PushDownFilterPtr filter, RSResult expected_res, const std::vector & expected_data) { + auto check = [&](PushDownExecutorPtr filter, RSResult expected_res, const std::vector & expected_data) { auto in = store->read( *db_context, db_context->getSettingsRef(), @@ -4153,7 +4153,7 @@ try })json"; auto create_filter = [&](Int64 value) { - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( *db_context, table_info_json, fmt::format("select * from default.t_111 where col_time >= {}", value)); @@ -4237,7 +4237,7 @@ try return block; }; - auto check = [&](PushDownFilterPtr filter, RSResult expected_res, const std::vector & expected_data) { + auto check = [&](PushDownExecutorPtr filter, RSResult expected_res, const std::vector & expected_data) { auto in = store->read( *db_context, db_context->getSettingsRef(), @@ -4283,7 +4283,7 @@ try })json"; auto create_filter = [&](Int64 value) { - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( *db_context, table_info_json, fmt::format("select * from default.t_111 where col_time >= {}", value)); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_vector_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_vector_index.cpp index 69f123e035b..83fe42b5038 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_vector_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_vector_index.cpp @@ -98,7 +98,7 @@ class DeltaMergeStoreVectorTest store->write(*db_context, db_context->getSettingsRef(), block); } - void read(const RowKeyRange & range, const PushDownFilterPtr & filter, const ColumnWithTypeAndName & out) + void read(const RowKeyRange & range, const PushDownExecutorPtr & filter, const ColumnWithTypeAndName & out) { auto in = store->read( *db_context, @@ -253,7 +253,7 @@ try ann_query_info->set_top_k(2); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({127.5})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{127.0}, {128.0}})); } @@ -262,7 +262,7 @@ try ann_query_info->set_top_k(2); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({72.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{72.0}, {73.0}})); } } @@ -303,7 +303,7 @@ try ann_query_info->set_top_k(2); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({72.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{72.0}, {73.0}})); } @@ -312,7 +312,7 @@ try ann_query_info->set_top_k(2); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({127.5})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{127.0}, {128.0}})); } } @@ -360,7 +360,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({72.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); // [0, 128) with vector index return 72.0, [128, 130) without vector index return all. read(range, filter, createVecFloat32Column({{72.0}, {128.0}, {129.0}})); } @@ -370,7 +370,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({72.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); // [0, 128) with vector index return 72.0, [128, 130) without vector index return all. read(range, filter, createVecFloat32Column({{72.0}, {128.0}, {129.0}})); } @@ -419,7 +419,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); // [0, 4) without vector index return all. read(range, filter, createVecFloat32Column({{0.0}, {1.0}, {2.0}, {3.0}})); } @@ -429,7 +429,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); // [0, 4) without vector index return all. read(range, filter, createVecFloat32Column({{0.0}, {1.0}, {2.0}, {3.0}})); } @@ -502,7 +502,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({2.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(left_segment_range, filter, createVecFloat32Column({{2.0}})); } @@ -512,7 +512,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({222.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(left_segment_range, filter, createVecFloat32Column({{127.0}})); } @@ -535,7 +535,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({2.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{2.0}})); } @@ -545,7 +545,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({122.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{122.0}})); } @@ -630,7 +630,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({2.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(left_segment_range, filter, createVecFloat32Column({{2.0}})); } @@ -640,7 +640,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({222.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(left_segment_range, filter, createVecFloat32Column({{127.0}})); } @@ -663,7 +663,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({2.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{2.0}})); } @@ -673,7 +673,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({122.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{122.0}})); } @@ -753,7 +753,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({2.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{2.0}})); } @@ -763,7 +763,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({2.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{2.0}})); } @@ -841,7 +841,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({2.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{2.0}})); } @@ -851,7 +851,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({222.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{222.0}})); } @@ -928,7 +928,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({2.0})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{2.0}})); } @@ -938,7 +938,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(encodeVectorFloat32({222.1})); - auto filter = std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); read(range, filter, createVecFloat32Column({{222.0}})); } diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index 05b18dc9e2e..5fb7debed72 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -159,7 +159,7 @@ bool checkMatch( {all_range}, 1, std::numeric_limits::max(), - std::make_shared(filter), + std::make_shared(filter), std::vector{}, 0, name, diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp index c1b17e94f07..0391ff1100b 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp @@ -1217,7 +1217,7 @@ class VectorIndexSegmentTestBase columns_to_read, snapshot, {range}, - std::make_shared(wrapWithANNQueryInfo({}, ann_query)), + std::make_shared(ann_query), pack_filter_results, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, @@ -1798,7 +1798,7 @@ class VectorIndexSegmentOnS3Test BlockInputStreamPtr createComputeNodeStream( const SegmentPtr & write_node_segment, const ColumnDefines & columns_to_read, - const PushDownFilterPtr & filter, + const PushDownExecutorPtr & filter, const ScanContextPtr & read_scan_context = nullptr) { auto write_dm_context = dmContext(); @@ -1951,7 +1951,7 @@ class VectorIndexSegmentOnS3Test auto stream = createComputeNodeStream( wn_segment, {cdPK(), cdVec()}, - std::make_shared(wrapWithANNQueryInfo(nullptr, ann_query_info)), + std::make_shared(ann_query_info), read_scan_context); return stream; } diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index_utils.h b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index_utils.h index b3516e1de44..9fb34e43031 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index_utils.h +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index_utils.h @@ -149,7 +149,7 @@ class DeltaMergeStoreVectorBase : public VectorIndexTestUtils store->write(*db_context, db_context->getSettingsRef(), block); } - void read(const RowKeyRange & range, const PushDownFilterPtr & filter, const ColumnWithTypeAndName & out) + void read(const RowKeyRange & range, const PushDownExecutorPtr & filter, const ColumnWithTypeAndName & out) { auto in = store->read( *db_context, diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp index 01e38b58c1e..e4ebbf2f6a0 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp @@ -99,12 +99,10 @@ class SkippableBlockInputStreamTest : public SegmentTestBasic const RowKeyRanges & read_ranges) { return segment->getConcatSkippableBlockInputStream( - nullptr, snapshot, *dm_context, columns_to_read, read_ranges, - EMPTY_RS_OPERATOR, {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, diff --git a/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp b/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp index 566d6bc3e43..0836965ecde 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index df7ceb171a5..685d57071b6 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include @@ -834,7 +834,8 @@ BlockInputStreams StorageDeltaMerge::read( query_info.req_id, tracing_logger); - auto filter = PushDownFilter::build(query_info, columns_to_read, store->getTableColumns(), context, tracing_logger); + auto filter + = PushDownExecutor::build(query_info, columns_to_read, store->getTableColumns(), context, tracing_logger); auto runtime_filter_list = parseRuntimeFilterList(query_info, store->getTableColumns(), context, tracing_logger); @@ -917,7 +918,8 @@ void StorageDeltaMerge::read( query_info.req_id, tracing_logger); - auto filter = PushDownFilter::build(query_info, columns_to_read, store->getTableColumns(), context, tracing_logger); + auto filter + = PushDownExecutor::build(query_info, columns_to_read, store->getTableColumns(), context, tracing_logger); auto runtime_filter_list = parseRuntimeFilterList(query_info, store->getTableColumns(), context, tracing_logger); diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index f040e76f814..de4340459e4 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/StorageDisaggregatedRemote.cpp b/dbms/src/Storages/StorageDisaggregatedRemote.cpp index 475074d6978..a2e7160ec5f 100644 --- a/dbms/src/Storages/StorageDisaggregatedRemote.cpp +++ b/dbms/src/Storages/StorageDisaggregatedRemote.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -514,9 +514,16 @@ std::variant StorageDisagg { const auto & executor_id = table_scan.getTableScanExecutorID(); + // build the rough set operator auto rs_operator = buildRSOperator(db_context, column_defines); - auto push_down_filter = DM::PushDownFilter::build( + // build ANN query info + DM::ANNQueryInfoPtr ann_query_info = nullptr; + if (table_scan.getANNQueryInfo().query_type() != tipb::ANNQueryType::InvalidQueryType) + ann_query_info = std::make_shared(table_scan.getANNQueryInfo()); + // build push down executor + auto push_down_executor = DM::PushDownExecutor::build( rs_operator, + ann_query_info, table_scan.getColumns(), table_scan.getPushedDownFilters(), *column_defines, @@ -526,7 +533,7 @@ std::variant StorageDisagg db_context, table_scan.isFastScan(), table_scan.keepOrder(), - push_down_filter); + push_down_executor); const UInt64 start_ts = sender_target_mpp_task_id.gather_id.query_id.start_ts; const auto enable_read_thread = db_context.getSettingsRef().dt_enable_read_thread; LOG_INFO( @@ -546,7 +553,7 @@ std::variant StorageDisagg return std::make_shared( extra_table_id_index, *column_defines, - push_down_filter, + push_down_executor, start_ts, db_context.getSettingsRef().max_block_size, read_mode, @@ -566,7 +573,7 @@ std::variant StorageDisagg .log = log->getChild(executor_id), .columns_to_read = column_defines, .start_ts = start_ts, - .push_down_filter = push_down_filter, + .push_down_executor = push_down_executor, .read_mode = read_mode, }, num_streams); diff --git a/dbms/src/Storages/tests/gtests_parse_push_down_filter.cpp b/dbms/src/Storages/tests/gtests_parse_push_down_filter.cpp index b0808bab5f2..748542d3e66 100644 --- a/dbms/src/Storages/tests/gtests_parse_push_down_filter.cpp +++ b/dbms/src/Storages/tests/gtests_parse_push_down_filter.cpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -29,13 +29,14 @@ #include #include +#include #include namespace DB::tests { -class ParsePushDownFilterTest : public ::testing::Test +class ParsePushDownExecutorTest : public ::testing::Test { public: static void SetUpTestCase() @@ -54,13 +55,13 @@ class ParsePushDownFilterTest : public ::testing::Test LoggerPtr log = Logger::get(); ContextPtr ctx = DB::tests::TiFlashTestEnv::getContext(); TimezoneInfo default_timezone_info = DB::tests::TiFlashTestEnv::getContext()->getTimezoneInfo(); - DM::PushDownFilterPtr generatePushDownFilter( + DM::PushDownExecutorPtr generatePushDownExecutor( const String & table_info_json, const String & query, TimezoneInfo & timezone_info); }; -DM::PushDownFilterPtr generatePushDownFilter( +DM::PushDownExecutorPtr generatePushDownExecutor( Context & ctx, const String & table_info_json, const String & query, @@ -124,21 +125,27 @@ DM::PushDownFilterPtr generatePushDownFilter( auto rs_operator = DM::FilterParser::parseDAGQuery(*dag_query, table_info.columns, std::move(create_attr_by_column_id), log); - auto push_down_filter - = DM::PushDownFilter::build(rs_operator, table_info.columns, pushed_down_filters, columns_to_read, ctx, log); - return push_down_filter; + auto push_down_executor = DM::PushDownExecutor::build( + rs_operator, + std::make_shared(dag_query->ann_query_info), + table_info.columns, + pushed_down_filters, + columns_to_read, + ctx, + log); + return push_down_executor; } -DM::PushDownFilterPtr ParsePushDownFilterTest::generatePushDownFilter( +DM::PushDownExecutorPtr ParsePushDownExecutorTest::generatePushDownExecutor( const String & table_info_json, const String & query, TimezoneInfo & timezone_info) { - return ::DB::tests::generatePushDownFilter(*ctx, table_info_json, query, timezone_info); + return ::DB::tests::generatePushDownExecutor(*ctx, table_info_json, query, timezone_info); } // Test cases for col and literal -TEST_F(ParsePushDownFilterTest, ColAndLiteral) +TEST_F(ParsePushDownExecutorTest, ColAndLiteral) try { const String table_info_json = R"json({ @@ -152,7 +159,7 @@ try { // Equal between col and literal - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 = 666", default_timezone_info); @@ -174,7 +181,7 @@ try { // Greater between col and literal - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 > 666", default_timezone_info); @@ -196,7 +203,7 @@ try { // GreaterEqual between col and literal - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 >= 667", default_timezone_info); @@ -218,7 +225,7 @@ try { // Less between col and literal - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 < 777", default_timezone_info); @@ -240,7 +247,7 @@ try { // LessEqual between col and literal - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 <= 776", default_timezone_info); @@ -262,7 +269,7 @@ try } CATCH -TEST_F(ParsePushDownFilterTest, LiteralAndCol) +TEST_F(ParsePushDownExecutorTest, LiteralAndCol) try { const String table_info_json = R"json({ @@ -276,7 +283,7 @@ try // Test cases for literal and col (inverse direction) { // Equal between literal and col (take care of direction) - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where 667 = col_2", default_timezone_info); @@ -298,7 +305,7 @@ try { // NotEqual between literal and col (take care of direction) - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where 667 != col_2", default_timezone_info); @@ -320,7 +327,7 @@ try { // Greater between literal and col (take care of direction) - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where 667 < col_2", default_timezone_info); @@ -342,7 +349,7 @@ try { // GreaterEqual between literal and col (take care of direction) - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where 667 <= col_2", default_timezone_info); @@ -364,7 +371,7 @@ try { // Less between literal and col (take care of direction) - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where 777 > col_2", default_timezone_info); @@ -386,7 +393,7 @@ try { // LessEqual between literal and col (take care of direction) - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where 777 >= col_2", default_timezone_info); @@ -409,7 +416,7 @@ try CATCH // Test cases for Logic operator -TEST_F(ParsePushDownFilterTest, LogicOperator) +TEST_F(ParsePushDownExecutorTest, LogicOperator) try { const String table_info_json = R"json({ @@ -424,7 +431,7 @@ try })json"; { // Not - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select col_1, col_2 from default.t_111 where NOT col_2=666", default_timezone_info); @@ -451,7 +458,7 @@ try { // And - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_1 = 'test1' and col_2 = 666", default_timezone_info); @@ -478,7 +485,7 @@ try { // OR - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 = 789 or col_2 = 777", default_timezone_info); @@ -508,7 +515,7 @@ try // More complicated { // And with "not supported" - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_1 = 'test1' and not col_2 = 666", default_timezone_info); @@ -535,7 +542,7 @@ try { // And with not - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 = 789 and not col_3 = 666", default_timezone_info); @@ -564,7 +571,7 @@ try { // And with or - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 = 789 and (col_3 = 666 or col_3 = 678)", default_timezone_info); @@ -595,7 +602,7 @@ try { // Or with "not supported" - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_1 = 'test1' or col_2 = 666", default_timezone_info); @@ -622,7 +629,7 @@ try { // Or with not - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_1 = 'test1' or not col_2 = 666", default_timezone_info); @@ -649,7 +656,7 @@ try { // And between col and literal (not supported since And only support when child is ColumnExpr) - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 and 1", default_timezone_info); @@ -674,7 +681,7 @@ try { // Or between col and literal (not supported since Or only support when child is ColumnExpr) - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, "select * from default.t_111 where col_2 or 1", default_timezone_info); @@ -702,7 +709,7 @@ try CATCH // Test cases for date,datetime,timestamp column -TEST_F(ParsePushDownFilterTest, TimestampColumn) +TEST_F(ParsePushDownExecutorTest, TimestampColumn) try { const String table_info_json = R"json({ @@ -732,7 +739,7 @@ try convertTimeZone(origin_time_stamp, converted_time, *timezone_info.timezone, time_zone_utc); // converted_time: 0 - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, String("select * from default.t_111 where col_timestamp > cast_string_datetime('") + datetime + String("')"), @@ -781,7 +788,7 @@ try convertTimeZone(origin_time_stamp, converted_time, *timezone_info.timezone, time_zone_utc); // converted_time: 1802216518491045888 - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, String("select * from default.t_111 where col_timestamp > cast_string_datetime('") + datetime + String("')"), @@ -837,7 +844,7 @@ try convertTimeZoneByOffset(origin_time_stamp, converted_time, false, timezone_info.timezone_offset); // converted_time: 0 - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, String("select * from default.t_111 where col_timestamp > cast_string_datetime('") + datetime + String("')"), @@ -887,7 +894,7 @@ try { // Greater between Datetime col and Datetime literal - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, String("select * from default.t_111 where col_datetime > cast_string_datetime('") + datetime + String("')"), default_timezone_info); @@ -935,7 +942,7 @@ try { // Greater between Date col and Datetime literal - auto filter = generatePushDownFilter( + auto filter = generatePushDownExecutor( table_info_json, String("select * from default.t_111 where col_date > cast_string_datetime('") + datetime + String("')"), default_timezone_info); diff --git a/dbms/src/TiDB/Schema/tests/gtest_schema_sync.cpp b/dbms/src/TiDB/Schema/tests/gtest_schema_sync.cpp index 03c1473c347..43426e9ce5e 100644 --- a/dbms/src/TiDB/Schema/tests/gtest_schema_sync.cpp +++ b/dbms/src/TiDB/Schema/tests/gtest_schema_sync.cpp @@ -862,7 +862,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(dmsv.encodeVectorFloat32({1.0, 2.0, 3.5})); - auto filter = std::make_shared(DM::wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); dmsv.read(range, filter, createVecFloat32Column({{1.0, 2.0, 3.5}})); } @@ -873,7 +873,7 @@ try ann_query_info->set_top_k(1); ann_query_info->set_ref_vec_f32(dmsv.encodeVectorFloat32({1.0, 2.0, 3.8})); - auto filter = std::make_shared(DM::wrapWithANNQueryInfo(nullptr, ann_query_info)); + auto filter = std::make_shared(ann_query_info); dmsv.read(range, filter, createVecFloat32Column({{1.0, 2.0, 3.5}})); } From 65a7fc45253b345b924ee8a2c88fd97b4b7ff544 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Wed, 25 Dec 2024 11:07:03 +0800 Subject: [PATCH 14/17] fix Signed-off-by: Lloyd-Pottiger --- dbms/src/Flash/tests/gtest_filter_executor.cpp | 2 +- .../DeltaMerge/Filter/PushDownExecutor.cpp | 17 ++++++++++------- .../tests/gtest_kvstore_fast_add_peer.cpp | 5 +++-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index 5fc2d1da541..5e11f65f6d0 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -235,7 +235,7 @@ try } CATCH -TEST_F(FilterExecutorTestRunner, convert_bool) +TEST_F(FilterExecutorTestRunner, convertBool) try { { diff --git a/dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.cpp b/dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.cpp index ce579acfc6b..de666f3e622 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.cpp +++ b/dbms/src/Storages/DeltaMerge/Filter/PushDownExecutor.cpp @@ -35,13 +35,16 @@ PushDownExecutorPtr PushDownExecutor::build( { // check if the ann_query_info is valid auto valid_ann_query_info = ann_query_info; - bool is_valid_ann_query = ann_query_info->top_k() != std::numeric_limits::max(); - bool is_matching_ann_query = std::any_of( - columns_to_read.begin(), - columns_to_read.end(), - [cid = ann_query_info->column_id()](const ColumnDefine & cd) -> bool { return cd.id == cid; }); - if (!is_valid_ann_query || !is_matching_ann_query) - valid_ann_query_info = nullptr; + if (ann_query_info) + { + bool is_valid_ann_query = ann_query_info->top_k() != std::numeric_limits::max(); + bool is_matching_ann_query = std::any_of( + columns_to_read.begin(), + columns_to_read.end(), + [cid = ann_query_info->column_id()](const ColumnDefine & cd) -> bool { return cd.id == cid; }); + if (!is_valid_ann_query || !is_matching_ann_query) + valid_ann_query_info = nullptr; + } if (pushed_down_filters.empty()) { diff --git a/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp b/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp index 0836965ecde..9516f281079 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp @@ -179,7 +179,7 @@ class RegionKVStoreTestFAP : public KVStoreTestBase protected: UInt64 upload_sequence = 1000; - UInt64 table_id; + UInt64 table_id{}; private: ContextPtr context; @@ -596,7 +596,8 @@ try return genFastAddPeerResFail(FastAddPeerStatus::NoSuitable); }); // Will generate and persist some information in local ps, which will not be uploaded. - FastAddPeerImplWrite(global_context.getTMTContext(), proxy_helper.get(), region_id, 2333, std::move(mock_data), 0); + auto mock_data_cp = mock_data; + FastAddPeerImplWrite(global_context.getTMTContext(), proxy_helper.get(), region_id, 2333, std::move(mock_data_cp), 0); dumpCheckpoint(); FastAddPeerImplWrite(global_context.getTMTContext(), proxy_helper.get(), region_id, 2333, std::move(mock_data), 0); exe_lock.unlock(); From ee2a32ccddb52bf2d5471a07a168950596b4f252 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Wed, 25 Dec 2024 13:53:18 +0800 Subject: [PATCH 15/17] fix Signed-off-by: Lloyd-Pottiger --- .../KVStore/tests/gtest_kvstore_fast_add_peer.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp b/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp index 9516f281079..7ecef9432b5 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp @@ -597,7 +597,13 @@ try }); // Will generate and persist some information in local ps, which will not be uploaded. auto mock_data_cp = mock_data; - FastAddPeerImplWrite(global_context.getTMTContext(), proxy_helper.get(), region_id, 2333, std::move(mock_data_cp), 0); + FastAddPeerImplWrite( + global_context.getTMTContext(), + proxy_helper.get(), + region_id, + 2333, + std::move(mock_data_cp), + 0); dumpCheckpoint(); FastAddPeerImplWrite(global_context.getTMTContext(), proxy_helper.get(), region_id, 2333, std::move(mock_data), 0); exe_lock.unlock(); From 939e5b596abd419d6a3683390273b362f24a84ca Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Wed, 25 Dec 2024 17:00:15 +0800 Subject: [PATCH 16/17] address comments Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/Segment.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 46efe954d59..63a419a9b08 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3314,8 +3314,7 @@ SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( constexpr auto is_fast_scan = true; auto enable_del_clean_read = !hasColumn(columns_to_read, TAG_COLUMN_ID); - auto ann_query_info = getANNQueryInfo(filter); - SkippableBlockInputStreamPtr stable_stream = segment_snap->stable->tryGetInputStreamWithVectorIndex( + SkippableBlockInputStreamPtr stable_stream = segment_snap->stable->getInputStream( dm_context, columns_to_read, read_ranges, @@ -3582,7 +3581,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( SkippableBlockInputStreamPtr stream; if (executor && executor->ann_query_info) { - // If has ANN index query, use ANN index query + // For ANN query, try to use vector index to accelerate. bool is_vector = false; std::tie(stream, is_vector) = getConcatVectorIndexBlockInputStream( bitmap_filter, From 9a4f517e5067276c267b9841bb5dcd82bfc9772d Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Fri, 27 Dec 2024 13:40:00 +0800 Subject: [PATCH 17/17] address comments Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h | 1 + dbms/src/Storages/DeltaMerge/Segment.h | 1 + 2 files changed, 2 insertions(+) diff --git a/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h b/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h index 5ad92d5f28e..8c6bdb883d3 100644 --- a/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/ConcatSkippableBlockInputStream.h @@ -77,6 +77,7 @@ class ConcatVectorIndexBlockInputStream : public SkippableBlockInputStream , bitmap_filter(bitmap_filter_) {} + // Returns static std::tuple build( const BitmapFilterPtr & bitmap_filter, std::shared_ptr> stream, diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index f32e43e2add..bb331e8cdef 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -751,6 +751,7 @@ class Segment const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); + // Returns std::tuple getConcatVectorIndexBlockInputStream( BitmapFilterPtr bitmap_filter, const SegmentSnapshotPtr & segment_snap,