From 58a38aafbf8c6e323aa295633a28cc07e13344fd Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Thu, 19 Dec 2024 16:56:44 +0800 Subject: [PATCH 01/11] Storages: load RSResult only once Signed-off-by: Lloyd-Pottiger --- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 12 +- dbms/src/Storages/DeltaMerge/File/DMFile.h | 4 +- .../File/DMFileBlockInputStream.cpp | 38 +----- .../DeltaMerge/File/DMFileBlockInputStream.h | 16 ++- .../DeltaMerge/File/DMFilePackFilter.cpp | 83 ++++-------- .../DeltaMerge/File/DMFilePackFilter.h | 105 ++++------------ .../File/DMFilePackFilterResult.cpp | 83 ++++++++++++ .../DeltaMerge/File/DMFilePackFilterResult.h | 106 ++++++++++++++++ .../DeltaMerge/File/DMFilePackFilter_fwd.h | 4 + .../Storages/DeltaMerge/File/DMFileReader.cpp | 4 +- .../Storages/DeltaMerge/File/DMFileReader.h | 4 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 119 +++++++++--------- dbms/src/Storages/DeltaMerge/Segment.h | 17 +-- .../Storages/DeltaMerge/SegmentReadTask.cpp | 2 +- .../Storages/DeltaMerge/StableValueSpace.cpp | 106 +++++++++++----- .../Storages/DeltaMerge/StableValueSpace.h | 21 +++- .../DeltaMerge/tests/gtest_dm_file.cpp | 20 +-- .../DeltaMerge/tests/gtest_dm_segment.cpp | 11 +- .../tests/gtest_dm_vector_index.cpp | 55 ++++---- .../DeltaMerge/tests/gtest_segment_bitmap.cpp | 15 ++- .../tests/gtest_segment_test_basic.cpp | 2 +- .../gtest_skippable_block_input_stream.cpp | 1 + 22 files changed, 488 insertions(+), 340 deletions(-) create mode 100644 dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp create mode 100644 dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 64469af3f72..90aa65c10ca 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -38,21 +38,13 @@ ColumnFileBig::ColumnFileBig(const DMContext & dm_context, const DMFilePtr & fil void ColumnFileBig::calculateStat(const DMContext & dm_context) { - auto index_cache = dm_context.global_context.getMinMaxIndexCache(); - auto pack_filter = DMFilePackFilter::loadFrom( + dm_context, file, - index_cache, /*set_cache_if_miss*/ false, {segment_range}, EMPTY_RS_OPERATOR, - {}, - dm_context.global_context.getFileProvider(), - dm_context.getReadLimiter(), - dm_context.scan_context, - /*tracing_id*/ dm_context.tracing_id, - ReadTag::Internal); - + {}); std::tie(valid_rows, valid_bytes) = pack_filter.validRowsAndBytes(); } diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.h b/dbms/src/Storages/DeltaMerge/File/DMFile.h index 76c28975ddb..d6751f2f0d9 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.h @@ -210,6 +210,8 @@ class DMFile : private boost::noncopyable UInt32 metaVersion() const { return meta->metaVersion(); } + bool isColIndexExist(const ColId & col_id) const; + private: DMFile( UInt64 file_id_, @@ -293,8 +295,6 @@ class DMFile : private boost::noncopyable String colIndexCacheKey(const FileNameBase & file_name_base) const; String colMarkCacheKey(const FileNameBase & file_name_base) const; - bool isColIndexExist(const ColId & col_id) const; - String encryptionBasePath() const; EncryptionPath encryptionDataPath(const FileNameBase & file_name_base) const; EncryptionPath encryptionIndexPath(const FileNameBase & file_name_base) const; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index 1f4dff89f00..b3aee6221fb 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -19,7 +19,6 @@ #include #include - namespace DB::DM { @@ -58,19 +57,6 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle; - DMFilePackFilter pack_filter = DMFilePackFilter::loadFrom( - dmfile, - index_cache, - /*set_cache_if_miss*/ true, - rowkey_ranges, - rs_filter, - read_packs, - file_provider, - read_limiter, - scan_context, - tracing_id, - read_tag); - bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); if (!enable_read_thread || max_sharing_column_bytes_for_all <= 0) @@ -87,7 +73,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( enable_del_clean_read, is_fast_scan, max_data_version, - std::move(pack_filter), + *pack_filter, mark_cache, enable_column_cache, column_cache, @@ -140,18 +126,13 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn return build(dmfile, read_columns, rowkey_ranges, scan_context); }; - if (!rs_filter) - return fallback(); - - auto filter_with_ann = std::dynamic_pointer_cast(rs_filter); - if (!filter_with_ann) + if (!ann_query_info) return fallback(); if (!bitmap_filter.has_value()) return fallback(); Block header_layout = toEmptyBlock(read_columns); - auto ann_query_info = filter_with_ann->ann_query_info; // Copy out the vector column for later use. Copy is intentionally performed after the // fast check so that in fallback conditions we don't need unnecessary copies. @@ -181,19 +162,6 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn // All check passed. Let's read via vector index. - DMFilePackFilter pack_filter = DMFilePackFilter::loadFrom( - dmfile, - index_cache, - /*set_cache_if_miss*/ true, - rowkey_ranges, - rs_filter, - read_packs, - file_provider, - read_limiter, - scan_context, - tracing_id, - ReadTag::Query); - bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle; @@ -205,7 +173,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn enable_del_clean_read, is_fast_scan, max_data_version, - std::move(pack_filter), + *pack_filter, mark_cache, enable_column_cache, column_cache, diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h index 456999aa4c9..a2a89ae7f26 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h @@ -138,9 +138,9 @@ class DMFileBlockInputStreamBuilder return *this; } - DMFileBlockInputStreamBuilder & setRSOperator(const RSOperatorPtr & filter_) + DMFileBlockInputStreamBuilder setAnnQureyInfo(const ANNQueryInfoPtr & ann_query_info_) { - rs_filter = filter_; + ann_query_info = ann_query_info_; return *this; } @@ -180,6 +180,12 @@ class DMFileBlockInputStreamBuilder return *this; } + DMFileBlockInputStreamBuilder & setDMFilePackFilterResult(const DMFilePackFilterResultPtr & pack_filter_) + { + pack_filter = pack_filter_; + return *this; + } + /** * @note To really enable the long term cache, you also need to ensure * ColumnCacheLongTerm is initialized in the global context. @@ -217,8 +223,6 @@ class DMFileBlockInputStreamBuilder bool is_fast_scan = false; bool enable_del_clean_read = false; UInt64 max_data_version = std::numeric_limits::max(); - // Rough set filter - RSOperatorPtr rs_filter; // packs filter (filter by pack index) IdSetPtr read_packs; MarkCachePtr mark_cache; @@ -234,6 +238,10 @@ class DMFileBlockInputStreamBuilder String tracing_id; ReadTag read_tag = ReadTag::Internal; + DMFilePackFilterResultPtr pack_filter; + + ANNQueryInfoPtr ann_query_info = nullptr; + VectorIndexCachePtr vector_index_cache; // Note: Currently thie field is assigned only for Stable streams, not available for ColumnFileBig std::optional bitmap_filter; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index e465f11caad..3bbe0bd0967 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -1,5 +1,4 @@ - -// Copyright 2023 PingCAP, Inc. +// Copyright 2024 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,20 +17,20 @@ #include #include -#include namespace DB::DM { -void DMFilePackFilter::init(ReadTag read_tag) +DMFilePackFilterResult DMFilePackFilter::load(const DMContext & dm_context) { Stopwatch watch; SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); + DMFilePackFilterResult result(dm_context, dmfile, pack_count); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) { - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + tryLoadIndex(result.param, EXTRA_HANDLE_COLUMN_ID); std::vector handle_filters; for (auto & rowkey_range : rowkey_ranges) handle_filters.emplace_back(toFilter(rowkey_range)); @@ -64,16 +63,16 @@ void DMFilePackFilter::init(ReadTag read_tag) #endif for (size_t i = 0; i < pack_count; ++i) { - handle_res[i] = RSResult::None; + result.handle_res[i] = RSResult::None; } for (auto & handle_filter : handle_filters) { - auto res = handle_filter->roughCheck(0, pack_count, param); + auto res = handle_filter->roughCheck(0, pack_count, result.param); std::transform( - handle_res.begin(), - handle_res.end(), + result.handle_res.begin(), + result.handle_res.end(), res.begin(), - handle_res.begin(), + result.handle_res.begin(), [](RSResult a, RSResult b) { return a || b; }); } } @@ -81,18 +80,18 @@ void DMFilePackFilter::init(ReadTag read_tag) ProfileEvents::increment(ProfileEvents::DMFileFilterNoFilter, pack_count); /// Check packs by handle_res - pack_res = handle_res; - auto after_pk = countUsePack(); + result.pack_res = result.handle_res; + auto after_pk = result.countUsePack(); /// Check packs by read_packs if (read_packs) { for (size_t i = 0; i < pack_count; ++i) { - pack_res[i] = read_packs->contains(i) ? pack_res[i] : RSResult::None; + result.pack_res[i] = read_packs->contains(i) ? result.pack_res[i] : RSResult::None; } } - auto after_read_packs = countUsePack(); + auto after_read_packs = result.countUsePack(); ProfileEvents::increment(ProfileEvents::DMFileFilterAftPKAndPackSet, after_read_packs); /// Check packs by filter in where clause @@ -102,36 +101,30 @@ void DMFilePackFilter::init(ReadTag read_tag) ColIds ids = filter->getColumnIDs(); for (const auto & id : ids) { - tryLoadIndex(id); + tryLoadIndex(result.param, id); } - const auto check_results = filter->roughCheck(0, pack_count, param); + const auto check_results = filter->roughCheck(0, pack_count, result.param); std::transform( - pack_res.cbegin(), - pack_res.cend(), + result.pack_res.cbegin(), + result.pack_res.cend(), check_results.cbegin(), - pack_res.begin(), + result.pack_res.begin(), [](RSResult a, RSResult b) { return a && b; }); } else { // ColumnFileBig in DeltaValueSpace never pass a filter to DMFilePackFilter. // Assume its filter always return Some. - std::transform(pack_res.cbegin(), pack_res.cend(), pack_res.begin(), [](RSResult a) { + std::transform(result.pack_res.cbegin(), result.pack_res.cend(), result.pack_res.begin(), [](RSResult a) { return a && RSResult::Some; }); } - auto [none_count, some_count, all_count, all_null_count] = countPackRes(); + auto [none_count, some_count, all_count, all_null_count] = result.countPackRes(); auto after_filter = some_count + all_count + all_null_count; ProfileEvents::increment(ProfileEvents::DMFileFilterAftRoughSet, after_filter); - // In table scanning, DMFilePackFilter of a DMFile may be created several times: - // 1. When building MVCC bitmap (ReadTag::MVCC). - // 2. When building LM filter stream (ReadTag::LM). - // 3. When building stream of other columns (ReadTag::Query). - // Only need to count the filter result once. - // TODO: We can create DMFilePackFilter at the beginning and pass it to the stages described above. - if (read_tag == ReadTag::Query) + if (scan_context) { scan_context->rs_pack_filter_none += none_count; scan_context->rs_pack_filter_some += some_count; @@ -148,8 +141,7 @@ void DMFilePackFilter::init(ReadTag read_tag) LOG_DEBUG( log, "RSFilter exclude rate: {:.2f}, after_pk: {}, after_read_packs: {}, after_filter: {}, handle_ranges: {}" - ", read_packs: {}, pack_count: {}, none_count: {}, some_count: {}, all_count: {}, all_null_count: {}, " - "read_tag: {}", + ", read_packs: {}, pack_count: {}, none_count: {}, some_count: {}, all_count: {}, all_null_count: {}", ((after_read_packs == 0) ? std::numeric_limits::quiet_NaN() : filter_rate), after_pk, after_read_packs, @@ -160,33 +152,8 @@ void DMFilePackFilter::init(ReadTag read_tag) none_count, some_count, all_count, - all_null_count, - magic_enum::enum_name(read_tag)); -} - -std::tuple DMFilePackFilter::countPackRes() const -{ - UInt64 none_count = 0; - UInt64 some_count = 0; - UInt64 all_count = 0; - UInt64 all_null_count = 0; - for (auto res : pack_res) - { - if (res == RSResult::None || res == RSResult::NoneNull) - ++none_count; - else if (res == RSResult::Some || res == RSResult::SomeNull) - ++some_count; - else if (res == RSResult::All) - ++all_count; - else if (res == RSResult::AllNull) - ++all_null_count; - } - return {none_count, some_count, all_count, all_null_count}; -} - -UInt64 DMFilePackFilter::countUsePack() const -{ - return std::count_if(pack_res.cbegin(), pack_res.cend(), [](RSResult res) { return res.isUse(); }); + all_null_count); + return result; } void DMFilePackFilter::loadIndex( @@ -296,7 +263,7 @@ void DMFilePackFilter::loadIndex( indexes.emplace(col_id, RSIndex(type, minmax_index)); } -void DMFilePackFilter::tryLoadIndex(ColId col_id) +void DMFilePackFilter::tryLoadIndex(RSCheckParam & param, ColId col_id) { if (param.indexes.count(col_id)) return; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 6e21fa99dbb..2483df83c52 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -34,84 +36,35 @@ extern const Event DMFileFilterAftPKAndPackSet; extern const Event DMFileFilterAftRoughSet; } // namespace ProfileEvents -namespace DB -{ -namespace DM +namespace DB::DM { + class DMFilePackFilter { + friend class DMFilePackFilterResult; + public: // Empty `rowkey_ranges` means do not filter by rowkey_ranges - static DMFilePackFilter loadFrom( + static DMFilePackFilterResult loadFrom( + const DMContext & dm_context, const DMFilePtr & dmfile, - const MinMaxIndexCachePtr & index_cache, bool set_cache_if_miss, const RowKeyRanges & rowkey_ranges, const RSOperatorPtr & filter, - const IdSetPtr & read_packs, - const FileProviderPtr & file_provider, - const ReadLimiterPtr & read_limiter, - const ScanContextPtr & scan_context, - const String & tracing_id, - const ReadTag read_tag) + const IdSetPtr & read_packs) { - return DMFilePackFilter( + DMFilePackFilter pack_filter( dmfile, - index_cache, + dm_context.global_context.getMinMaxIndexCache(), set_cache_if_miss, rowkey_ranges, filter, read_packs, - file_provider, - read_limiter, - scan_context, - tracing_id, - read_tag); - } - - const RSResults & getHandleRes() const { return handle_res; } - const RSResults & getPackResConst() const { return pack_res; } - UInt64 countUsePack() const; - - Handle getMinHandle(size_t pack_id) - { - if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); - auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; - return minmax_index->getIntMinMax(pack_id).first; - } - - StringRef getMinStringHandle(size_t pack_id) - { - if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); - auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; - return minmax_index->getStringMinMax(pack_id).first; - } - - UInt64 getMaxVersion(size_t pack_id) - { - if (!param.indexes.count(VERSION_COLUMN_ID)) - tryLoadIndex(VERSION_COLUMN_ID); - auto & minmax_index = param.indexes.find(VERSION_COLUMN_ID)->second.minmax; - return minmax_index->getUInt64MinMax(pack_id).second; - } - - // Get valid rows and bytes after filter invalid packs by handle_range and filter - std::pair validRowsAndBytes() - { - size_t rows = 0; - size_t bytes = 0; - const auto & pack_stats = dmfile->getPackStats(); - for (size_t i = 0; i < pack_stats.size(); ++i) - { - if (pack_res[i].isUse()) - { - rows += pack_stats[i].rows; - bytes += pack_stats[i].bytes; - } - } - return {rows, bytes}; + dm_context.global_context.getFileProvider(), + dm_context.global_context.getReadLimiter(), + dm_context.scan_context, + dm_context.tracing_id); + return pack_filter.load(dm_context); } private: @@ -125,8 +78,7 @@ class DMFilePackFilter const FileProviderPtr & file_provider_, const ReadLimiterPtr & read_limiter_, const ScanContextPtr & scan_context_, - const String & tracing_id, - const ReadTag read_tag) + const String & tracing_id) : dmfile(dmfile_) , index_cache(index_cache_) , set_cache_if_miss(set_cache_if_miss_) @@ -134,15 +86,12 @@ class DMFilePackFilter , filter(filter_) , read_packs(read_packs_) , file_provider(file_provider_) - , handle_res(dmfile->getPacks(), RSResult::All) , scan_context(scan_context_) , log(Logger::get(tracing_id)) , read_limiter(read_limiter_) - { - init(read_tag); - } + {} - void init(ReadTag read_tag); + DMFilePackFilterResult load(const DMContext & dm_context); static void loadIndex( ColumnIndexes & indexes, @@ -154,13 +103,11 @@ class DMFilePackFilter const ReadLimiterPtr & read_limiter, const ScanContextPtr & scan_context); - void tryLoadIndex(ColId col_id); - - // None+NoneNull, Some+SomeNull, All, AllNull - std::tuple countPackRes() const; + void tryLoadIndex(RSCheckParam & param, ColId col_id); private: DMFilePtr dmfile; + MinMaxIndexCachePtr index_cache; bool set_cache_if_miss; RowKeyRanges rowkey_ranges; @@ -168,18 +115,10 @@ class DMFilePackFilter IdSetPtr read_packs; FileProviderPtr file_provider; - RSCheckParam param; - - // `handle_res` is the filter results of `rowkey_ranges`. - std::vector handle_res; - // `pack_res` is the filter results of `rowkey_ranges && filter && read_packs`. - std::vector pack_res; - const ScanContextPtr scan_context; LoggerPtr log; ReadLimiterPtr read_limiter; }; -} // namespace DM -} // namespace DB +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp new file mode 100644 index 00000000000..7f431338a75 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp @@ -0,0 +1,83 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +namespace DB::DM +{ + +UInt64 DMFilePackFilterResult::countUsePack() const +{ + return std::count_if(pack_res.begin(), pack_res.end(), [](RSResult res) { return res.isUse(); }); +} + +std::pair DMFilePackFilterResult::validRowsAndBytes() +{ + size_t rows = 0; + size_t bytes = 0; + const auto & pack_stats = dmfile->getPackStats(); + for (size_t i = 0; i < pack_stats.size(); ++i) + { + if (pack_res[i].isUse()) + { + rows += pack_stats[i].rows; + bytes += pack_stats[i].bytes; + } + } + return {rows, bytes}; +} + +std::tuple DMFilePackFilterResult::countPackRes() const +{ + UInt64 none_count = 0; + UInt64 some_count = 0; + UInt64 all_count = 0; + UInt64 all_null_count = 0; + for (auto res : pack_res) + { + if (res == RSResult::None || res == RSResult::NoneNull) + ++none_count; + else if (res == RSResult::Some || res == RSResult::SomeNull) + ++some_count; + else if (res == RSResult::All) + ++all_count; + else if (res == RSResult::AllNull) + ++all_null_count; + } + return {none_count, some_count, all_count, all_null_count}; +} + +void DMFilePackFilterResult::tryLoadIndex(ColId col_id) const +{ + if (param.indexes.count(col_id)) + return; + + if (!dmfile->isColIndexExist(col_id)) + return; + + Stopwatch watch; + DMFilePackFilter::loadIndex( + param.indexes, + dmfile, + dm_context.global_context.getFileProvider(), + dm_context.global_context.getMinMaxIndexCache(), + true, + col_id, + dm_context.global_context.getReadLimiter(), + dm_context.scan_context); +} + +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h new file mode 100644 index 00000000000..5ac4e2b8006 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -0,0 +1,106 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +namespace DB::DM +{ + +class DMFilePackFilterResult; +using DMFilePackFilterResultPtr = std::shared_ptr; +using DMFilePackFilterResults = std::vector; + +class DMFilePackFilterResult +{ + friend class DMFilePackFilter; + +public: + DMFilePackFilterResult(const DMContext & dm_context_, const DMFilePtr & dmfile_, size_t pack_count_) + : dm_context(dm_context_) + , dmfile(dmfile_) + , handle_res(pack_count_, RSResult::All) + {} + + const RSResults & getHandleRes() const { return handle_res; } + const RSResults & getPackResConst() const { return pack_res; } + RSResults & getPackRes() { return pack_res; } + UInt64 countUsePack() const; + + Handle getMinHandle(size_t pack_id) const + { + if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) + tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; + return minmax_index->getIntMinMax(pack_id).first; + } + + StringRef getMinStringHandle(size_t pack_id) const + { + if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) + tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; + return minmax_index->getStringMinMax(pack_id).first; + } + + UInt64 getMaxVersion(size_t pack_id) const + { + if (!param.indexes.count(VERSION_COLUMN_ID)) + tryLoadIndex(VERSION_COLUMN_ID); + auto & minmax_index = param.indexes.find(VERSION_COLUMN_ID)->second.minmax; + return minmax_index->getUInt64MinMax(pack_id).second; + } + + static DMFilePackFilterResultPtr emptyResult(const DMContext & dm_context, const DMFilePtr & dmfile) + { + return std::make_shared(dm_context, dmfile, 0); + } + + static DMFilePackFilterResults emptyResults(const DMContext & dm_context, const DMFiles & files) + { + DMFilePackFilterResults results; + results.reserve(files.size()); + for (const auto & file : files) + { + results.push_back(emptyResult(dm_context, file)); + } + return results; + } + + // Get valid rows and bytes after filter invalid packs by handle_range and filter + std::pair validRowsAndBytes(); + + // None+NoneNull, Some+SomeNull, All, AllNull + std::tuple countPackRes() const; + +private: + void tryLoadIndex(ColId col_id) const; + +private: + const DMContext & dm_context; + + DMFilePtr dmfile; + mutable RSCheckParam param; + + // `handle_res` is the filter results of `rowkey_ranges`. + std::vector handle_res; + // `pack_res` is the filter results of `rowkey_ranges && filter && read_packs`. + std::vector pack_res; +}; + +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter_fwd.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter_fwd.h index 60246ae83f4..f2064ab3f5e 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter_fwd.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter_fwd.h @@ -24,4 +24,8 @@ using IdSetPtr = std::shared_ptr; class DMFilePackFilter; +class DMFilePackFilterResult; +using DMFilePackFilterResultPtr = std::shared_ptr; +using DMFilePackFilterResults = std::vector; + } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index b26cf5b5321..125db7956c8 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -46,7 +46,7 @@ DMFileReader::DMFileReader( bool is_fast_scan_, UInt64 max_read_version_, // filters - DMFilePackFilter && pack_filter_, + const DMFilePackFilterResult & pack_filter_, // caches const MarkCachePtr & mark_cache_, bool enable_column_cache_, @@ -69,7 +69,7 @@ DMFileReader::DMFileReader( , is_fast_scan(is_fast_scan_) , enable_column_cache(enable_column_cache_ && column_cache_) , max_read_version(max_read_version_) - , pack_filter(std::move(pack_filter_)) + , pack_filter(pack_filter_) , mark_cache(mark_cache_) , column_cache(column_cache_) , scan_context(scan_context_) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h index 85c49951b9e..a2336d3f7f9 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h @@ -56,7 +56,7 @@ class DMFileReader // The the MVCC filter version. Used by clean read check. UInt64 max_read_version_, // filters - DMFilePackFilter && pack_filter_, + const DMFilePackFilterResult & pack_filter_, // caches const MarkCachePtr & mark_cache_, bool enable_column_cache_, @@ -184,7 +184,7 @@ class DMFileReader const UInt64 max_read_version; /// Filters - DMFilePackFilter pack_filter; + const DMFilePackFilterResult & pack_filter; /// Caches MarkCachePtr mark_cache; diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 237716abe81..79ce22e6b92 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -947,6 +947,21 @@ BlockInputStreamPtr Segment::getInputStream( expected_block_size, columns_to_read, segment_snap->stable->stable); + + // load DMilePackFilterResult for each DMFile + DMFilePackFilterResults pack_filter_results; + for (const auto & dmfile : segment_snap->stable->getDMFiles()) + { + auto result = std::make_shared(DMFilePackFilter::loadFrom( + dm_context, + dmfile, + /*set_cache_if_miss*/ true, + read_ranges, + filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + /*read_pack*/ {})); + pack_filter_results.emplace_back(std::move(result)); + } + switch (read_mode) { case ReadMode::Normal: @@ -955,7 +970,7 @@ BlockInputStreamPtr Segment::getInputStream( columns_to_read, segment_snap, read_ranges, - filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + pack_filter_results, start_ts, clipped_block_rows); case ReadMode::Fast: @@ -964,7 +979,7 @@ BlockInputStreamPtr Segment::getInputStream( columns_to_read, segment_snap, read_ranges, - filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + pack_filter_results, clipped_block_rows); case ReadMode::Raw: return getInputStreamModeRaw( // @@ -980,6 +995,7 @@ BlockInputStreamPtr Segment::getInputStream( segment_snap, read_ranges, filter, + pack_filter_results, start_ts, expected_block_size, clipped_block_rows); @@ -1002,7 +1018,7 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, bool need_row_id) @@ -1027,11 +1043,11 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( dm_context, *read_info.read_columns, real_ranges, - filter, start_ts, expected_block_size, false, - read_tag); + read_tag, + pack_filter_results); } else if (useCleanRead(segment_snap, columns_to_read)) { @@ -1041,11 +1057,11 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( dm_context, *read_info.read_columns, real_ranges, - filter, start_ts, expected_block_size, true, - read_tag); + read_tag, + pack_filter_results); } else { @@ -1053,13 +1069,13 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( dm_context, *read_info.read_columns, real_ranges, - filter, segment_snap->stable, read_info.getDeltaReader(need_row_id ? ReadTag::MVCC : ReadTag::Query), read_info.index_begin, read_info.index_end, expected_block_size, read_tag, + pack_filter_results, start_ts, need_row_id); } @@ -1086,7 +1102,7 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( const DMContext & dm_context, const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -1098,7 +1114,7 @@ BlockInputStreamPtr Segment::getInputStreamModeNormal( columns_to_read, segment_snap, read_ranges, - filter, + pack_filter_results, start_ts, expected_block_size); } @@ -1118,7 +1134,6 @@ BlockInputStreamPtr Segment::getInputStreamForDataExport( dm_context, *read_info.read_columns, data_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, read_info.getDeltaReader(ReadTag::Internal), read_info.index_begin, @@ -1153,7 +1168,7 @@ BlockInputStreamPtr Segment::getInputStreamModeFast( const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, size_t expected_block_size) { auto real_ranges = shrinkRowKeyRanges(read_ranges); @@ -1206,11 +1221,11 @@ BlockInputStreamPtr Segment::getInputStreamModeFast( dm_context, *new_columns_to_read, real_ranges, - filter, std::numeric_limits::max(), expected_block_size, enable_handle_clean_read, ReadTag::Query, + pack_filter_results, /* is_fast_scan */ true, enable_del_clean_read); @@ -1272,7 +1287,6 @@ BlockInputStreamPtr Segment::getInputStreamModeRaw( dm_context, *new_columns_to_read, data_ranges, - EMPTY_RS_OPERATOR, std::numeric_limits::max(), expected_block_size, /* enable_handle_clean_read */ false, @@ -1790,7 +1804,6 @@ std::optional Segment::getSplitPointSlow( dm_context, *pk_col_defs, rowkey_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, delta_reader, read_info.index_begin, @@ -1817,7 +1830,6 @@ std::optional Segment::getSplitPointSlow( dm_context, *pk_col_defs, rowkey_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, delta_reader, read_info.index_begin, @@ -2107,7 +2119,6 @@ std::optional Segment::prepareSplitPhysical( // dm_context, *read_info.read_columns, my_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, my_delta_reader, read_info.index_begin, @@ -2139,7 +2150,6 @@ std::optional Segment::prepareSplitPhysical( // dm_context, *read_info.read_columns, other_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, other_delta_reader, read_info.index_begin, @@ -2147,7 +2157,6 @@ std::optional Segment::prepareSplitPhysical( // dm_context.stable_pack_rows, ReadTag::Internal); - other_data = std::make_shared>(other_data, other_ranges, 0); other_data = std::make_shared>( other_data, @@ -2342,7 +2351,6 @@ StableValueSpacePtr Segment::prepareMerge( dm_context, *read_info.read_columns, rowkey_ranges, - EMPTY_RS_OPERATOR, segment_snap->stable, read_info.getDeltaReader(ReadTag::Internal), read_info.index_begin, @@ -2706,13 +2714,13 @@ SkippableBlockInputStreamPtr Segment::getPlacedStream( const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, const StableSnapshotPtr & stable_snap, const DeltaValueReaderPtr & delta_reader, const DeltaIndexIterator & delta_index_begin, const DeltaIndexIterator & delta_index_end, size_t expected_block_size, ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, bool need_row_id) { @@ -2723,11 +2731,11 @@ SkippableBlockInputStreamPtr Segment::getPlacedStream( dm_context, read_columns, rowkey_ranges, - filter, start_ts, expected_block_size, /* enable_handle_clean_read */ false, read_tag, + pack_filter_results, /* is_fast_scan */ false, /* enable_del_clean_read */ false); RowKeyRange rowkey_range = rowkey_ranges.size() == 1 @@ -2928,7 +2936,6 @@ bool Segment::placeUpsert( dm_context, {handle, getVersionColumnDefine()}, {place_handle_range}, - EMPTY_RS_OPERATOR, stable_snap, delta_reader, compacted_index->begin(), @@ -2981,7 +2988,6 @@ bool Segment::placeDelete( dm_context, {handle, getVersionColumnDefine()}, delete_ranges, - EMPTY_RS_OPERATOR, stable_snap, delta_reader, compacted_index->begin(), @@ -3019,7 +3025,6 @@ bool Segment::placeDelete( dm_context, {handle, getVersionColumnDefine()}, {place_handle_range}, - EMPTY_RS_OPERATOR, stable_snap, delta_reader, compacted_index->begin(), @@ -3041,7 +3046,7 @@ BitmapFilterPtr Segment::buildBitmapFilter( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -3052,13 +3057,19 @@ BitmapFilterPtr Segment::buildBitmapFilter( dm_context, segment_snap, read_ranges, - filter, + pack_filter_results, start_ts, expected_block_size); } else { - return buildBitmapFilterNormal(dm_context, segment_snap, read_ranges, filter, start_ts, expected_block_size); + return buildBitmapFilterNormal( + dm_context, + segment_snap, + read_ranges, + pack_filter_results, + start_ts, + expected_block_size); } } @@ -3066,7 +3077,7 @@ BitmapFilterPtr Segment::buildBitmapFilterNormal( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -3080,7 +3091,7 @@ BitmapFilterPtr Segment::buildBitmapFilterNormal( columns_to_read, segment_snap, read_ranges, - filter, + pack_filter_results, start_ts, expected_block_size, /*need_row_id*/ true); @@ -3116,9 +3127,7 @@ struct Range std::pair, std::vector> parseDMFilePackInfo( const DMFiles & dmfiles, - const DMContext & dm_context, - const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_result, UInt64 start_ts) { // Packs that all rows compliant with MVCC filter and RowKey filter requirements. @@ -3137,22 +3146,12 @@ std::pair, std::vector> parseDMFilePackInfo( size_t rows = 0; UInt32 preceded_rows = 0; - for (const auto & dmfile : dmfiles) + for (size_t i = 0; i < dmfiles.size(); ++i) { - DMFilePackFilter pack_filter = DMFilePackFilter::loadFrom( - dmfile, - dm_context.global_context.getMinMaxIndexCache(), - /*set_cache_if_miss*/ true, - read_ranges, - filter, - /*read_pack*/ {}, - dm_context.global_context.getFileProvider(), - dm_context.global_context.getReadLimiter(), - dm_context.scan_context, - dm_context.tracing_id, - ReadTag::MVCC); - const auto & pack_res = pack_filter.getPackResConst(); - const auto & handle_res = pack_filter.getHandleRes(); + const auto & dmfile = dmfiles[i]; + const auto & pack_filter = pack_filter_result[i]; + const auto & pack_res = pack_filter->getPackResConst(); + const auto & handle_res = pack_filter->getHandleRes(); const auto & pack_stats = dmfile->getPackStats(); auto some_packs_set = std::make_shared(); @@ -3167,7 +3166,7 @@ std::pair, std::vector> parseDMFilePackInfo( } if (handle_res[pack_id] == RSResult::Some || pack_stat.not_clean > 0 - || pack_filter.getMaxVersion(pack_id) > start_ts) + || pack_filter->getMaxVersion(pack_id) > start_ts) { // We need to read this pack to do RowKey or MVCC filter. some_packs_set->insert(pack_id); @@ -3202,7 +3201,7 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -3216,7 +3215,7 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( return elapse_ns / 1'000'000.0; }; - auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, dm_context, read_ranges, filter, start_ts); + auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, pack_filter_results, start_ts); if (skipped_ranges.size() == 1 && skipped_ranges[0].offset == 0 && skipped_ranges[0].rows == segment_snap->stable->getDMFilesRows()) @@ -3266,11 +3265,11 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( dm_context, columns_to_read, read_ranges, - filter, start_ts, expected_block_size, /*enable_handle_clean_read*/ false, ReadTag::MVCC, + pack_filter_results, /*is_fast_scan*/ false, /*enable_del_clean_read*/ false, /*read_packs*/ some_packs_sets, @@ -3304,6 +3303,7 @@ SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, ReadTag read_tag) @@ -3314,15 +3314,17 @@ SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( constexpr auto is_fast_scan = true; auto enable_del_clean_read = !hasColumn(columns_to_read, TAG_COLUMN_ID); - SkippableBlockInputStreamPtr stable_stream = segment_snap->stable->getInputStream( + auto ann_query_info = getANNQueryInfo(filter); + SkippableBlockInputStreamPtr stable_stream = segment_snap->stable->tryGetInputStreamWithVectorIndex( dm_context, columns_to_read, read_ranges, - filter, + ann_query_info, start_ts, expected_block_size, enable_handle_clean_read, read_tag, + pack_filter_results, is_fast_scan, enable_del_clean_read, /* read_packs */ {}, @@ -3339,7 +3341,6 @@ SkippableBlockInputStreamPtr Segment::getConcatSkippableBlockInputStream( columns_to_read_ptr, this->rowkey_range, read_tag); - auto ann_query_info = getANNQueryInfo(filter); SkippableBlockInputStreamPtr persisted_files_stream = ColumnFileSetWithVectorIndexInputStream::tryBuild( dm_context, persisted_files, @@ -3365,6 +3366,7 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & data_ranges, const PushDownFilterPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size) { @@ -3376,6 +3378,7 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( *filter_columns, data_ranges, filter->rs_operator, + pack_filter_results, start_ts, expected_block_size, ReadTag::LMFilter); @@ -3444,6 +3447,7 @@ BlockInputStreamPtr Segment::getLateMaterializationStream( *rest_columns_to_read, data_ranges, filter->rs_operator, + pack_filter_results, start_ts, expected_block_size, ReadTag::Query); @@ -3481,6 +3485,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, const PushDownFilterPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t build_bitmap_filter_block_rows, size_t read_data_block_rows) @@ -3495,7 +3500,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( dm_context, segment_snap, real_ranges, - filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + pack_filter_results, start_ts, build_bitmap_filter_block_rows); @@ -3515,6 +3520,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( segment_snap, real_ranges, filter, + pack_filter_results, start_ts, read_data_block_rows); } @@ -3526,6 +3532,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( columns_to_read, real_ranges, filter ? filter->rs_operator : EMPTY_RS_OPERATOR, + pack_filter_results, start_ts, read_data_block_rows, ReadTag::Query); diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index 22b50f8ad8c..b8ba5b3400f 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -239,7 +239,7 @@ class Segment const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, bool need_row_id = false); @@ -248,7 +248,7 @@ class Segment const DMContext & dm_context, const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter = {}, + const DMFilePackFilterResults & pack_filter_results = {}, UInt64 start_ts = std::numeric_limits::max(), size_t expected_block_size = DEFAULT_BLOCK_SIZE); @@ -270,7 +270,7 @@ class Segment const ColumnDefines & columns_to_read, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, size_t expected_block_size = DEFAULT_BLOCK_SIZE); BlockInputStreamPtr getInputStreamModeRaw( @@ -684,13 +684,13 @@ class Segment const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, const StableSnapshotPtr & stable_snap, const DeltaValueReaderPtr & delta_reader, const DeltaIndexIterator & delta_index_begin, const DeltaIndexIterator & delta_index_end, size_t expected_block_size, ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results = {}, UInt64 start_ts = std::numeric_limits::max(), bool need_row_id = false); @@ -734,21 +734,21 @@ class Segment const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); BitmapFilterPtr buildBitmapFilterNormal( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); BitmapFilterPtr buildBitmapFilterStableOnly( const DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, - const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); SkippableBlockInputStreamPtr getConcatSkippableBlockInputStream( @@ -758,6 +758,7 @@ class Segment const ColumnDefines & columns_to_read, const RowKeyRanges & read_ranges, const RSOperatorPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size, ReadTag read_tag); @@ -767,6 +768,7 @@ class Segment const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & read_ranges, const PushDownFilterPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t build_bitmap_filter_block_rows, size_t read_data_block_rows); @@ -778,6 +780,7 @@ class Segment const SegmentSnapshotPtr & segment_snap, const RowKeyRanges & data_ranges, const PushDownFilterPtr & filter, + const DMFilePackFilterResults & pack_filter_results, UInt64 start_ts, size_t expected_block_size); diff --git a/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp b/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp index 76edbcd84f4..28822cc26af 100644 --- a/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp +++ b/dbms/src/Storages/DeltaMerge/SegmentReadTask.cpp @@ -521,7 +521,7 @@ void SegmentReadTask::checkMemTableSet(const ColumnFileSetSnapshotPtr & mem_tabl void SegmentReadTask::checkMemTableSetReady() const { const auto & mem_table_snap = read_snapshot->delta->getMemTableSetSnapshot(); - for (auto & cf : mem_table_snap->getColumnFiles()) + for (const auto & cf : mem_table_snap->getColumnFiles()) { if (auto * in_mem_cf = cf->tryToInMemoryFile(); in_mem_cf) { diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 10b03785e1d..9d6d7b8a707 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -49,21 +49,15 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang } else if (dm_context != nullptr) { - auto index_cache = dm_context->global_context.getGlobalContext().getMinMaxIndexCache(); for (const auto & file : files_) { auto pack_filter = DMFilePackFilter::loadFrom( + *dm_context, file, - index_cache, /*set_cache_if_miss*/ true, {range}, EMPTY_RS_OPERATOR, - {}, - dm_context->global_context.getFileProvider(), - dm_context->getReadLimiter(), - dm_context->scan_context, - dm_context->tracing_id, - ReadTag::Internal); + {}); auto [file_valid_rows, file_valid_bytes] = pack_filter.validRowsAndBytes(); rows += file_valid_rows; bytes += file_valid_bytes; @@ -377,17 +371,12 @@ void StableValueSpace::calculateStableProperty( mvcc_stream->readSuffix(); } auto pack_filter = DMFilePackFilter::loadFrom( + context, file, - context.global_context.getMinMaxIndexCache(), /*set_cache_if_miss*/ false, {rowkey_range}, EMPTY_RS_OPERATOR, - {}, - context.global_context.getFileProvider(), - context.getReadLimiter(), - context.scan_context, - context.tracing_id, - ReadTag::Internal); + {}); const auto & pack_res = pack_filter.getPackResConst(); size_t new_pack_properties_index = 0; const bool use_new_pack_properties = pack_properties.property_size() == 0; @@ -461,11 +450,72 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( const DMContext & context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, UInt64 max_data_version, size_t expected_block_size, bool enable_handle_clean_read, ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results, + bool is_fast_scan, + bool enable_del_clean_read, + const std::vector & read_packs, + bool need_row_id) +{ + LOG_DEBUG( + log, + "start_ts: {}, enable_handle_clean_read: {}, is_fast_mode: {}, enable_del_clean_read: {}", + max_data_version, + enable_handle_clean_read, + is_fast_scan, + enable_del_clean_read); + SkippableBlockInputStreams streams; + std::vector rows; + streams.reserve(stable->files.size()); + rows.reserve(stable->files.size()); + + for (size_t i = 0; i < stable->files.size(); i++) + { + DMFileBlockInputStreamBuilder builder(context.global_context); + const auto & pack_filter_result = !pack_filter_results.empty() + ? pack_filter_results[i] + : DMFilePackFilterResult::emptyResult(context, stable->files[i]); + builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) + .enableColumnCacheLongTerm(context.pk_col_id) + .setDMFilePackFilterResult(pack_filter_result) + .setColumnCache(column_caches[i]) + .setTracingID(context.tracing_id) + .setRowsThreshold(expected_block_size) + .setReadPacks(read_packs.size() > i ? read_packs[i] : nullptr) + .setReadTag(read_tag); + + streams.push_back(builder.build(stable->files[i], read_columns, rowkey_ranges, context.scan_context)); + rows.push_back(stable->files[i]->getRows()); + } + if (need_row_id) + { + return std::make_shared>( + streams, + std::move(rows), + context.scan_context); + } + else + { + return std::make_shared>( + streams, + std::move(rows), + context.scan_context); + } +} + +SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVectorIndex( + const DMContext & context, + const ColumnDefines & read_columns, + const RowKeyRanges & rowkey_ranges, + const ANNQueryInfoPtr & ann_query_info, + UInt64 max_data_version, + size_t expected_block_size, + bool enable_handle_clean_read, + ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results, bool is_fast_scan, bool enable_del_clean_read, const std::vector & read_packs, @@ -489,9 +539,13 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( for (size_t i = 0; i < stable->files.size(); i++) { DMFileBlockInputStreamBuilder builder(context.global_context); + const auto & pack_filter_result = !pack_filter_results.empty() + ? pack_filter_results[i] + : DMFilePackFilterResult::emptyResult(context, stable->files[i]); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) .enableColumnCacheLongTerm(context.pk_col_id) - .setRSOperator(filter) + .setAnnQureyInfo(ann_query_info) + .setDMFilePackFilterResult(pack_filter_result) .setColumnCache(column_caches[i]) .setTracingID(context.tracing_id) .setRowsThreshold(expected_block_size) @@ -543,17 +597,12 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & for (auto & f : stable->files) { auto filter = DMFilePackFilter::loadFrom( + context, f, - context.global_context.getMinMaxIndexCache(), /*set_cache_if_miss*/ false, {range}, RSOperatorPtr{}, - IdSetPtr{}, - context.global_context.getFileProvider(), - context.getReadLimiter(), - context.scan_context, - context.tracing_id, - ReadTag::Internal); + IdSetPtr{}); const auto & pack_stats = f->getPackStats(); const auto & pack_res = filter.getPackResConst(); for (size_t i = 0; i < pack_stats.size(); ++i) @@ -589,17 +638,12 @@ StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, co { const auto & file = stable->files[file_idx]; auto filter = DMFilePackFilter::loadFrom( + context, file, - context.global_context.getMinMaxIndexCache(), /*set_cache_if_miss*/ false, {range}, RSOperatorPtr{}, - IdSetPtr{}, - context.global_context.getFileProvider(), - context.getReadLimiter(), - context.scan_context, - context.tracing_id, - ReadTag::Internal); + IdSetPtr{}); const auto & handle_filter_result = filter.getHandleRes(); if (file_idx == 0) { diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.h b/dbms/src/Storages/DeltaMerge/StableValueSpace.h index b6e07214f1a..8fdddbaeb48 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.h @@ -14,12 +14,16 @@ #pragma once +#include +#include #include #include #include #include #include #include +#include +#include #include #include #include @@ -224,11 +228,26 @@ class StableValueSpace : public std::enable_shared_from_this const DMContext & context, // const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, - const RSOperatorPtr & filter, UInt64 max_data_version, size_t expected_block_size, bool enable_handle_clean_read, ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results = {}, + bool is_fast_scan = false, + bool enable_del_clean_read = false, + const std::vector & read_packs = {}, + bool need_row_id = false); + + SkippableBlockInputStreamPtr tryGetInputStreamWithVectorIndex( + const DMContext & context, + const ColumnDefines & read_columns, + const RowKeyRanges & rowkey_ranges, + const ANNQueryInfoPtr & ann_query_info, + UInt64 max_data_version, + size_t expected_block_size, + bool enable_handle_clean_read, + ReadTag read_tag, + const DMFilePackFilterResults & pack_filter_results, bool is_fast_scan = false, bool enable_del_clean_read = false, const std::vector & read_packs = {}, diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index 0672993e4f4..7235bab5c5c 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -1575,12 +1575,14 @@ try auto test_read_filter = [&](const HandleRange & range) { // Filtered by rough set filter auto filter = toRSFilter(i64_cd, range); + const auto read_ranges = RowKeyRanges{RowKeyRange::newAll(false, 1)}; + auto pack_result = std::make_shared( + DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {})); // Test read DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream - = builder.setColumnCache(column_cache) - .setRSOperator(filter) // Filtered by rough set filter - .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}, std::make_shared()); + auto stream = builder.setColumnCache(column_cache) + .setDMFilePackFilterResult(pack_result) + .build(dm_file, *cols, read_ranges, std::make_shared()); Int64 expect_first_pk = static_cast(std::floor(std::max(0, range.start) / span_per_part)) * span_per_part; Int64 expect_last_pk = std::min( @@ -1656,12 +1658,14 @@ try // (first range) Or (Unsupported) -> should NOT filter any chunk filters.emplace_back(createOr({one_part_filter, createUnsupported("test")}), num_rows_write); auto test_read_filter = [&](const DM::RSOperatorPtr & filter, const size_t num_rows_should_read) { + const auto read_ranges = RowKeyRanges{RowKeyRange::newAll(false, 1)}; + auto pack_result = std::make_shared( + DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {})); // Test read DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream - = builder.setColumnCache(column_cache) - .setRSOperator(filter) // Filtered by rough set filter - .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}, std::make_shared()); + auto stream = builder.setColumnCache(column_cache) + .setDMFilePackFilterResult(pack_result) + .build(dm_file, *cols, read_ranges, std::make_shared()); Int64 expect_first_pk = 0; Int64 expect_last_pk = num_rows_should_read; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 9e1441f96a8..6c7d5c8ed76 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1087,7 +1088,7 @@ try dmContext(), segment_snap, real_ranges, - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); // the bitmap only contains the overlapped packs of ColumnFileBig. So only 60 here. @@ -1107,6 +1108,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, + DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); @@ -1148,17 +1150,14 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, + DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); // Only the rows in [30, 50) and [80, 90) valid auto vec = createNumbers(30, 50); vec.append_range(createNumbers(80, 90)); - ASSERT_INPUTSTREAM_BLOCK_UR( - in, - Block({ - createColumn(vec), - })); + ASSERT_INPUTSTREAM_BLOCK_UR(in, Block({createColumn(vec)})); } } CATCH diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp index 89f7181f98d..dcc9f38d50a 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp @@ -240,7 +240,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.5})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -265,7 +265,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -290,7 +290,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -318,7 +318,7 @@ try bitmap_filter->set(/* start */ 2, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 3)) .tryBuildWithVectorIndex( dm_file, @@ -343,7 +343,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -368,7 +368,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -393,7 +393,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -428,7 +428,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -453,7 +453,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -491,7 +491,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -589,7 +589,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -615,7 +615,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -644,7 +644,7 @@ try bitmap_filter->set(/* start */ 2, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 3)) .tryBuildWithVectorIndex( dm_file, @@ -674,7 +674,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -700,7 +700,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -729,7 +729,7 @@ try bitmap_filter->set(/* start */ 2, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 3)) .tryBuildWithVectorIndex( dm_file, @@ -759,7 +759,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -784,7 +784,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.8})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(3, true)) .tryBuildWithVectorIndex( dm_file, @@ -812,7 +812,7 @@ try bitmap_filter->set(/* start */ 2, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 3)) .tryBuildWithVectorIndex( dm_file, @@ -876,7 +876,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.5})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(5, true)) .tryBuildWithVectorIndex( dm_file, @@ -944,7 +944,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({5.0, 5.0, 5.5})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(6, true)) .tryBuildWithVectorIndex( dm_file, @@ -969,7 +969,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({1.0, 2.0, 3.0})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(6, true)) .tryBuildWithVectorIndex( dm_file, @@ -994,7 +994,7 @@ try ann_query_info->set_ref_vec_f32(encodeVectorFloat32({0.0, 0.0, 0.0})); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView::createWithFilter(6, true)) .tryBuildWithVectorIndex( dm_file, @@ -1022,7 +1022,7 @@ try bitmap_filter->set(/* start */ 5, /* limit */ 1, false); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 6)) .tryBuildWithVectorIndex( dm_file, @@ -1093,7 +1093,7 @@ try bitmap_filter->set(0, 6); // 0~6 rows are valid, 6~9 rows are invalid due to pack filter. DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 9)) .tryBuildWithVectorIndex(dm_file, read_cols, row_key_ranges, std::make_shared()); ASSERT_INPUTSTREAM_COLS_UR( @@ -1107,7 +1107,7 @@ try // TopK=4 ann_query_info->set_top_k(4); builder = DMFileBlockInputStreamBuilder(dbContext()); - stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 9)) .tryBuildWithVectorIndex(dm_file, read_cols, row_key_ranges, std::make_shared()); ASSERT_INPUTSTREAM_COLS_UR( @@ -1136,7 +1136,7 @@ try bitmap_filter->set(3, 2); DMFileBlockInputStreamBuilder builder(dbContext()); - auto stream = builder.setRSOperator(wrapWithANNQueryInfo(nullptr, ann_query_info)) + auto stream = builder.setAnnQureyInfo(ann_query_info) .setBitmapFilter(BitmapFilterView(bitmap_filter, 0, 9)) .tryBuildWithVectorIndex(dm_file, read_cols, row_key_ranges, std::make_shared()); ASSERT_INPUTSTREAM_COLS_UR( @@ -1204,6 +1204,7 @@ class VectorIndexSegmentTestBase snapshot, {range}, std::make_shared(wrapWithANNQueryInfo({}, ann_query)), + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp index 226760ff9c4..d4453abd195 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp @@ -16,11 +16,14 @@ #include #include #include +#include #include #include #include #include #include + + using namespace std::chrono_literals; using namespace DB::tests; @@ -371,7 +374,7 @@ TEST_F(SegmentBitmapFilterTest, CleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -393,7 +396,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -413,7 +416,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), 1, DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -441,7 +444,7 @@ TEST_F(SegmentBitmapFilterTest, StableRange) *dm_context, snap, {buildRowKeyRange(10000, 50000)}, // [10000, 50000) - EMPTY_RS_OPERATOR, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -510,7 +513,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - nullptr, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 30); @@ -540,7 +543,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - nullptr, + DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 750); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp index 0f8c77f515d..f2c10e2a677 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -1025,7 +1025,7 @@ std::vector SegmentTestBasic::readSegment(PageIdU64 segment_id, bool need columns_to_read, snapshot, ranges.empty() ? RowKeyRanges{segment->getRowKeyRange()} : ranges, - nullptr, + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, need_row_id); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp index 549e335a951..01e38b58c1e 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_skippable_block_input_stream.cpp @@ -105,6 +105,7 @@ class SkippableBlockInputStreamTest : public SegmentTestBasic columns_to_read, read_ranges, EMPTY_RS_OPERATOR, + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, ReadTag::Internal); From f3d46f7cb8cec55fbe9c152ebe152dcb6249e396 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 11:13:38 +0800 Subject: [PATCH 02/11] fix Signed-off-by: Lloyd-Pottiger --- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 2 +- .../Storages/DeltaMerge/File/ColumnStream.cpp | 2 +- .../File/DMFileBlockInputStream.cpp | 20 +++++++-- .../DeltaMerge/File/DMFileBlockInputStream.h | 3 +- .../DeltaMerge/File/DMFilePackFilter.cpp | 6 +-- .../DeltaMerge/File/DMFilePackFilter.h | 4 +- .../File/DMFilePackFilterResult.cpp | 9 ++-- .../DeltaMerge/File/DMFilePackFilterResult.h | 45 +++++++++++++------ .../Storages/DeltaMerge/File/DMFileReader.cpp | 14 +++--- .../Storages/DeltaMerge/File/DMFileReader.h | 4 +- .../DMFileWithVectorIndexBlockInputStream.cpp | 2 +- dbms/src/Storages/DeltaMerge/Index/RSResult.h | 7 +-- dbms/src/Storages/DeltaMerge/Segment.cpp | 8 ++-- .../Storages/DeltaMerge/StableValueSpace.cpp | 26 +++++------ .../tests/gtest_dm_delta_merge_store.cpp | 4 +- .../DeltaMerge/tests/gtest_dm_file.cpp | 6 +-- .../DeltaMerge/tests/gtest_dm_segment.cpp | 6 +-- .../DeltaMerge/tests/gtest_segment_bitmap.cpp | 12 ++--- 18 files changed, 103 insertions(+), 77 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 90aa65c10ca..931247d2f9f 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -45,7 +45,7 @@ void ColumnFileBig::calculateStat(const DMContext & dm_context) {segment_range}, EMPTY_RS_OPERATOR, {}); - std::tie(valid_rows, valid_bytes) = pack_filter.validRowsAndBytes(); + std::tie(valid_rows, valid_bytes) = pack_filter->validRowsAndBytes(); } void ColumnFileBig::removeData(WriteBatches & wbs) const diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index f7c4c31f40a..9e2511b45fa 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -157,7 +157,7 @@ std::unique_ptr ColumnReadStream::buildColDataRe // Try to get the largest buffer size of reading continuous packs size_t buffer_size = 0; - const auto & pack_res = reader.pack_filter.getPackResConst(); + const auto & pack_res = reader.pack_filter->getPackResConst(); for (size_t i = 0; i < n_packs; /*empty*/) { if (!pack_res[i].isUse()) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index b3aee6221fb..80070240de3 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -65,6 +65,13 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( max_sharing_column_bytes_for_all = 0; } + // If pack_filter is not set, we will create a default one. + if (!pack_filter) + { + pack_filter + = std::make_shared(index_cache, file_provider, read_limiter, scan_context, dmfile); + } + DMFileReader reader( dmfile, read_columns, @@ -73,7 +80,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( enable_del_clean_read, is_fast_scan, max_data_version, - *pack_filter, + pack_filter, mark_cache, enable_column_cache, column_cache, @@ -165,6 +172,13 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle; + // If pack_filter is not set, we will create a default one. + if (!pack_filter) + { + pack_filter + = std::make_shared(index_cache, file_provider, read_limiter, scan_context, dmfile); + } + DMFileReader rest_columns_reader( dmfile, rest_columns, @@ -173,7 +187,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn enable_del_clean_read, is_fast_scan, max_data_version, - *pack_filter, + pack_filter, mark_cache, enable_column_cache, column_cache, @@ -185,7 +199,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn tracing_id, enable_read_thread, scan_context, - ReadTag::Query); + read_tag); if (column_cache_long_term && pk_col_id) // ColumnCacheLongTerm is only filled in Vector Search. diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h index a2a89ae7f26..23b30ace2c1 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.h @@ -138,7 +138,7 @@ class DMFileBlockInputStreamBuilder return *this; } - DMFileBlockInputStreamBuilder setAnnQureyInfo(const ANNQueryInfoPtr & ann_query_info_) + DMFileBlockInputStreamBuilder & setAnnQureyInfo(const ANNQueryInfoPtr & ann_query_info_) { ann_query_info = ann_query_info_; return *this; @@ -162,6 +162,7 @@ class DMFileBlockInputStreamBuilder read_one_pack_every_time = true; return *this; } + DMFileBlockInputStreamBuilder & setRowsThreshold(size_t rows_threshold_per_read_) { rows_threshold_per_read = rows_threshold_per_read_; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 3bbe0bd0967..f93559dae8f 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -21,12 +21,12 @@ namespace DB::DM { -DMFilePackFilterResult DMFilePackFilter::load(const DMContext & dm_context) +DMFilePackFilterResultPtr DMFilePackFilter::load(const DMContext & dm_context) { Stopwatch watch; SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); - DMFilePackFilterResult result(dm_context, dmfile, pack_count); + DMFilePackFilterResult result(dm_context, dmfile); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) { @@ -153,7 +153,7 @@ DMFilePackFilterResult DMFilePackFilter::load(const DMContext & dm_context) some_count, all_count, all_null_count); - return result; + return std::make_shared(std::move(result)); } void DMFilePackFilter::loadIndex( diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 2483df83c52..af9ba2d8a66 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -45,7 +45,7 @@ class DMFilePackFilter public: // Empty `rowkey_ranges` means do not filter by rowkey_ranges - static DMFilePackFilterResult loadFrom( + static DMFilePackFilterResultPtr loadFrom( const DMContext & dm_context, const DMFilePtr & dmfile, bool set_cache_if_miss, @@ -91,7 +91,7 @@ class DMFilePackFilter , read_limiter(read_limiter_) {} - DMFilePackFilterResult load(const DMContext & dm_context); + DMFilePackFilterResultPtr load(const DMContext & dm_context); static void loadIndex( ColumnIndexes & indexes, diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp index 7f431338a75..2162d283733 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include @@ -72,12 +71,12 @@ void DMFilePackFilterResult::tryLoadIndex(ColId col_id) const DMFilePackFilter::loadIndex( param.indexes, dmfile, - dm_context.global_context.getFileProvider(), - dm_context.global_context.getMinMaxIndexCache(), + file_provider, + index_cache, true, col_id, - dm_context.global_context.getReadLimiter(), - dm_context.scan_context); + read_limiter, + scan_context); } } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index 5ac4e2b8006..eaccb7d3474 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -31,15 +32,33 @@ class DMFilePackFilterResult friend class DMFilePackFilter; public: - DMFilePackFilterResult(const DMContext & dm_context_, const DMFilePtr & dmfile_, size_t pack_count_) - : dm_context(dm_context_) + DMFilePackFilterResult(const DMContext & dm_context_, const DMFilePtr & dmfile_) + : index_cache(dm_context_.global_context.getMinMaxIndexCache()) + , file_provider(dm_context_.global_context.getFileProvider()) + , read_limiter(dm_context_.global_context.getReadLimiter()) + , scan_context(dm_context_.scan_context) , dmfile(dmfile_) - , handle_res(pack_count_, RSResult::All) + , handle_res(dmfile->getPacks(), RSResult::All) + , pack_res(dmfile->getPacks(), RSResult::Some) + {} + + DMFilePackFilterResult( + const MinMaxIndexCachePtr & index_cache_, + const FileProviderPtr & file_provider_, + const ReadLimiterPtr & read_limiter_, + const ScanContextPtr & scan_context, + const DMFilePtr & dmfile_) + : index_cache(index_cache_) + , file_provider(file_provider_) + , read_limiter(read_limiter_) + , scan_context(scan_context) + , dmfile(dmfile_) + , handle_res(dmfile->getPacks(), RSResult::All) + , pack_res(dmfile->getPacks(), RSResult::Some) {} const RSResults & getHandleRes() const { return handle_res; } const RSResults & getPackResConst() const { return pack_res; } - RSResults & getPackRes() { return pack_res; } UInt64 countUsePack() const; Handle getMinHandle(size_t pack_id) const @@ -66,19 +85,13 @@ class DMFilePackFilterResult return minmax_index->getUInt64MinMax(pack_id).second; } - static DMFilePackFilterResultPtr emptyResult(const DMContext & dm_context, const DMFilePtr & dmfile) - { - return std::make_shared(dm_context, dmfile, 0); - } - - static DMFilePackFilterResults emptyResults(const DMContext & dm_context, const DMFiles & files) + // Only for test + static DMFilePackFilterResults defaultResults(const DMContext & dm_context, const DMFiles & files) { DMFilePackFilterResults results; results.reserve(files.size()); for (const auto & file : files) - { - results.push_back(emptyResult(dm_context, file)); - } + results.push_back(std::make_shared(dm_context, file)); return results; } @@ -92,7 +105,11 @@ class DMFilePackFilterResult void tryLoadIndex(ColId col_id) const; private: - const DMContext & dm_context; + MinMaxIndexCachePtr index_cache; + FileProviderPtr file_provider; + ReadLimiterPtr read_limiter; + + const ScanContextPtr scan_context; DMFilePtr dmfile; mutable RSCheckParam param; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 125db7956c8..64bfe16f7f7 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -46,7 +46,7 @@ DMFileReader::DMFileReader( bool is_fast_scan_, UInt64 max_read_version_, // filters - const DMFilePackFilterResult & pack_filter_, + const DMFilePackFilterResultPtr & pack_filter_, // caches const MarkCachePtr & mark_cache_, bool enable_column_cache_, @@ -260,7 +260,7 @@ Block DMFileReader::readImpl(const ReadBlockInfo & read_info) }); const auto & pack_stats = dmfile->getPackStats(); const auto & pack_properties = dmfile->getPackProperties(); - const auto & handle_res = pack_filter.getHandleRes(); // alias of handle_res in pack_filter + const auto & handle_res = pack_filter->getHandleRes(); // alias of handle_res in pack_filter std::vector handle_column_clean_read_packs; std::vector del_column_clean_read_packs; std::vector version_column_clean_read_packs; @@ -311,7 +311,7 @@ Block DMFileReader::readImpl(const ReadBlockInfo & read_info) // If all handle in a pack are in the given range, no not_clean rows, and max version <= max_read_version, // we do not need to read handle column. if (handle_res[i] == RSResult::All && pack_stats[i].not_clean == 0 - && pack_filter.getMaxVersion(i) <= max_read_version) + && pack_filter->getMaxVersion(i) <= max_read_version) { handle_column_clean_read_packs.push_back(i); version_column_clean_read_packs.push_back(i); @@ -374,12 +374,12 @@ ColumnPtr DMFileReader::cleanRead( { if (is_common_handle) { - StringRef min_handle = pack_filter.getMinStringHandle(range.first); + StringRef min_handle = pack_filter->getMinStringHandle(range.first); return cd.type->createColumnConst(rows_count, Field(min_handle.data, min_handle.size)); } else { - Handle min_handle = pack_filter.getMinHandle(range.first); + Handle min_handle = pack_filter->getMinHandle(range.first); return cd.type->createColumnConst(rows_count, Field(min_handle)); } } @@ -706,7 +706,7 @@ void DMFileReader::addSkippedRows(UInt64 rows) void DMFileReader::initReadBlockInfos() { - const auto & pack_res = pack_filter.getPackResConst(); + const auto & pack_res = pack_filter->getPackResConst(); const auto & pack_stats = dmfile->getPackStats(); const size_t read_pack_limit = read_one_pack_every_time ? 1 : std::numeric_limits::max(); @@ -756,7 +756,7 @@ std::vector DMFileReader::splitReadBlockInfos( { const auto pack_end = read_info.start_pack_id + read_info.pack_count; const size_t start_row_offset = pack_offset[read_info.start_pack_id]; - const auto & pack_res = pack_filter.getPackResConst(); + const auto & pack_res = pack_filter->getPackResConst(); const auto & pack_stats = dmfile->getPackStats(); std::vector new_read_block_infos; new_read_block_infos.reserve(pack_end - read_info.start_pack_id); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h index a2336d3f7f9..bb477865c56 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.h @@ -56,7 +56,7 @@ class DMFileReader // The the MVCC filter version. Used by clean read check. UInt64 max_read_version_, // filters - const DMFilePackFilterResult & pack_filter_, + const DMFilePackFilterResultPtr & pack_filter_, // caches const MarkCachePtr & mark_cache_, bool enable_column_cache_, @@ -184,7 +184,7 @@ class DMFileReader const UInt64 max_read_version; /// Filters - const DMFilePackFilterResult & pack_filter; + const DMFilePackFilterResultPtr pack_filter; /// Caches MarkCachePtr mark_cache; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp index 6f4ac3b10d2..dc4553455d4 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp @@ -151,7 +151,7 @@ void DMFileWithVectorIndexBlockInputStream::updateReadBlockInfos() read_block_infos.clear(); const auto & pack_stats = dmfile->getPackStats(); - const auto & pack_res = reader.pack_filter.getPackResConst(); + const auto & pack_res = reader.pack_filter->getPackResConst(); // Update valid_packs_before_search for (const auto res : pack_res) diff --git a/dbms/src/Storages/DeltaMerge/Index/RSResult.h b/dbms/src/Storages/DeltaMerge/Index/RSResult.h index e52ce7bbfb6..f76b0fbd96d 100644 --- a/dbms/src/Storages/DeltaMerge/Index/RSResult.h +++ b/dbms/src/Storages/DeltaMerge/Index/RSResult.h @@ -46,9 +46,6 @@ class RSResult static ValueResult logicalAnd(ValueResult v0, ValueResult v1) noexcept; static ValueResult logicalOr(ValueResult v0, ValueResult v1) noexcept; - // Deleting or privating constructors, so that cannot create invalid objects. - // Use the static member variables below. - RSResult() = delete; RSResult(ValueResult v_, bool has_null_) : v(v_) , has_null(has_null_) @@ -60,6 +57,10 @@ class RSResult bool has_null; public: + // Deleting constructors, so that cannot create invalid objects. + // Use the static member variables below. + RSResult() = delete; + bool isUse() const noexcept { return v != ValueResult::None; } bool allMatch() const noexcept { return *this == RSResult::All; } diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 79ce22e6b92..fc9c4b8902d 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -950,16 +950,17 @@ BlockInputStreamPtr Segment::getInputStream( // load DMilePackFilterResult for each DMFile DMFilePackFilterResults pack_filter_results; + pack_filter_results.reserve(segment_snap->stable->getDMFiles().size()); for (const auto & dmfile : segment_snap->stable->getDMFiles()) { - auto result = std::make_shared(DMFilePackFilter::loadFrom( + auto result = DMFilePackFilter::loadFrom( dm_context, dmfile, /*set_cache_if_miss*/ true, read_ranges, filter ? filter->rs_operator : EMPTY_RS_OPERATOR, - /*read_pack*/ {})); - pack_filter_results.emplace_back(std::move(result)); + /*read_pack*/ {}); + pack_filter_results.push_back(result); } switch (read_mode) @@ -3525,6 +3526,7 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( read_data_block_rows); } + std::cout << "getBitmapFilterInputStream" << std::endl; auto stream = getConcatSkippableBlockInputStream( bitmap_filter, segment_snap, diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 9d6d7b8a707..018f10c4eab 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -58,7 +58,7 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang {range}, EMPTY_RS_OPERATOR, {}); - auto [file_valid_rows, file_valid_bytes] = pack_filter.validRowsAndBytes(); + auto [file_valid_rows, file_valid_bytes] = pack_filter->validRowsAndBytes(); rows += file_valid_rows; bytes += file_valid_bytes; } @@ -377,12 +377,12 @@ void StableValueSpace::calculateStableProperty( {rowkey_range}, EMPTY_RS_OPERATOR, {}); - const auto & pack_res = pack_filter.getPackResConst(); + const auto & pack_res = pack_filter->getPackResConst(); size_t new_pack_properties_index = 0; const bool use_new_pack_properties = pack_properties.property_size() == 0; if (use_new_pack_properties) { - const size_t use_packs_count = pack_filter.countUsePack(); + const size_t use_packs_count = pack_filter->countUsePack(); RUNTIME_CHECK_MSG( static_cast(new_pack_properties.property_size()) == use_packs_count, @@ -472,15 +472,12 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( streams.reserve(stable->files.size()); rows.reserve(stable->files.size()); - for (size_t i = 0; i < stable->files.size(); i++) + for (size_t i = 0; i < stable->files.size(); ++i) { DMFileBlockInputStreamBuilder builder(context.global_context); - const auto & pack_filter_result = !pack_filter_results.empty() - ? pack_filter_results[i] - : DMFilePackFilterResult::emptyResult(context, stable->files[i]); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) .enableColumnCacheLongTerm(context.pk_col_id) - .setDMFilePackFilterResult(pack_filter_result) + .setDMFilePackFilterResult(!pack_filter_results.empty() ? pack_filter_results[i] : nullptr) .setColumnCache(column_caches[i]) .setTracingID(context.tracing_id) .setRowsThreshold(expected_block_size) @@ -536,16 +533,13 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe size_t last_rows = 0; - for (size_t i = 0; i < stable->files.size(); i++) + for (size_t i = 0; i < stable->files.size(); ++i) { DMFileBlockInputStreamBuilder builder(context.global_context); - const auto & pack_filter_result = !pack_filter_results.empty() - ? pack_filter_results[i] - : DMFilePackFilterResult::emptyResult(context, stable->files[i]); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) .enableColumnCacheLongTerm(context.pk_col_id) .setAnnQureyInfo(ann_query_info) - .setDMFilePackFilterResult(pack_filter_result) + .setDMFilePackFilterResult(!pack_filter_results.empty() ? pack_filter_results[i] : nullptr) .setColumnCache(column_caches[i]) .setTracingID(context.tracing_id) .setRowsThreshold(expected_block_size) @@ -553,7 +547,7 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe .setReadTag(read_tag); if (bitmap_filter) { - builder = builder.setBitmapFilter( + builder.setBitmapFilter( BitmapFilterView(bitmap_filter, last_rows, last_rows + stable->files[i]->getRows())); last_rows += stable->files[i]->getRows(); } @@ -604,7 +598,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & RSOperatorPtr{}, IdSetPtr{}); const auto & pack_stats = f->getPackStats(); - const auto & pack_res = filter.getPackResConst(); + const auto & pack_res = filter->getPackResConst(); for (size_t i = 0; i < pack_stats.size(); ++i) { if (pack_res[i].isUse()) @@ -644,7 +638,7 @@ StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, co {range}, RSOperatorPtr{}, IdSetPtr{}); - const auto & handle_filter_result = filter.getHandleRes(); + const auto & handle_filter_result = filter->getHandleRes(); if (file_idx == 0) { // TODO: this check may not be correct when support multiple files in a stable, let's just keep it now for simplicity diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 226a3bb4cea..94f3f94d45d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -2778,7 +2778,7 @@ Block createBlock(const ColumnDefine & cd, size_t begin, size_t end) } // namespace -TEST_F(DeltaMergeStoreTest, ReadLegacyStringData_CFTiny) +TEST_F(DeltaMergeStoreTest, ReadLegacyStringDataCFTiny) try { // Write legacy string data to CFTiny. @@ -2843,7 +2843,7 @@ try } CATCH -TEST_F(DeltaMergeStoreTest, ReadLegacyStringData_DMFile) +TEST_F(DeltaMergeStoreTest, ReadLegacyStringDataDMFile) try { // Write legacy string data to DMFile. diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index 7235bab5c5c..3f2919e5ef3 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -1576,8 +1576,7 @@ try // Filtered by rough set filter auto filter = toRSFilter(i64_cd, range); const auto read_ranges = RowKeyRanges{RowKeyRange::newAll(false, 1)}; - auto pack_result = std::make_shared( - DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {})); + auto pack_result = DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {}); // Test read DMFileBlockInputStreamBuilder builder(dbContext()); auto stream = builder.setColumnCache(column_cache) @@ -1659,8 +1658,7 @@ try filters.emplace_back(createOr({one_part_filter, createUnsupported("test")}), num_rows_write); auto test_read_filter = [&](const DM::RSOperatorPtr & filter, const size_t num_rows_should_read) { const auto read_ranges = RowKeyRanges{RowKeyRange::newAll(false, 1)}; - auto pack_result = std::make_shared( - DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {})); + auto pack_result = DMFilePackFilter::loadFrom(dmContext(), dm_file, false, read_ranges, filter, {}); // Test read DMFileBlockInputStreamBuilder builder(dbContext()); auto stream = builder.setColumnCache(column_cache) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 6c7d5c8ed76..5f3930af12e 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -1088,7 +1088,7 @@ try dmContext(), segment_snap, real_ranges, - DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); // the bitmap only contains the overlapped packs of ColumnFileBig. So only 60 here. @@ -1108,7 +1108,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, - DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); @@ -1150,7 +1150,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, - DMFilePackFilterResult::emptyResults(dmContext(), segment_snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp index d4453abd195..00b030351ac 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp @@ -374,7 +374,7 @@ TEST_F(SegmentBitmapFilterTest, CleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -396,7 +396,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -416,7 +416,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), 1, DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -444,7 +444,7 @@ TEST_F(SegmentBitmapFilterTest, StableRange) *dm_context, snap, {buildRowKeyRange(10000, 50000)}, // [10000, 50000) - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -513,7 +513,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 30); @@ -543,7 +543,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::emptyResults(*dm_context, snap->stable->getDMFiles()), + DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 750); From 08b61f2d34c9667392c7fe00aeef01ec4b65e6d7 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 11:30:38 +0800 Subject: [PATCH 03/11] fix ut Signed-off-by: Lloyd-Pottiger --- .../tests/gtest_dm_delta_merge_store.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 94f3f94d45d..8a4b363a269 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -4172,7 +4172,14 @@ try return filter; }; - DB::registerFunctions(); + try + { + DB::registerFunctions(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. + } constexpr Int64 num_rows = 128; auto filter_all = create_filter(0); @@ -4295,7 +4302,14 @@ try return filter; }; - DB::registerFunctions(); + try + { + DB::registerFunctions(); + } + catch (DB::Exception &) + { + // Maybe another test has already registered, ignore exception here. + } constexpr Int64 num_rows = 128; auto filter_all = create_filter(0); From 692bd0abadac37710a1069f25e042f8ae3761155 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 11:53:08 +0800 Subject: [PATCH 04/11] rename Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp | 2 +- dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h | 2 +- dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp | 4 ++-- .../DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp | 2 +- dbms/src/Storages/DeltaMerge/Segment.cpp | 2 +- dbms/src/Storages/DeltaMerge/StableValueSpace.cpp | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index 9e2511b45fa..9a9f894bdc8 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -157,7 +157,7 @@ std::unique_ptr ColumnReadStream::buildColDataRe // Try to get the largest buffer size of reading continuous packs size_t buffer_size = 0; - const auto & pack_res = reader.pack_filter->getPackResConst(); + const auto & pack_res = reader.pack_filter->getPackRes(); for (size_t i = 0; i < n_packs; /*empty*/) { if (!pack_res[i].isUse()) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index eaccb7d3474..fb3fd953350 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -58,7 +58,7 @@ class DMFilePackFilterResult {} const RSResults & getHandleRes() const { return handle_res; } - const RSResults & getPackResConst() const { return pack_res; } + const RSResults & getPackRes() const { return pack_res; } UInt64 countUsePack() const; Handle getMinHandle(size_t pack_id) const diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 64bfe16f7f7..181b86b13f8 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -706,7 +706,7 @@ void DMFileReader::addSkippedRows(UInt64 rows) void DMFileReader::initReadBlockInfos() { - const auto & pack_res = pack_filter->getPackResConst(); + const auto & pack_res = pack_filter->getPackRes(); const auto & pack_stats = dmfile->getPackStats(); const size_t read_pack_limit = read_one_pack_every_time ? 1 : std::numeric_limits::max(); @@ -756,7 +756,7 @@ std::vector DMFileReader::splitReadBlockInfos( { const auto pack_end = read_info.start_pack_id + read_info.pack_count; const size_t start_row_offset = pack_offset[read_info.start_pack_id]; - const auto & pack_res = pack_filter->getPackResConst(); + const auto & pack_res = pack_filter->getPackRes(); const auto & pack_stats = dmfile->getPackStats(); std::vector new_read_block_infos; new_read_block_infos.reserve(pack_end - read_info.start_pack_id); diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp index dc4553455d4..a7e65ab9461 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileWithVectorIndexBlockInputStream.cpp @@ -151,7 +151,7 @@ void DMFileWithVectorIndexBlockInputStream::updateReadBlockInfos() read_block_infos.clear(); const auto & pack_stats = dmfile->getPackStats(); - const auto & pack_res = reader.pack_filter->getPackResConst(); + const auto & pack_res = reader.pack_filter->getPackRes(); // Update valid_packs_before_search for (const auto res : pack_res) diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index fc9c4b8902d..ba87ce541be 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3151,7 +3151,7 @@ std::pair, std::vector> parseDMFilePackInfo( { const auto & dmfile = dmfiles[i]; const auto & pack_filter = pack_filter_result[i]; - const auto & pack_res = pack_filter->getPackResConst(); + const auto & pack_res = pack_filter->getPackRes(); const auto & handle_res = pack_filter->getHandleRes(); const auto & pack_stats = dmfile->getPackStats(); diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 018f10c4eab..3a9347498fb 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -377,7 +377,7 @@ void StableValueSpace::calculateStableProperty( {rowkey_range}, EMPTY_RS_OPERATOR, {}); - const auto & pack_res = pack_filter->getPackResConst(); + const auto & pack_res = pack_filter->getPackRes(); size_t new_pack_properties_index = 0; const bool use_new_pack_properties = pack_properties.property_size() == 0; if (use_new_pack_properties) @@ -598,7 +598,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & RSOperatorPtr{}, IdSetPtr{}); const auto & pack_stats = f->getPackStats(); - const auto & pack_res = filter->getPackResConst(); + const auto & pack_res = filter->getPackRes(); for (size_t i = 0; i < pack_stats.size(); ++i) { if (pack_res[i].isUse()) From 4b01d9375af31c1d5397b312b0d8ef604ff6c5cd Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 14:23:43 +0800 Subject: [PATCH 05/11] fix ut Signed-off-by: Lloyd-Pottiger --- .../File/DMFileBlockInputStream.cpp | 26 +++++++++++++--- .../DeltaMerge/File/DMFilePackFilter.cpp | 4 +-- .../DeltaMerge/File/DMFilePackFilter.h | 31 +++++++++++++++++-- .../DeltaMerge/File/DMFilePackFilterResult.h | 23 +------------- dbms/src/Storages/DeltaMerge/Segment.cpp | 1 - .../DeltaMerge/tests/gtest_dm_segment.cpp | 6 ++-- .../tests/gtest_dm_vector_index.cpp | 16 +++++++++- .../DeltaMerge/tests/gtest_segment_bitmap.cpp | 29 ++++++++++++----- 8 files changed, 93 insertions(+), 43 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index 80070240de3..49ba97a93f3 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -68,8 +68,17 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( // If pack_filter is not set, we will create a default one. if (!pack_filter) { - pack_filter - = std::make_shared(index_cache, file_provider, read_limiter, scan_context, dmfile); + pack_filter = DMFilePackFilter::loadFrom( + index_cache, + file_provider, + read_limiter, + scan_context, + dmfile, + true, + rowkey_ranges, + EMPTY_RS_OPERATOR, + read_packs, + tracing_id); } DMFileReader reader( @@ -175,8 +184,17 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn // If pack_filter is not set, we will create a default one. if (!pack_filter) { - pack_filter - = std::make_shared(index_cache, file_provider, read_limiter, scan_context, dmfile); + pack_filter = DMFilePackFilter::loadFrom( + index_cache, + file_provider, + read_limiter, + scan_context, + dmfile, + true, + rowkey_ranges, + EMPTY_RS_OPERATOR, + read_packs, + tracing_id); } DMFileReader rest_columns_reader( diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index f93559dae8f..2ede65781be 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -21,12 +21,12 @@ namespace DB::DM { -DMFilePackFilterResultPtr DMFilePackFilter::load(const DMContext & dm_context) +DMFilePackFilterResultPtr DMFilePackFilter::load() { Stopwatch watch; SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); - DMFilePackFilterResult result(dm_context, dmfile); + DMFilePackFilterResult result(index_cache, file_provider, read_limiter, scan_context, dmfile); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) { diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index af9ba2d8a66..cf258ac0fe5 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -64,7 +65,33 @@ class DMFilePackFilter dm_context.global_context.getReadLimiter(), dm_context.scan_context, dm_context.tracing_id); - return pack_filter.load(dm_context); + return pack_filter.load(); + } + + static DMFilePackFilterResultPtr loadFrom( + const MinMaxIndexCachePtr & index_cache_, + const FileProviderPtr & file_provider_, + const ReadLimiterPtr & read_limiter_, + const ScanContextPtr & scan_context, + const DMFilePtr & dmfile, + bool set_cache_if_miss, + const RowKeyRanges & rowkey_ranges, + const RSOperatorPtr & filter, + const IdSetPtr & read_packs, + const String & tracing_id) + { + DMFilePackFilter pack_filter( + dmfile, + index_cache_, + set_cache_if_miss, + rowkey_ranges, + filter, + read_packs, + file_provider_, + read_limiter_, + scan_context, + tracing_id); + return pack_filter.load(); } private: @@ -91,7 +118,7 @@ class DMFilePackFilter , read_limiter(read_limiter_) {} - DMFilePackFilterResultPtr load(const DMContext & dm_context); + DMFilePackFilterResultPtr load(); static void loadIndex( ColumnIndexes & indexes, diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index fb3fd953350..8133deb4c7f 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -14,9 +14,8 @@ #pragma once -#include -#include #include +#include #include #include @@ -32,16 +31,6 @@ class DMFilePackFilterResult friend class DMFilePackFilter; public: - DMFilePackFilterResult(const DMContext & dm_context_, const DMFilePtr & dmfile_) - : index_cache(dm_context_.global_context.getMinMaxIndexCache()) - , file_provider(dm_context_.global_context.getFileProvider()) - , read_limiter(dm_context_.global_context.getReadLimiter()) - , scan_context(dm_context_.scan_context) - , dmfile(dmfile_) - , handle_res(dmfile->getPacks(), RSResult::All) - , pack_res(dmfile->getPacks(), RSResult::Some) - {} - DMFilePackFilterResult( const MinMaxIndexCachePtr & index_cache_, const FileProviderPtr & file_provider_, @@ -85,16 +74,6 @@ class DMFilePackFilterResult return minmax_index->getUInt64MinMax(pack_id).second; } - // Only for test - static DMFilePackFilterResults defaultResults(const DMContext & dm_context, const DMFiles & files) - { - DMFilePackFilterResults results; - results.reserve(files.size()); - for (const auto & file : files) - results.push_back(std::make_shared(dm_context, file)); - return results; - } - // Get valid rows and bytes after filter invalid packs by handle_range and filter std::pair validRowsAndBytes(); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index ba87ce541be..6080c076677 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3526,7 +3526,6 @@ BlockInputStreamPtr Segment::getBitmapFilterInputStream( read_data_block_rows); } - std::cout << "getBitmapFilterInputStream" << std::endl; auto stream = getConcatSkippableBlockInputStream( bitmap_filter, segment_snap, diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 5f3930af12e..1d32a8c069b 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -1088,7 +1088,7 @@ try dmContext(), segment_snap, real_ranges, - DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); // the bitmap only contains the overlapped packs of ColumnFileBig. So only 60 here. @@ -1108,7 +1108,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, - DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); @@ -1150,7 +1150,7 @@ try segment_snap, {RowKeyRange::newAll(false, 1)}, EMPTY_FILTER, - DMFilePackFilterResult::defaultResults(dmContext(), segment_snap->stable->getDMFiles()), + {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp index dcc9f38d50a..c1b17e94f07 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp @@ -1198,13 +1198,27 @@ class VectorIndexSegmentTestBase { auto range = buildRowKeyRange(begin, end); auto [segment, snapshot] = getSegmentForRead(segment_id); + // load DMilePackFilterResult for each DMFile + DMFilePackFilterResults pack_filter_results; + pack_filter_results.reserve(snapshot->stable->getDMFiles().size()); + for (const auto & dmfile : snapshot->stable->getDMFiles()) + { + auto result = DMFilePackFilter::loadFrom( + *dm_context, + dmfile, + /*set_cache_if_miss*/ true, + {range}, + EMPTY_RS_OPERATOR, + /*read_pack*/ {}); + pack_filter_results.push_back(result); + } auto stream = segment->getBitmapFilterInputStream( *dm_context, columns_to_read, snapshot, {range}, std::make_shared(wrapWithANNQueryInfo({}, ann_query)), - {}, + pack_filter_results, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE, DEFAULT_BLOCK_SIZE); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp index 00b030351ac..24ec7be1bba 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_bitmap.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -180,6 +180,18 @@ class SegmentBitmapFilterTest : public SegmentTestBasic ASSERT_TRUE(sequenceEqual(expected_handle.data(), handle->data(), test_case.expected_size)); } } + + auto loadPackFilterResults(const SegmentSnapshotPtr & snap, const RowKeyRanges & ranges) + { + DMFilePackFilterResults results; + results.reserve(snap->stable->getDMFiles().size()); + for (const auto & file : snap->stable->getDMFiles()) + { + auto pack_filter = DMFilePackFilter::loadFrom(*dm_context, file, true, ranges, EMPTY_RS_OPERATOR, {}); + results.push_back(pack_filter); + } + return results; + } }; TEST_F(SegmentBitmapFilterTest, InMemory1) @@ -374,7 +386,7 @@ TEST_F(SegmentBitmapFilterTest, CleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -396,7 +408,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -416,7 +428,7 @@ TEST_F(SegmentBitmapFilterTest, NotCleanStable) *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), 1, DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -440,11 +452,12 @@ TEST_F(SegmentBitmapFilterTest, StableRange) ASSERT_EQ(seg->getDelta()->getDeletes(), 0); ASSERT_EQ(seg->getStable()->getRows(), 50000); + auto ranges = std::vector{buildRowKeyRange(10000, 50000)}; // [10000, 50000) auto bitmap_filter = seg->buildBitmapFilterStableOnly( *dm_context, snap, - {buildRowKeyRange(10000, 50000)}, // [10000, 50000) - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + ranges, + loadPackFilterResults(snap, ranges), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_NE(bitmap_filter, nullptr); @@ -513,7 +526,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 30); @@ -543,7 +556,7 @@ try *dm_context, snap, {seg->getRowKeyRange()}, - DMFilePackFilterResult::defaultResults(*dm_context, snap->stable->getDMFiles()), + loadPackFilterResults(snap, {seg->getRowKeyRange()}), std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); ASSERT_EQ(bitmap_filter->size(), 750); From f8decb802231d9bda6aebd2a890a40cb7b23533e Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Wed, 25 Dec 2024 13:33:32 +0800 Subject: [PATCH 06/11] small refine Signed-off-by: JaySon-Huang --- dbms/src/Storages/DeltaMerge/File/DMFile.cpp | 2 +- dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp index 47191e835d4..21048578a4b 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFile.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFile.cpp @@ -219,7 +219,7 @@ size_t DMFile::colIndexSize(ColId id) const } else { - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Index of {} not exist", id); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Index is not exist, col_id={}", id); } } else diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp index 2162d283733..98d6e5da714 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp @@ -73,7 +73,7 @@ void DMFilePackFilterResult::tryLoadIndex(ColId col_id) const dmfile, file_provider, index_cache, - true, + /*set_cache_if_miss=*/true, col_id, read_limiter, scan_context); From fcb462b76f85c625fb6982cd831d1b3b1755e80f Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Tue, 24 Dec 2024 15:34:00 +0800 Subject: [PATCH 07/11] refine Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp | 1 - dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp | 1 + .../Storages/DeltaMerge/File/DMFileBlockInputStream.cpp | 5 ++--- dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp | 3 +++ dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h | 8 +------- .../src/Storages/DeltaMerge/File/DMFilePackFilterResult.h | 3 +-- dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp | 1 + dbms/src/Storages/DeltaMerge/StableValueSpace.cpp | 2 -- dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp | 1 - 9 files changed, 9 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index 931247d2f9f..da544bec985 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index 9a9f894bdc8..019caa74dac 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp index 49ba97a93f3..d7ee14df85d 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileBlockInputStream.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -65,7 +64,7 @@ DMFileBlockInputStreamPtr DMFileBlockInputStreamBuilder::build( max_sharing_column_bytes_for_all = 0; } - // If pack_filter is not set, we will create a default one. + // If pack_filter is not set, load from EMPTY_RS_OPERATOR. if (!pack_filter) { pack_filter = DMFilePackFilter::loadFrom( @@ -181,7 +180,7 @@ SkippableBlockInputStreamPtr DMFileBlockInputStreamBuilder::tryBuildWithVectorIn bool enable_read_thread = SegmentReaderPoolManager::instance().isSegmentReader(); bool is_common_handle = !rowkey_ranges.empty() && rowkey_ranges[0].is_common_handle; - // If pack_filter is not set, we will create a default one. + // If pack_filter is not set, load from EMPTY_RS_OPERATOR. if (!pack_filter) { pack_filter = DMFilePackFilter::loadFrom( diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 2ede65781be..e548483dc3e 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -13,7 +13,10 @@ // limitations under the License. #include +#include +#include #include +#include #include #include diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index cf258ac0fe5..5bf28484085 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -14,21 +14,15 @@ #pragma once -#include -#include -#include -#include #include #include #include #include #include -#include #include -#include #include #include -#include + namespace ProfileEvents { diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index 8133deb4c7f..4d86661ebcd 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -15,7 +15,6 @@ #pragma once #include -#include #include #include @@ -30,7 +29,6 @@ class DMFilePackFilterResult { friend class DMFilePackFilter; -public: DMFilePackFilterResult( const MinMaxIndexCachePtr & index_cache_, const FileProviderPtr & file_provider_, @@ -46,6 +44,7 @@ class DMFilePackFilterResult , pack_res(dmfile->getPacks(), RSResult::Some) {} +public: const RSResults & getHandleRes() const { return handle_res; } const RSResults & getPackRes() const { return pack_res; } UInt64 countUsePack() const; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 181b86b13f8..54acd1ca8dc 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 3a9347498fb..b6606b620a2 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -12,13 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include #include #include #include -#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 1d32a8c069b..3e0ede20007 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include From 89bc2b69962256ad515d832f3ddb8244ba435fe0 Mon Sep 17 00:00:00 2001 From: Lloyd-Pottiger Date: Wed, 25 Dec 2024 14:07:56 +0800 Subject: [PATCH 08/11] address comments Signed-off-by: Lloyd-Pottiger --- dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index 4d86661ebcd..7f3bdf67325 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -21,10 +21,6 @@ namespace DB::DM { -class DMFilePackFilterResult; -using DMFilePackFilterResultPtr = std::shared_ptr; -using DMFilePackFilterResults = std::vector; - class DMFilePackFilterResult { friend class DMFilePackFilter; From 3f681aa8140fe255c43e58f77035000df579b0dd Mon Sep 17 00:00:00 2001 From: JaySon Date: Wed, 25 Dec 2024 15:01:22 +0800 Subject: [PATCH 09/11] Remove shared_ptr to DMFile inside DMFilePackFilterResult (#22) * Remove useless var from DMFilePackFilterResult Signed-off-by: JaySon-Huang * Remove shared_ptr to DMFile inside DMFilePackFilterResult Signed-off-by: JaySon-Huang * fix Signed-off-by: JaySon-Huang --------- Signed-off-by: JaySon-Huang --- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 7 +-- .../DeltaMerge/File/DMFilePackFilter.cpp | 25 +++++++++- .../DeltaMerge/File/DMFilePackFilter.h | 7 +++ .../File/DMFilePackFilterResult.cpp | 22 ++------- .../DeltaMerge/File/DMFilePackFilterResult.h | 48 ++++++++++--------- .../Storages/DeltaMerge/File/DMFileReader.cpp | 6 +-- dbms/src/Storages/DeltaMerge/Segment.cpp | 9 ++-- .../Storages/DeltaMerge/StableValueSpace.cpp | 7 +-- 8 files changed, 75 insertions(+), 56 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index da544bec985..ec7d803b66f 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -37,14 +37,11 @@ ColumnFileBig::ColumnFileBig(const DMContext & dm_context, const DMFilePtr & fil void ColumnFileBig::calculateStat(const DMContext & dm_context) { - auto pack_filter = DMFilePackFilter::loadFrom( + std::tie(valid_rows, valid_bytes) = DMFilePackFilter::loadValidRowsAndBytes( dm_context, file, /*set_cache_if_miss*/ false, - {segment_range}, - EMPTY_RS_OPERATOR, - {}); - std::tie(valid_rows, valid_bytes) = pack_filter->validRowsAndBytes(); + {segment_range}); } void ColumnFileBig::removeData(WriteBatches & wbs) const diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index e548483dc3e..3f8ea38d174 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -24,12 +25,34 @@ namespace DB::DM { +std::pair DMFilePackFilter::loadValidRowsAndBytes( + const DMContext & dm_context, + const DMFilePtr & dmfile, + bool set_cache_if_miss, + const RowKeyRanges & rowkey_ranges) +{ + auto pack_filter = loadFrom(dm_context, dmfile, set_cache_if_miss, rowkey_ranges, EMPTY_RS_OPERATOR, {}); + + size_t rows = 0; + size_t bytes = 0; + const auto & pack_stats = dmfile->getPackStats(); + for (size_t i = 0; i < pack_stats.size(); ++i) + { + if (pack_filter->pack_res[i].isUse()) + { + rows += pack_stats[i].rows; + bytes += pack_stats[i].bytes; + } + } + return {rows, bytes}; +} + DMFilePackFilterResultPtr DMFilePackFilter::load() { Stopwatch watch; SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); - DMFilePackFilterResult result(index_cache, file_provider, read_limiter, scan_context, dmfile); + DMFilePackFilterResult result(index_cache, read_limiter, pack_count); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) { diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 5bf28484085..5099d4fae61 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -39,6 +39,13 @@ class DMFilePackFilter friend class DMFilePackFilterResult; public: + // Get valid rows and bytes after filter invalid packs by rowkey_ranges + static std::pair loadValidRowsAndBytes( + const DMContext & dm_context, + const DMFilePtr & dmfile, + bool set_cache_if_miss, + const RowKeyRanges & rowkey_ranges); + // Empty `rowkey_ranges` means do not filter by rowkey_ranges static DMFilePackFilterResultPtr loadFrom( const DMContext & dm_context, diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp index 98d6e5da714..b5778efeb86 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.cpp @@ -23,22 +23,6 @@ UInt64 DMFilePackFilterResult::countUsePack() const return std::count_if(pack_res.begin(), pack_res.end(), [](RSResult res) { return res.isUse(); }); } -std::pair DMFilePackFilterResult::validRowsAndBytes() -{ - size_t rows = 0; - size_t bytes = 0; - const auto & pack_stats = dmfile->getPackStats(); - for (size_t i = 0; i < pack_stats.size(); ++i) - { - if (pack_res[i].isUse()) - { - rows += pack_stats[i].rows; - bytes += pack_stats[i].bytes; - } - } - return {rows, bytes}; -} - std::tuple DMFilePackFilterResult::countPackRes() const { UInt64 none_count = 0; @@ -59,7 +43,11 @@ std::tuple DMFilePackFilterResult::countPackRes( return {none_count, some_count, all_count, all_null_count}; } -void DMFilePackFilterResult::tryLoadIndex(ColId col_id) const +void DMFilePackFilterResult::tryLoadIndex( + const DMFilePtr & dmfile, + ColId col_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const { if (param.indexes.count(col_id)) return; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h index 7f3bdf67325..dadf054fd88 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilterResult.h @@ -27,17 +27,12 @@ class DMFilePackFilterResult DMFilePackFilterResult( const MinMaxIndexCachePtr & index_cache_, - const FileProviderPtr & file_provider_, const ReadLimiterPtr & read_limiter_, - const ScanContextPtr & scan_context, - const DMFilePtr & dmfile_) + size_t pack_count) : index_cache(index_cache_) - , file_provider(file_provider_) , read_limiter(read_limiter_) - , scan_context(scan_context) - , dmfile(dmfile_) - , handle_res(dmfile->getPacks(), RSResult::All) - , pack_res(dmfile->getPacks(), RSResult::Some) + , handle_res(pack_count, RSResult::All) + , pack_res(pack_count, RSResult::Some) {} public: @@ -45,47 +40,56 @@ class DMFilePackFilterResult const RSResults & getPackRes() const { return pack_res; } UInt64 countUsePack() const; - Handle getMinHandle(size_t pack_id) const + Handle getMinHandle( + const DMFilePtr & dmfile, + size_t pack_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const { if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + tryLoadIndex(dmfile, EXTRA_HANDLE_COLUMN_ID, file_provider, scan_context); auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; return minmax_index->getIntMinMax(pack_id).first; } - StringRef getMinStringHandle(size_t pack_id) const + StringRef getMinStringHandle( + const DMFilePtr & dmfile, + size_t pack_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const { if (!param.indexes.count(EXTRA_HANDLE_COLUMN_ID)) - tryLoadIndex(EXTRA_HANDLE_COLUMN_ID); + tryLoadIndex(dmfile, EXTRA_HANDLE_COLUMN_ID, file_provider, scan_context); auto & minmax_index = param.indexes.find(EXTRA_HANDLE_COLUMN_ID)->second.minmax; return minmax_index->getStringMinMax(pack_id).first; } - UInt64 getMaxVersion(size_t pack_id) const + UInt64 getMaxVersion( + const DMFilePtr & dmfile, + size_t pack_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const { if (!param.indexes.count(VERSION_COLUMN_ID)) - tryLoadIndex(VERSION_COLUMN_ID); + tryLoadIndex(dmfile, VERSION_COLUMN_ID, file_provider, scan_context); auto & minmax_index = param.indexes.find(VERSION_COLUMN_ID)->second.minmax; return minmax_index->getUInt64MinMax(pack_id).second; } - // Get valid rows and bytes after filter invalid packs by handle_range and filter - std::pair validRowsAndBytes(); - // None+NoneNull, Some+SomeNull, All, AllNull std::tuple countPackRes() const; private: - void tryLoadIndex(ColId col_id) const; + void tryLoadIndex( + const DMFilePtr & dmfile, + ColId col_id, + const FileProviderPtr & file_provider, + const ScanContextPtr & scan_context) const; private: MinMaxIndexCachePtr index_cache; - FileProviderPtr file_provider; ReadLimiterPtr read_limiter; - const ScanContextPtr scan_context; - - DMFilePtr dmfile; mutable RSCheckParam param; // `handle_res` is the filter results of `rowkey_ranges`. diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 54acd1ca8dc..b0afccc6d91 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -312,7 +312,7 @@ Block DMFileReader::readImpl(const ReadBlockInfo & read_info) // If all handle in a pack are in the given range, no not_clean rows, and max version <= max_read_version, // we do not need to read handle column. if (handle_res[i] == RSResult::All && pack_stats[i].not_clean == 0 - && pack_filter->getMaxVersion(i) <= max_read_version) + && pack_filter->getMaxVersion(dmfile, i, file_provider, scan_context) <= max_read_version) { handle_column_clean_read_packs.push_back(i); version_column_clean_read_packs.push_back(i); @@ -375,12 +375,12 @@ ColumnPtr DMFileReader::cleanRead( { if (is_common_handle) { - StringRef min_handle = pack_filter->getMinStringHandle(range.first); + StringRef min_handle = pack_filter->getMinStringHandle(dmfile, range.first, file_provider, scan_context); return cd.type->createColumnConst(rows_count, Field(min_handle.data, min_handle.size)); } else { - Handle min_handle = pack_filter->getMinHandle(range.first); + Handle min_handle = pack_filter->getMinHandle(dmfile, range.first, file_provider, scan_context); return cd.type->createColumnConst(rows_count, Field(min_handle)); } } diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 6080c076677..0ecdcd8de68 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -3129,7 +3129,8 @@ struct Range std::pair, std::vector> parseDMFilePackInfo( const DMFiles & dmfiles, const DMFilePackFilterResults & pack_filter_result, - UInt64 start_ts) + UInt64 start_ts, + const DMContext & dm_context) { // Packs that all rows compliant with MVCC filter and RowKey filter requirements. // For building bitmap filter, we don't need to read these packs, @@ -3147,6 +3148,8 @@ std::pair, std::vector> parseDMFilePackInfo( size_t rows = 0; UInt32 preceded_rows = 0; + auto file_provider = dm_context.global_context.getFileProvider(); + for (size_t i = 0; i < dmfiles.size(); ++i) { const auto & dmfile = dmfiles[i]; @@ -3167,7 +3170,7 @@ std::pair, std::vector> parseDMFilePackInfo( } if (handle_res[pack_id] == RSResult::Some || pack_stat.not_clean > 0 - || pack_filter->getMaxVersion(pack_id) > start_ts) + || pack_filter->getMaxVersion(dmfile, pack_id, file_provider, dm_context.scan_context) > start_ts) { // We need to read this pack to do RowKey or MVCC filter. some_packs_set->insert(pack_id); @@ -3216,7 +3219,7 @@ BitmapFilterPtr Segment::buildBitmapFilterStableOnly( return elapse_ns / 1'000'000.0; }; - auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, pack_filter_results, start_ts); + auto [skipped_ranges, some_packs_sets] = parseDMFilePackInfo(dmfiles, pack_filter_results, start_ts, dm_context); if (skipped_ranges.size() == 1 && skipped_ranges[0].offset == 0 && skipped_ranges[0].rows == segment_snap->stable->getDMFilesRows()) diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index b6606b620a2..add74ba2024 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -49,14 +49,11 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang { for (const auto & file : files_) { - auto pack_filter = DMFilePackFilter::loadFrom( + auto [file_valid_rows, file_valid_bytes] = DMFilePackFilter::loadValidRowsAndBytes( *dm_context, file, /*set_cache_if_miss*/ true, - {range}, - EMPTY_RS_OPERATOR, - {}); - auto [file_valid_rows, file_valid_bytes] = pack_filter->validRowsAndBytes(); + {range}); rows += file_valid_rows; bytes += file_valid_bytes; } From 0a97f355973dc7cb1f5d1b77a6b410af953e9bdb Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Wed, 25 Dec 2024 15:11:16 +0800 Subject: [PATCH 10/11] Rename context -> dm_context Signed-off-by: JaySon-Huang --- .../Storages/DeltaMerge/StableValueSpace.cpp | 52 ++++++++++--------- .../Storages/DeltaMerge/StableValueSpace.h | 16 +++--- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index add74ba2024..5d44c600966 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -298,7 +298,7 @@ void StableValueSpace::recordRemovePacksPages(WriteBatches & wbs) const } void StableValueSpace::calculateStableProperty( - const DMContext & context, + const DMContext & dm_context, const RowKeyRange & rowkey_range, bool is_common_handle) { @@ -333,13 +333,13 @@ void StableValueSpace::calculateStableProperty( // // If we pass `segment_range` instead, // then the returned stream is a `SkippableBlockInputStream` which will complicate the implementation - DMFileBlockInputStreamBuilder builder(context.global_context); + DMFileBlockInputStreamBuilder builder(dm_context.global_context); BlockInputStreamPtr data_stream = builder .setRowsThreshold(std::numeric_limits::max()) // because we just read one pack at a time .onlyReadOnePackEveryTime() - .setTracingID(fmt::format("{}-calculateStableProperty", context.tracing_id)) - .build(file, read_columns, RowKeyRanges{rowkey_range}, context.scan_context); + .setTracingID(fmt::format("{}-calculateStableProperty", dm_context.tracing_id)) + .build(file, read_columns, RowKeyRanges{rowkey_range}, dm_context.scan_context); auto mvcc_stream = std::make_shared>( data_stream, read_columns, @@ -366,7 +366,7 @@ void StableValueSpace::calculateStableProperty( mvcc_stream->readSuffix(); } auto pack_filter = DMFilePackFilter::loadFrom( - context, + dm_context, file, /*set_cache_if_miss*/ false, {rowkey_range}, @@ -442,7 +442,7 @@ void StableValueSpace::drop(const FileProviderPtr & file_provider) } SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( - const DMContext & context, + const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, UInt64 max_data_version, @@ -457,7 +457,8 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( { LOG_DEBUG( log, - "start_ts: {}, enable_handle_clean_read: {}, is_fast_mode: {}, enable_del_clean_read: {}", + "StableVS getInputStream" + " start_ts={} enable_handle_clean_read={} is_fast_mode={} enable_del_clean_read={}", max_data_version, enable_handle_clean_read, is_fast_scan, @@ -469,17 +470,17 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( for (size_t i = 0; i < stable->files.size(); ++i) { - DMFileBlockInputStreamBuilder builder(context.global_context); + DMFileBlockInputStreamBuilder builder(dm_context.global_context); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) - .enableColumnCacheLongTerm(context.pk_col_id) + .enableColumnCacheLongTerm(dm_context.pk_col_id) .setDMFilePackFilterResult(!pack_filter_results.empty() ? pack_filter_results[i] : nullptr) .setColumnCache(column_caches[i]) - .setTracingID(context.tracing_id) + .setTracingID(dm_context.tracing_id) .setRowsThreshold(expected_block_size) .setReadPacks(read_packs.size() > i ? read_packs[i] : nullptr) .setReadTag(read_tag); - streams.push_back(builder.build(stable->files[i], read_columns, rowkey_ranges, context.scan_context)); + streams.push_back(builder.build(stable->files[i], read_columns, rowkey_ranges, dm_context.scan_context)); rows.push_back(stable->files[i]->getRows()); } if (need_row_id) @@ -487,19 +488,19 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::getInputStream( return std::make_shared>( streams, std::move(rows), - context.scan_context); + dm_context.scan_context); } else { return std::make_shared>( streams, std::move(rows), - context.scan_context); + dm_context.scan_context); } } SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVectorIndex( - const DMContext & context, + const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, const ANNQueryInfoPtr & ann_query_info, @@ -516,7 +517,8 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe { LOG_DEBUG( log, - "start_ts: {}, enable_handle_clean_read: {}, is_fast_mode: {}, enable_del_clean_read: {}", + "StableVS tryGetInputStreamWithVectorIndex" + " start_ts={} enable_handle_clean_read={} is_fast_mode={} enable_del_clean_read={}", max_data_version, enable_handle_clean_read, is_fast_scan, @@ -530,13 +532,13 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe for (size_t i = 0; i < stable->files.size(); ++i) { - DMFileBlockInputStreamBuilder builder(context.global_context); + DMFileBlockInputStreamBuilder builder(dm_context.global_context); builder.enableCleanRead(enable_handle_clean_read, is_fast_scan, enable_del_clean_read, max_data_version) - .enableColumnCacheLongTerm(context.pk_col_id) + .enableColumnCacheLongTerm(dm_context.pk_col_id) .setAnnQureyInfo(ann_query_info) .setDMFilePackFilterResult(!pack_filter_results.empty() ? pack_filter_results[i] : nullptr) .setColumnCache(column_caches[i]) - .setTracingID(context.tracing_id) + .setTracingID(dm_context.tracing_id) .setRowsThreshold(expected_block_size) .setReadPacks(read_packs.size() > i ? read_packs[i] : nullptr) .setReadTag(read_tag); @@ -551,7 +553,7 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe stable->files[i], read_columns, rowkey_ranges, - context.scan_context)); + dm_context.scan_context)); rows.push_back(stable->files[i]->getRows()); } if (need_row_id) @@ -559,18 +561,18 @@ SkippableBlockInputStreamPtr StableValueSpace::Snapshot::tryGetInputStreamWithVe return std::make_shared>( streams, std::move(rows), - context.scan_context); + dm_context.scan_context); } else { return std::make_shared>( streams, std::move(rows), - context.scan_context); + dm_context.scan_context); } } -RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & context, const RowKeyRange & range) +RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & dm_context, const RowKeyRange & range) const { // Avoid unnecessary reading IO @@ -586,7 +588,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & for (auto & f : stable->files) { auto filter = DMFilePackFilter::loadFrom( - context, + dm_context, f, /*set_cache_if_miss*/ false, {range}, @@ -616,7 +618,7 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & } StableValueSpace::Snapshot::AtLeastRowsAndBytesResult // -StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, const RowKeyRange & range) const +StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & dm_context, const RowKeyRange & range) const { AtLeastRowsAndBytesResult ret{}; @@ -627,7 +629,7 @@ StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & context, co { const auto & file = stable->files[file_idx]; auto filter = DMFilePackFilter::loadFrom( - context, + dm_context, file, /*set_cache_if_miss*/ false, {range}, diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.h b/dbms/src/Storages/DeltaMerge/StableValueSpace.h index 8fdddbaeb48..04b87ba0a88 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.h +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.h @@ -46,8 +46,8 @@ class StableValueSpace : public std::enable_shared_from_this , log(Logger::get()) {} - static StableValueSpacePtr restore(DMContext & context, PageIdU64 id); - static StableValueSpacePtr restore(DMContext & context, ReadBuffer & buf, PageIdU64 id); + static StableValueSpacePtr restore(DMContext & dm_context, PageIdU64 id); + static StableValueSpacePtr restore(DMContext & dm_context, ReadBuffer & buf, PageIdU64 id); static StableValueSpacePtr createFromCheckpoint( // const LoggerPtr & parent_log, @@ -112,7 +112,7 @@ class StableValueSpace : public std::enable_shared_from_this */ size_t getDMFilesBytes() const; - void enableDMFilesGC(DMContext & context); + void enableDMFilesGC(DMContext & dm_context); void recordRemovePacksPages(WriteBatches & wbs) const; @@ -139,7 +139,7 @@ class StableValueSpace : public std::enable_shared_from_this const StableProperty & getStableProperty() const { return property; } - void calculateStableProperty(const DMContext & context, const RowKeyRange & rowkey_range, bool is_common_handle); + void calculateStableProperty(const DMContext & dm_context, const RowKeyRange & rowkey_range, bool is_common_handle); struct Snapshot; using SnapshotPtr = std::shared_ptr; @@ -225,7 +225,7 @@ class StableValueSpace : public std::enable_shared_from_this } SkippableBlockInputStreamPtr getInputStream( - const DMContext & context, // + const DMContext & dm_context, // const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, UInt64 max_data_version, @@ -239,7 +239,7 @@ class StableValueSpace : public std::enable_shared_from_this bool need_row_id = false); SkippableBlockInputStreamPtr tryGetInputStreamWithVectorIndex( - const DMContext & context, + const DMContext & dm_context, const ColumnDefines & read_columns, const RowKeyRanges & rowkey_ranges, const ANNQueryInfoPtr & ann_query_info, @@ -254,7 +254,7 @@ class StableValueSpace : public std::enable_shared_from_this bool need_row_id = false, BitmapFilterPtr bitmap_filter = nullptr); - RowsAndBytes getApproxRowsAndBytes(const DMContext & context, const RowKeyRange & range) const; + RowsAndBytes getApproxRowsAndBytes(const DMContext & dm_context, const RowKeyRange & range) const; struct AtLeastRowsAndBytesResult { @@ -268,7 +268,7 @@ class StableValueSpace : public std::enable_shared_from_this * Get the rows and bytes calculated from packs that is **fully contained** by the given range. * If the pack is partially intersected, then it is not counted. */ - AtLeastRowsAndBytesResult getAtLeastRowsAndBytes(const DMContext & context, const RowKeyRange & range) const; + AtLeastRowsAndBytesResult getAtLeastRowsAndBytes(const DMContext & dm_context, const RowKeyRange & range) const; private: LoggerPtr log; From 154130b44e839f16deea895b2227d2ed3b21f63f Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Wed, 25 Dec 2024 15:41:39 +0800 Subject: [PATCH 11/11] Merge the validRowsAndBytes method Signed-off-by: JaySon-Huang --- .../DeltaMerge/ColumnFile/ColumnFileBig.cpp | 4 +- .../DeltaMerge/File/DMFilePackFilter.cpp | 12 +++--- .../DeltaMerge/File/DMFilePackFilter.h | 10 ++++- .../Storages/DeltaMerge/StableValueSpace.cpp | 39 ++++++------------- 4 files changed, 28 insertions(+), 37 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp index ec7d803b66f..aef83c19bca 100644 --- a/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp +++ b/dbms/src/Storages/DeltaMerge/ColumnFile/ColumnFileBig.cpp @@ -37,11 +37,13 @@ ColumnFileBig::ColumnFileBig(const DMContext & dm_context, const DMFilePtr & fil void ColumnFileBig::calculateStat(const DMContext & dm_context) { - std::tie(valid_rows, valid_bytes) = DMFilePackFilter::loadValidRowsAndBytes( + auto m = DMFilePackFilter::loadValidRowsAndBytes( dm_context, file, /*set_cache_if_miss*/ false, {segment_range}); + valid_rows = m.match_rows; + valid_bytes = m.match_bytes; } void ColumnFileBig::removeData(WriteBatches & wbs) const diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 3f8ea38d174..d18c79108d8 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -25,7 +25,7 @@ namespace DB::DM { -std::pair DMFilePackFilter::loadValidRowsAndBytes( +DMFilePackFilter::MatchDetails DMFilePackFilter::loadValidRowsAndBytes( const DMContext & dm_context, const DMFilePtr & dmfile, bool set_cache_if_miss, @@ -33,18 +33,18 @@ std::pair DMFilePackFilter::loadValidRowsAndBytes( { auto pack_filter = loadFrom(dm_context, dmfile, set_cache_if_miss, rowkey_ranges, EMPTY_RS_OPERATOR, {}); - size_t rows = 0; - size_t bytes = 0; + MatchDetails res; const auto & pack_stats = dmfile->getPackStats(); for (size_t i = 0; i < pack_stats.size(); ++i) { if (pack_filter->pack_res[i].isUse()) { - rows += pack_stats[i].rows; - bytes += pack_stats[i].bytes; + res.match_packs += 1; + res.match_rows += pack_stats[i].rows; + res.match_bytes += pack_stats[i].bytes; } } - return {rows, bytes}; + return res; } DMFilePackFilterResultPtr DMFilePackFilter::load() diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 5099d4fae61..375608cfac6 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -39,8 +39,14 @@ class DMFilePackFilter friend class DMFilePackFilterResult; public: - // Get valid rows and bytes after filter invalid packs by rowkey_ranges - static std::pair loadValidRowsAndBytes( + struct MatchDetails + { + size_t match_packs = 0; + size_t match_rows = 0; + size_t match_bytes = 0; + }; + // Get approximate valid rows and bytes after filter invalid packs by rowkey_ranges + static MatchDetails loadValidRowsAndBytes( const DMContext & dm_context, const DMFilePtr & dmfile, bool set_cache_if_miss, diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index 5d44c600966..b4913ea1fbe 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -49,13 +49,13 @@ void StableValueSpace::setFiles(const DMFiles & files_, const RowKeyRange & rang { for (const auto & file : files_) { - auto [file_valid_rows, file_valid_bytes] = DMFilePackFilter::loadValidRowsAndBytes( + auto match = DMFilePackFilter::loadValidRowsAndBytes( *dm_context, file, /*set_cache_if_miss*/ true, {range}); - rows += file_valid_rows; - bytes += file_valid_bytes; + rows += match.match_rows; + bytes += match.match_bytes; } } @@ -585,26 +585,13 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & // Usually, this method will be called for some "cold" key ranges. // Loading the index into cache may pollute the cache and make the hot index cache invalid. // So don't refill the cache if the index does not exist. + constexpr bool set_cache_if_miss = false; for (auto & f : stable->files) { - auto filter = DMFilePackFilter::loadFrom( - dm_context, - f, - /*set_cache_if_miss*/ false, - {range}, - RSOperatorPtr{}, - IdSetPtr{}); - const auto & pack_stats = f->getPackStats(); - const auto & pack_res = filter->getPackRes(); - for (size_t i = 0; i < pack_stats.size(); ++i) - { - if (pack_res[i].isUse()) - { - ++match_packs; - total_match_rows += pack_stats[i].rows; - total_match_bytes += pack_stats[i].bytes; - } - } + auto match = DMFilePackFilter::loadValidRowsAndBytes(dm_context, f, set_cache_if_miss, {range}); + match_packs += match.match_packs; + total_match_rows += match.match_rows; + total_match_bytes += match.match_bytes; } if (!total_match_rows || !match_packs) return {0, 0}; @@ -625,16 +612,12 @@ StableValueSpace::Snapshot::getAtLeastRowsAndBytes(const DMContext & dm_context, // Usually, this method will be called for some "cold" key ranges. // Loading the index into cache may pollute the cache and make the hot index cache invalid. // So don't refill the cache if the index does not exist. + constexpr bool set_cache_if_miss = false; for (size_t file_idx = 0; file_idx < stable->files.size(); ++file_idx) { const auto & file = stable->files[file_idx]; - auto filter = DMFilePackFilter::loadFrom( - dm_context, - file, - /*set_cache_if_miss*/ false, - {range}, - RSOperatorPtr{}, - IdSetPtr{}); + auto filter + = DMFilePackFilter::loadFrom(dm_context, file, set_cache_if_miss, {range}, RSOperatorPtr{}, IdSetPtr{}); const auto & handle_filter_result = filter->getHandleRes(); if (file_idx == 0) {