From ebb21ef03102e6160c7900f1be81149c6319abeb Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Fri, 6 Dec 2024 17:16:42 +0800 Subject: [PATCH] branch-2.1: [Fix](merge-on-write) Add defensive check before partial update #44687 (#45086) pick https://github.com/apache/doris/pull/44687 --- .../olap/rowset/segment_v2/segment_writer.cpp | 42 +++++++++++++++++-- .../olap/rowset/segment_v2/segment_writer.h | 2 + .../segment_v2/vertical_segment_writer.cpp | 40 +++++++++++++++++- .../segment_v2/vertical_segment_writer.h | 2 + 4 files changed, 81 insertions(+), 5 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index e1696340a361b7..7e5a51f55ce12e 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -354,6 +354,40 @@ void SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) { << watch.elapsed_time() / 1000; } +Status SegmentWriter::partial_update_preconditions_check(size_t row_pos) { + if (!_is_mow()) { + auto msg = fmt::format( + "Can only do partial update on merge-on-write unique table, but found: " + "keys_type={}, _opts.enable_unique_key_merge_on_write={}, tablet_id={}", + _tablet_schema->keys_type(), _opts.enable_unique_key_merge_on_write, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (_opts.rowset_ctx->partial_update_info == nullptr) { + auto msg = + fmt::format("partial_update_info should not be nullptr, please check, tablet_id={}", + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (!_opts.rowset_ctx->partial_update_info->is_partial_update) { + auto msg = fmt::format( + "in fixed partial update code, but is_partial_update=false, please check, " + "tablet_id={}", + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (row_pos != 0) { + auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + return Status::OK(); +} + // for partial update, we should do following steps to fill content of block: // 1. set block data to data convertor, and get all key_column's converted slice // 2. get pk of input block, and read missing columns @@ -372,9 +406,7 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* block->columns(), _tablet_schema->num_key_columns(), _tablet_schema->num_columns())); } - DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write); - - DCHECK(_opts.rowset_ctx->partial_update_info); + RETURN_IF_ERROR(partial_update_preconditions_check(row_pos)); // find missing column cids const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids; const auto& including_cids = _opts.rowset_ctx->partial_update_info->update_cids; @@ -1281,5 +1313,9 @@ Status SegmentWriter::_generate_short_key_index( return Status::OK(); } +inline bool SegmentWriter::_is_mow() const { + return _tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write; +} + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index 8f9e09ed01a406..5f5ab7bc7feeda 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -96,6 +96,7 @@ class SegmentWriter { Status append_row(const RowType& row); Status append_block(const vectorized::Block* block, size_t row_pos, size_t num_rows); + Status partial_update_preconditions_check(size_t row_pos); Status append_block_with_partial_content(const vectorized::Block* block, size_t row_pos, size_t num_rows); @@ -180,6 +181,7 @@ class SegmentWriter { vectorized::IOlapColumnDataAccessor* seq_column, size_t num_rows, bool need_sort); Status _generate_short_key_index(std::vector& key_columns, size_t num_rows, const std::vector& short_key_pos); + bool _is_mow() const; private: uint32_t _segment_id; diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 0feb769638ae56..1be1f1ec180bda 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -304,6 +304,39 @@ void VerticalSegmentWriter::_serialize_block_to_row_column(vectorized::Block& bl << watch.elapsed_time() / 1000; } +Status VerticalSegmentWriter::_partial_update_preconditions_check(size_t row_pos) { + if (!_is_mow()) { + auto msg = fmt::format( + "Can only do partial update on merge-on-write unique table, but found: " + "keys_type={}, _opts.enable_unique_key_merge_on_write={}, tablet_id={}", + _tablet_schema->keys_type(), _opts.enable_unique_key_merge_on_write, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (_opts.rowset_ctx->partial_update_info == nullptr) { + auto msg = + fmt::format("partial_update_info should not be nullptr, please check, tablet_id={}", + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (!_opts.rowset_ctx->partial_update_info->is_partial_update) { + auto msg = fmt::format( + "in partial update code, but is_partial_update=false, please check, tablet_id={}", + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (row_pos != 0) { + auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + return Status::OK(); +} + // for partial update, we should do following steps to fill content of block: // 1. set block data to data convertor, and get all key_column's converted slice // 2. get pk of input block, and read missing columns @@ -318,8 +351,7 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da return Status::NotSupported("append_block_with_partial_content"); } - DCHECK(_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write); - DCHECK(_opts.rowset_ctx->partial_update_info != nullptr); + RETURN_IF_ERROR(_partial_update_preconditions_check(data.row_pos)); auto tablet = static_cast(_tablet.get()); // create full block and fill with input columns @@ -1209,5 +1241,9 @@ void VerticalSegmentWriter::_set_max_key(const Slice& key) { _max_key.append(key.get_data(), key.get_size()); } +inline bool VerticalSegmentWriter::_is_mow() const { + return _tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write; +} + } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h index 8fd854c3e95f4a..1713ece23e6d94 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h @@ -149,12 +149,14 @@ class VerticalSegmentWriter { void _set_min_key(const Slice& key); void _set_max_key(const Slice& key); void _serialize_block_to_row_column(vectorized::Block& block); + Status _partial_update_preconditions_check(size_t row_pos); Status _append_block_with_partial_content(RowsInBlock& data, vectorized::Block& full_block); Status _append_block_with_variant_subcolumns(RowsInBlock& data); Status _fill_missing_columns(vectorized::MutableColumns& mutable_full_columns, const std::vector& use_default_or_null_flag, bool has_default_or_nullable, const size_t& segment_start_pos, const vectorized::Block* block); + bool _is_mow() const; private: uint32_t _segment_id;