Skip to content

Commit

Permalink
[Fix](merge-on-write) Add defensive check before partial update (#44687)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

as stated in #44692, Some user can
create Duplicate keys table with `enable_unique_key_merge_on_write=true`
successfully in version 1.2. After they upgrade to higher version, the
incorrect table property still remains. Some logic in BE treat such
table as an unique key mow table, makes a wrong query plan and BE may do
partial update on duplicate table, which causes BE core.
This PR add checks before doing partial update and return error when
some preconditions are not met to prevent coredump.
  • Loading branch information
bobhan1 authored Dec 6, 2024
1 parent da9b818 commit 08d1e76
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 14 deletions.
39 changes: 34 additions & 5 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,39 @@ Status SegmentWriter::probe_key_for_mow(
return Status::OK();
}

Status SegmentWriter::partial_update_preconditions_check(size_t row_pos) {
if (!_is_mow()) {
auto msg = fmt::format(
"Can only do partial update on merge-on-write unique table, but found: "
"keys_type={}, _opts.enable_unique_key_merge_on_write={}, tablet_id={}",
_tablet_schema->keys_type(), _opts.enable_unique_key_merge_on_write,
_tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
if (_opts.rowset_ctx->partial_update_info == nullptr) {
auto msg =
fmt::format("partial_update_info should not be nullptr, please check, tablet_id={}",
_tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
if (!_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()) {
auto msg = fmt::format(
"in fixed partial update code, but update_mode={}, please check, tablet_id={}",
_opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
if (row_pos != 0) {
auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos,
_tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
return Status::OK();
}

// for partial update, we should do following steps to fill content of block:
// 1. set block data to data convertor, and get all key_column's converted slice
// 2. get pk of input block, and read missing columns
Expand All @@ -562,11 +595,7 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
block->columns(), _tablet_schema->num_key_columns(),
_tablet_schema->num_columns()));
}
DCHECK(_is_mow());

DCHECK(_opts.rowset_ctx->partial_update_info);
DCHECK(_opts.rowset_ctx->partial_update_info->is_fixed_partial_update());
DCHECK(row_pos == 0);
RETURN_IF_ERROR(partial_update_preconditions_check(row_pos));

// find missing column cids
const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids;
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/rowset/segment_v2/segment_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class SegmentWriter {
const std::function<void(const RowLocation& loc)>& found_cb,
const std::function<Status()>& not_found_cb,
PartialUpdateStats& stats);
Status partial_update_preconditions_check(size_t row_pos);
Status append_block_with_partial_content(const vectorized::Block* block, size_t row_pos,
size_t num_rows);
Status append_block_with_variant_subcolumns(vectorized::Block& data);
Expand Down
56 changes: 47 additions & 9 deletions be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,51 @@ Status VerticalSegmentWriter::_probe_key_for_mow(
return Status::OK();
}

Status VerticalSegmentWriter::_partial_update_preconditions_check(size_t row_pos,
bool is_flexible_update) {
if (!_is_mow()) {
auto msg = fmt::format(
"Can only do partial update on merge-on-write unique table, but found: "
"keys_type={}, _opts.enable_unique_key_merge_on_write={}, tablet_id={}",
_tablet_schema->keys_type(), _opts.enable_unique_key_merge_on_write,
_tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
if (_opts.rowset_ctx->partial_update_info == nullptr) {
auto msg =
fmt::format("partial_update_info should not be nullptr, please check, tablet_id={}",
_tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
if (!is_flexible_update) {
if (!_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()) {
auto msg = fmt::format(
"in fixed partial update code, but update_mode={}, please check, tablet_id={}",
_opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
} else {
if (!_opts.rowset_ctx->partial_update_info->is_flexible_partial_update()) {
auto msg = fmt::format(
"in flexible partial update code, but update_mode={}, please check, "
"tablet_id={}",
_opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
}
if (row_pos != 0) {
auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos,
_tablet->tablet_id());
DCHECK(false) << msg;
return Status::InternalError<false>(msg);
}
return Status::OK();
}

// for partial update, we should do following steps to fill content of block:
// 1. set block data to data convertor, and get all key_column's converted slice
// 2. get pk of input block, and read missing columns
Expand All @@ -427,11 +472,7 @@ Status VerticalSegmentWriter::_probe_key_for_mow(
// 3. set columns to data convertor and then write all columns
Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& data,
vectorized::Block& full_block) {
DCHECK(_is_mow());
DCHECK(_opts.rowset_ctx->partial_update_info != nullptr);
DCHECK(_opts.rowset_ctx->partial_update_info->is_fixed_partial_update());
DCHECK(data.row_pos == 0);

RETURN_IF_ERROR(_partial_update_preconditions_check(data.row_pos, false));
// create full block and fill with input columns
full_block = _tablet_schema->create_block();
const auto& including_cids = _opts.rowset_ctx->partial_update_info->update_cids;
Expand Down Expand Up @@ -580,10 +621,7 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da

Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(
RowsInBlock& data, vectorized::Block& full_block) {
DCHECK(_is_mow());
DCHECK(_opts.rowset_ctx->partial_update_info != nullptr);
DCHECK(_opts.rowset_ctx->partial_update_info->is_flexible_partial_update());
DCHECK(data.row_pos == 0);
RETURN_IF_ERROR(_partial_update_preconditions_check(data.row_pos, true));

// data.block has the same schema with full_block
DCHECK(data.block->columns() == _tablet_schema->num_columns());
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/rowset/segment_v2/vertical_segment_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ class VerticalSegmentWriter {
const std::function<void(const RowLocation& loc)>& found_cb,
const std::function<Status()>& not_found_cb,
PartialUpdateStats& stats);
Status _partial_update_preconditions_check(size_t row_pos, bool is_flexible_update);
Status _append_block_with_partial_content(RowsInBlock& data, vectorized::Block& full_block);
Status _append_block_with_flexible_partial_content(RowsInBlock& data,
vectorized::Block& full_block);
Expand Down

0 comments on commit 08d1e76

Please sign in to comment.