From bc679038ca8730afa63a8d3b83ce7c34c8985e63 Mon Sep 17 00:00:00 2001 From: Pxl Date: Fri, 22 Nov 2024 11:36:12 +0800 Subject: [PATCH] [Bug](runtime-filter) avoid ignore rf multiple times (#44408) ### What problem does this PR solve? Avoid ignoring the runtime filter (rf) multiple times. When an instance is woken up by downstream (`wake_up_by_downstream`), it breaks the ordering of `first closing the instance that should build hash table and then closing other instances`. As a result, while the instance that should build the hash table is inserting data into the rf, it may find that the rf has already been ignored, causing the DCHECK to fail. ```cpp F20241121 12:01:02.245405 9832 runtime_filter.cpp:395] Check failed: !is_ignored() 3# raise at ../sysdeps/posix/raise.c:27 4# abort at ./stdlib/abort.c:81 5# 0x00005603CFDF778D in /mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be 6# 0x00005603CFDE9DCA in /mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be 7# google::LogMessage::SendToLog() in /mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be 8# google::LogMessage::Flush() in /mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be 9# google::LogMessageFatal::~LogMessageFatal() in /mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be 10# doris::RuntimePredicateWrapper::insert_fixed_len(COW::immutable_ptr const&, unsigned long) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/exprs/runtime_filter.cpp:395 11# doris::RuntimePredicateWrapper::insert_batch(COW::immutable_ptr const&, unsigned long) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/exprs/runtime_filter.cpp:431 12# doris::IRuntimeFilter::insert_batch(COW::immutable_ptr, unsigned long) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/exprs/runtime_filter.cpp:992 13# doris::VRuntimeFilterSlots::insert(doris::vectorized::Block const*) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/exprs/runtime_filter_slots.h:146 14# doris::pipeline::HashJoinBuildSinkLocalState::close(doris::RuntimeState*, doris::Status) in 
/mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be 15# doris::pipeline::DataSinkOperatorXBase::close(doris::RuntimeState*, doris::Status) in /mnt/ssd01/doris-branch40preview/NEREIDS_ASAN/be/lib/doris_be 16# doris::pipeline::PipelineTask::close(doris::Status) at /home/zcp/repo_center/doris_branch-3.0/doris/be/src/pipeline/pipeline_task.cpp:487 ``` --- be/src/pipeline/exec/hashjoin_build_sink.cpp | 55 ++++++++++---------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index bf177742ab3563..7d3f4da935099e 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -112,23 +112,24 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status exec_statu if (_closed) { return Status::OK(); } - auto p = _parent->cast(); Defer defer {[&]() { - if (_should_build_hash_table) { - // The build side hash key column maybe no need output, but we need to keep the column in block - // because it is used to compare with probe side hash key column - if (p._should_keep_hash_key_column && _build_col_ids.size() == 1) { - p._should_keep_column_flags[_build_col_ids[0]] = true; - } + if (!_should_build_hash_table) { + return; + } + // The build side hash key column maybe no need output, but we need to keep the column in block + // because it is used to compare with probe side hash key column + auto p = _parent->cast(); + if (p._should_keep_hash_key_column && _build_col_ids.size() == 1) { + p._should_keep_column_flags[_build_col_ids[0]] = true; + } - if (_shared_state->build_block) { - // release the memory of unused column in probe stage - _shared_state->build_block->clear_column_mem_not_keep( - p._should_keep_column_flags, bool(p._shared_hashtable_controller)); - } + if (_shared_state->build_block) { + // release the memory of unused column in probe stage + 
_shared_state->build_block->clear_column_mem_not_keep( + p._should_keep_column_flags, bool(p._shared_hashtable_controller)); } - if (_should_build_hash_table && p._shared_hashtable_controller) { + if (p._shared_hashtable_controller) { p._shared_hashtable_controller->signal_finish(p.node_id()); } }}; @@ -137,22 +138,22 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status exec_statu return Base::close(state, exec_status); } - if (state->get_task()->wake_up_by_downstream()) { - RETURN_IF_ERROR(_runtime_filter_slots->send_filter_size(state, 0, _finish_dependency)); - RETURN_IF_ERROR(_runtime_filter_slots->ignore_all_filters()); - } else { - auto* block = _shared_state->build_block.get(); - uint64_t hash_table_size = block ? block->rows() : 0; - { - SCOPED_TIMER(_runtime_filter_init_timer); - if (_should_build_hash_table) { + if (_should_build_hash_table) { + if (state->get_task()->wake_up_by_downstream()) { + RETURN_IF_ERROR(_runtime_filter_slots->send_filter_size(state, 0, _finish_dependency)); + RETURN_IF_ERROR(_runtime_filter_slots->ignore_all_filters()); + } else { + auto* block = _shared_state->build_block.get(); + uint64_t hash_table_size = block ? block->rows() : 0; + { + SCOPED_TIMER(_runtime_filter_init_timer); RETURN_IF_ERROR(_runtime_filter_slots->init_filters(state, hash_table_size)); + RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); + } + if (hash_table_size > 1) { + SCOPED_TIMER(_runtime_filter_compute_timer); + _runtime_filter_slots->insert(block); } - RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); - } - if (_should_build_hash_table && hash_table_size > 1) { - SCOPED_TIMER(_runtime_filter_compute_timer); - _runtime_filter_slots->insert(block); } } SCOPED_TIMER(_publish_runtime_filter_timer);