From d6ecea4cd1d64313f88b2252efca6a5672193a00 Mon Sep 17 00:00:00 2001 From: Siyang Tang <82279870+TangSiyang2001@users.noreply.github.com> Date: Sat, 12 Oct 2024 22:29:50 +0800 Subject: [PATCH] [fix](delete) Fix potential delete job stuck until timeout if an exception happened in FE DeleteJob execution (#41672) (#41763) pick: #41672 A failed task should also count down the count-down latch to prevent the job from getting stuck. --- .../src/main/java/org/apache/doris/master/MasterImpl.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 3e63a5421f798f..27469301e17615 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -423,6 +423,9 @@ private void finishRealtimePush(AgentTask task, TFinishTaskRequest request) thro } catch (MetaNotFoundException e) { AgentTaskQueue.removeTask(backendId, TTaskType.REALTIME_PUSH, signature); LOG.warn("finish push replica error", e); + if (pushTask.getPushType() == TPushType.DELETE) { + pushTask.countDownLatch(backendId, pushTabletId); + } } finally { olapTable.writeUnlock(); }