From 3a816b588b89e01b2b2a16033c026c21b32eb61e Mon Sep 17 00:00:00 2001 From: YangKeao Date: Wed, 25 Dec 2024 20:25:15 +0800 Subject: [PATCH 1/2] This is an automated cherry-pick of #58539 Signed-off-by: ti-chi-bot --- pkg/ttl/ttlworker/job_manager.go | 13 +++++ .../ttlworker/job_manager_integration_test.go | 48 +++++++++++++++++++ pkg/ttl/ttlworker/job_manager_test.go | 13 +++++ 3 files changed, 74 insertions(+) diff --git a/pkg/ttl/ttlworker/job_manager.go b/pkg/ttl/ttlworker/job_manager.go index e79de316c98fb..8212c031d77eb 100644 --- a/pkg/ttl/ttlworker/job_manager.go +++ b/pkg/ttl/ttlworker/job_manager.go @@ -67,7 +67,16 @@ const ttlJobHistoryGCTemplate = `DELETE FROM mysql.tidb_ttl_job_history WHERE cr const ttlTableStatusGCWithoutIDTemplate = `DELETE FROM mysql.tidb_ttl_table_status WHERE current_job_status IS NULL` const ttlTableStatusGCWithIDTemplate = ttlTableStatusGCWithoutIDTemplate + ` AND table_id NOT IN (%s)` +<<<<<<< HEAD const timeFormat = time.DateTime +======= +// don't remove the rows for non-exist tables directly. Instead, set them to cancelled. In some special situations, the TTL job may still be able +// to finish correctly. If that happen, the status will be updated from 'cancelled' to 'finished' in `(*ttlJob).finish` +const ttlJobHistoryGCNonExistTableTemplate = `UPDATE mysql.tidb_ttl_job_history SET status = 'cancelled' + WHERE table_id NOT IN (SELECT table_id FROM mysql.tidb_ttl_table_status) AND status = 'running'` + +var timeFormat = time.DateTime +>>>>>>> 2a72e7f012b (ttl: set the job history status to `cancelled` if it's removed in GC and it's still running (#58539)) func insertNewTableIntoStatusSQL(tableID int64, parentTableID int64) (string, []any) { return insertNewTableIntoStatusTemplate, []any{tableID, parentTableID} @@ -1052,6 +1061,10 @@ func (m *JobManager) DoGC(ctx context.Context, se session.Session) { if _, err := se.ExecuteSQL(ctx, ttlJobHistoryGCTemplate); err != nil { logutil.Logger(ctx).Warn("fail to gc ttl job history", zap.Error(err)) } + + if _, err := se.ExecuteSQL(ctx, ttlJobHistoryGCNonExistTableTemplate); err != nil { + logutil.Logger(ctx).Warn("fail to gc ttl job history for non-exist table", zap.Error(err)) + } } // GetDelayMetricRecords gets the records of TTL delay metrics diff --git a/pkg/ttl/ttlworker/job_manager_integration_test.go b/pkg/ttl/ttlworker/job_manager_integration_test.go index 69624a1c2890c..935e73d9894cf 100644 --- a/pkg/ttl/ttlworker/job_manager_integration_test.go +++ b/pkg/ttl/ttlworker/job_manager_integration_test.go @@ -1499,3 +1499,51 @@ func TestDisableTTLAfterLoseHeartbeat(t *testing.T) { // the job should have been cancelled tk.MustQuery("select current_job_status from mysql.tidb_ttl_table_status").Check(testkit.Rows("")) } + +func TestTimerJobAfterDropTable(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + waitAndStopTTLManager(t, dom) + + pool := wrapPoolForTest(dom.SysSessionPool()) + + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t (created_at datetime) TTL = created_at + INTERVAL 1 HOUR") + tbl, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t")) + require.NoError(t, err) + m := ttlworker.NewJobManager("test-job-manager", pool, store, nil, func() bool { return true }) + + se, err := ttlworker.GetSessionForTest(pool) + require.NoError(t, err) + defer se.Close() + + // First, schedule the job. The row in the `tidb_ttl_table_status` and `tidb_ttl_job_history` will be created + jobID := "test-job-id" + + require.NoError(t, m.InfoSchemaCache().Update(se)) + err = m.SubmitJob(se, tbl.Meta().ID, tbl.Meta().ID, jobID) + require.NoError(t, err) + now := se.Now() + tk.MustQuery("select count(*) from mysql.tidb_ttl_table_status").Check(testkit.Rows("1")) + tk.MustQuery("select count(*) from mysql.tidb_ttl_job_history").Check(testkit.Rows("1")) + + // Drop the table, then the `m` somehow lost heartbeat for 2*heartbeat interval, and GC TTL jobs + tk.MustExec("drop table t") + + now = now.Add(time.Hour * 2) + m.DoGC(context.Background(), se, now) + tk.MustQuery("select count(*) from mysql.tidb_ttl_table_status").Check(testkit.Rows("0")) + tk.MustQuery("select status from mysql.tidb_ttl_job_history").Check(testkit.Rows("cancelled")) + + require.NoError(t, m.TableStatusCache().Update(context.Background(), se)) + require.NoError(t, m.InfoSchemaCache().Update(se)) + m.CheckNotOwnJob() + require.Len(t, m.RunningJobs(), 0) + + // The adapter should not return the job + adapter := ttlworker.NewManagerJobAdapter(store, pool, nil) + job, err := adapter.GetJob(context.Background(), tbl.Meta().ID, tbl.Meta().ID, jobID) + require.NoError(t, err) + require.NotNil(t, job) + require.True(t, job.Finished) +} diff --git a/pkg/ttl/ttlworker/job_manager_test.go b/pkg/ttl/ttlworker/job_manager_test.go index 1294ab1d973a8..1ff423c453002 100644 --- a/pkg/ttl/ttlworker/job_manager_test.go +++ b/pkg/ttl/ttlworker/job_manager_test.go @@ -203,6 +203,19 @@ func (m *JobManager) ReportMetrics(se session.Session) { m.reportMetrics(se) } +<<<<<<< HEAD +======= +// ID returns the id of JobManager +func (m *JobManager) ID() string { + return m.id +} + +// CheckNotOwnJob is an exported version of checkNotOwnJob +func (m *JobManager) CheckNotOwnJob() { + m.checkNotOwnJob() +} + +>>>>>>> 2a72e7f012b (ttl: set the job history status to `cancelled` if it's removed in GC and it's still running (#58539)) // CheckFinishedJob is an exported version of checkFinishedJob func (m *JobManager) CheckFinishedJob(se session.Session) { m.checkFinishedJob(se) From 203ef5cc1b7108c4b5c4cbb07d803012028bb48e Mon Sep 17 00:00:00 2001 From: Yang Keao Date: Mon, 6 Jan 2025 17:00:22 +0800 Subject: [PATCH 2/2] fix conflict Signed-off-by: Yang Keao --- pkg/ttl/ttlworker/job_manager.go | 6 +----- pkg/ttl/ttlworker/job_manager_test.go | 8 -------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/pkg/ttl/ttlworker/job_manager.go b/pkg/ttl/ttlworker/job_manager.go index 8212c031d77eb..1835f73143bf0 100644 --- a/pkg/ttl/ttlworker/job_manager.go +++ b/pkg/ttl/ttlworker/job_manager.go @@ -67,17 +67,13 @@ const ttlJobHistoryGCTemplate = `DELETE FROM mysql.tidb_ttl_job_history WHERE cr const ttlTableStatusGCWithoutIDTemplate = `DELETE FROM mysql.tidb_ttl_table_status WHERE current_job_status IS NULL` const ttlTableStatusGCWithIDTemplate = ttlTableStatusGCWithoutIDTemplate + ` AND table_id NOT IN (%s)` -<<<<<<< HEAD const timeFormat = time.DateTime -======= + // don't remove the rows for non-exist tables directly. Instead, set them to cancelled. In some special situations, the TTL job may still be able // to finish correctly. If that happen, the status will be updated from 'cancelled' to 'finished' in `(*ttlJob).finish` const ttlJobHistoryGCNonExistTableTemplate = `UPDATE mysql.tidb_ttl_job_history SET status = 'cancelled' WHERE table_id NOT IN (SELECT table_id FROM mysql.tidb_ttl_table_status) AND status = 'running'` -var timeFormat = time.DateTime ->>>>>>> 2a72e7f012b (ttl: set the job history status to `cancelled` if it's removed in GC and it's still running (#58539)) - func insertNewTableIntoStatusSQL(tableID int64, parentTableID int64) (string, []any) { return insertNewTableIntoStatusTemplate, []any{tableID, parentTableID} } diff --git a/pkg/ttl/ttlworker/job_manager_test.go b/pkg/ttl/ttlworker/job_manager_test.go index 1ff423c453002..ec24c91d5c679 100644 --- a/pkg/ttl/ttlworker/job_manager_test.go +++ b/pkg/ttl/ttlworker/job_manager_test.go @@ -203,19 +203,11 @@ func (m *JobManager) ReportMetrics(se session.Session) { m.reportMetrics(se) } -<<<<<<< HEAD -======= -// ID returns the id of JobManager -func (m *JobManager) ID() string { - return m.id -} - // CheckNotOwnJob is an exported version of checkNotOwnJob func (m *JobManager) CheckNotOwnJob() { m.checkNotOwnJob() } ->>>>>>> 2a72e7f012b (ttl: set the job history status to `cancelled` if it's removed in GC and it's still running (#58539)) // CheckFinishedJob is an exported version of checkFinishedJob func (m *JobManager) CheckFinishedJob(se session.Session) { m.checkFinishedJob(se)