diff --git a/br/cmd/br/backup.go b/br/cmd/br/backup.go index 1ca18ab7aaa57..5257e90813dd8 100644 --- a/br/cmd/br/backup.go +++ b/br/cmd/br/backup.go @@ -154,7 +154,7 @@ func newFullBackupCommand() *cobra.Command { return runBackupCommand(command, task.FullBackupCmd) }, } - task.DefineFilterFlags(command, acceptAllTables, false) + task.DefineFilterFlags(command, acceptAllTables, acceptAllTables, false) task.DefineBackupEBSFlags(command.PersistentFlags()) return command } diff --git a/br/cmd/br/cmd.go b/br/cmd/br/cmd.go index 695d9975717a9..6c74bab54849e 100644 --- a/br/cmd/br/cmd.go +++ b/br/cmd/br/cmd.go @@ -37,9 +37,9 @@ var ( tidbGlue = gluetidb.New() envLogToTermKey = "BR_LOG_TO_TERM" - filterOutSysAndMemTables = []string{ + filterOutSysAndMemKeepPrivilege = []string{ "*.*", - fmt.Sprintf("!%s.*", utils.TemporaryDBName("*")), + fmt.Sprintf("!%s.*", utils.WithTemporaryDBNamePrefix("*")), "!mysql.*", "mysql.bind_info", "mysql.user", diff --git a/br/cmd/br/restore.go b/br/cmd/br/restore.go index a37f55b904ca4..974d4847b6123 100644 --- a/br/cmd/br/restore.go +++ b/br/cmd/br/restore.go @@ -186,7 +186,8 @@ func newFullRestoreCommand() *cobra.Command { return runRestoreCommand(cmd, task.FullRestoreCmd) }, } - task.DefineFilterFlags(command, filterOutSysAndMemTables, false) + // default only restore some system tables + task.DefineFilterFlags(command, acceptAllTables, filterOutSysAndMemKeepPrivilege, false) task.DefineRestoreSnapshotFlags(command) return command } @@ -254,7 +255,8 @@ func newStreamRestoreCommand() *cobra.Command { return runRestoreCommand(command, task.PointRestoreCmd) }, } - task.DefineFilterFlags(command, filterOutSysAndMemTables, true) + // default restore only some system tables + task.DefineFilterFlags(command, acceptAllTables, filterOutSysAndMemKeepPrivilege, true) task.DefineStreamRestoreFlags(command) return command } diff --git a/br/cmd/br/stream.go b/br/cmd/br/stream.go index 75055886bb74e..52496bd48c15b 100644 --- a/br/cmd/br/stream.go +++ 
b/br/cmd/br/stream.go @@ -71,7 +71,7 @@ func newStreamStartCommand() *cobra.Command { }, } - task.DefineFilterFlags(command, acceptAllTables, true) + task.DefineFilterFlags(command, acceptAllTables, acceptAllTables, true) task.DefineStreamStartFlags(command.Flags()) return command } diff --git a/br/pkg/backup/BUILD.bazel b/br/pkg/backup/BUILD.bazel index 2febe37641b01..ae04c6e7302c2 100644 --- a/br/pkg/backup/BUILD.bazel +++ b/br/pkg/backup/BUILD.bazel @@ -34,7 +34,6 @@ go_library( "//pkg/statistics/handle", "//pkg/statistics/util", "//pkg/util", - "//pkg/util/table-filter", "@com_github_google_btree//:btree", "@com_github_opentracing_opentracing_go//:opentracing-go", "@com_github_pingcap_errors//:errors", diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 6ad03dd45cebc..845439c7595d7 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -37,7 +37,6 @@ import ( "github.com/pingcap/tidb/pkg/meta" "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/util" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/tikv/client-go/v2/oracle" "github.com/tikv/client-go/v2/txnkv/txnlock" pd "github.com/tikv/pd/client" @@ -675,11 +674,11 @@ func (bc *Client) SetApiVersion(v kvrpcpb.APIVersion) { bc.apiVersion = v } -// Client.BuildBackupRangeAndSchema calls BuildBackupRangeAndSchema, +// BuildBackupRangeAndSchema calls BuildBackupRangeAndSchema, // if the checkpoint mode is used, return the ranges from checkpoint meta func (bc *Client) BuildBackupRangeAndSchema( storage kv.Storage, - tableFilter filter.Filter, + tableFilter *utils.CombinedFilter, backupTS uint64, isFullBackup bool, ) ([]rtree.Range, *Schemas, []*backuppb.PlacementPolicy, error) { @@ -714,12 +713,12 @@ func CheckBackupStorageIsLocked(ctx context.Context, s storage.ExternalStorage) return nil } -// BuildBackupRangeAndSchema gets KV range and schema of tables. +// BuildBackupRangeAndInitSchema gets KV range and schema of tables. 
// KV ranges are separated by Table IDs. // Also, KV ranges are separated by Index IDs in the same table. func BuildBackupRangeAndInitSchema( storage kv.Storage, - tableFilter filter.Filter, + tableFilter *utils.CombinedFilter, backupTS uint64, isFullBackup bool, buildRange bool, @@ -815,7 +814,7 @@ func BuildBackupRangeAndInitSchema( func BuildBackupSchemas( storage kv.Storage, - tableFilter filter.Filter, + tableFilter *utils.CombinedFilter, backupTS uint64, isFullBackup bool, fn func(dbInfo *model.DBInfo, tableInfo *model.TableInfo), diff --git a/br/pkg/backup/schema.go b/br/pkg/backup/schema.go index 4857b498b405c..216a7aad3c8f7 100644 --- a/br/pkg/backup/schema.go +++ b/br/pkg/backup/schema.go @@ -102,7 +102,7 @@ func (ss *Schemas) BackupSchemas( } if utils.IsSysDB(schema.dbInfo.Name.L) { - schema.dbInfo.Name = utils.TemporaryDBName(schema.dbInfo.Name.O) + schema.dbInfo.Name = utils.WithTemporaryDBNamePrefix(schema.dbInfo.Name.O) } var checksum *checkpoint.ChecksumItem diff --git a/br/pkg/backup/schema_test.go b/br/pkg/backup/schema_test.go index b45a96fb54970..ee54aebb8fe3f 100644 --- a/br/pkg/backup/schema_test.go +++ b/br/pkg/backup/schema_test.go @@ -105,27 +105,29 @@ func TestBuildBackupRangeAndSchema(t *testing.T) { tk := testkit.NewTestKit(t, m.Storage) // Table t1 is not exist. - testFilter, err := filter.Parse([]string{"test.t1"}) + userFilter, err := filter.Parse([]string{"test.t1"}) + combinedFilter1 := utils.NewCombinedFilterNoSystem(userFilter) require.NoError(t, err) _, backupSchemas, _, err := backup.BuildBackupRangeAndInitSchema( - m.Storage, testFilter, math.MaxUint64, false, true) + m.Storage, combinedFilter1, math.MaxUint64, false, true) require.NoError(t, err) require.NotNil(t, backupSchemas) // Database is not exist. 
fooFilter, err := filter.Parse([]string{"foo.t1"}) + combinedFilter2 := utils.NewCombinedFilterNoSystem(fooFilter) require.NoError(t, err) _, backupSchemas, _, err = backup.BuildBackupRangeAndInitSchema( - m.Storage, fooFilter, math.MaxUint64, false, true) + m.Storage, combinedFilter2, math.MaxUint64, false, true) require.NoError(t, err) require.Nil(t, backupSchemas) // Empty database. // Filter out system tables manually. - noFilter, err := filter.Parse([]string{"*.*", "!mysql.*", "!sys.*"}) + combinedFilter3 := utils.NewCombinedFilterRejectAll() require.NoError(t, err) _, backupSchemas, _, err = backup.BuildBackupRangeAndInitSchema( - m.Storage, noFilter, math.MaxUint64, false, true) + m.Storage, combinedFilter3, math.MaxUint64, false, true) require.NoError(t, err) require.NotNil(t, backupSchemas) @@ -137,7 +139,7 @@ func TestBuildBackupRangeAndSchema(t *testing.T) { var policies []*backuppb.PlacementPolicy _, backupSchemas, policies, err = backup.BuildBackupRangeAndInitSchema( - m.Storage, testFilter, math.MaxUint64, false, true) + m.Storage, combinedFilter1, math.MaxUint64, false, true) require.NoError(t, err) require.Equal(t, 1, backupSchemas.Len()) // we expect no policies collected, because it's not full backup. @@ -170,7 +172,7 @@ func TestBuildBackupRangeAndSchema(t *testing.T) { tk.MustExec("insert into t2 values (11);") _, backupSchemas, policies, err = backup.BuildBackupRangeAndInitSchema( - m.Storage, noFilter, math.MaxUint64, true, true) + m.Storage, combinedFilter3, math.MaxUint64, true, true) require.NoError(t, err) require.Equal(t, 2, backupSchemas.Len()) // we expect the policy fivereplicas collected in full backup. 
@@ -217,9 +219,10 @@ func TestBuildBackupRangeAndSchemaWithBrokenStats(t *testing.T) { `) f, err := filter.Parse([]string{"test.t3"}) + combinedFilter := utils.NewCombinedFilterNoSystem(f) require.NoError(t, err) - _, backupSchemas, _, err := backup.BuildBackupRangeAndInitSchema(m.Storage, f, math.MaxUint64, false, true) + _, backupSchemas, _, err := backup.BuildBackupRangeAndInitSchema(m.Storage, combinedFilter, math.MaxUint64, false, true) require.NoError(t, err) require.Equal(t, 1, backupSchemas.Len()) @@ -253,7 +256,7 @@ func TestBuildBackupRangeAndSchemaWithBrokenStats(t *testing.T) { // recover the statistics. tk.MustExec("analyze table t3 all columns;") - _, backupSchemas, _, err = backup.BuildBackupRangeAndInitSchema(m.Storage, f, math.MaxUint64, false, true) + _, backupSchemas, _, err = backup.BuildBackupRangeAndInitSchema(m.Storage, combinedFilter, math.MaxUint64, false, true) require.NoError(t, err) require.Equal(t, 1, backupSchemas.Len()) @@ -293,8 +296,9 @@ func TestBackupSchemasForSystemTable(t *testing.T) { } f, err := filter.Parse([]string{"mysql.systable*"}) + combinedFilter := utils.NewCombinedFilterNoUser(f) require.NoError(t, err) - _, backupSchemas, _, err := backup.BuildBackupRangeAndInitSchema(m.Storage, f, math.MaxUint64, false, true) + _, backupSchemas, _, err := backup.BuildBackupRangeAndInitSchema(m.Storage, combinedFilter, math.MaxUint64, false, true) require.NoError(t, err) require.Equal(t, systemTablesCount, backupSchemas.Len()) @@ -314,7 +318,7 @@ func TestBackupSchemasForSystemTable(t *testing.T) { schemas2 := GetSchemasFromMeta(t, es2) require.Len(t, schemas2, systemTablesCount) for _, schema := range schemas2 { - require.Equal(t, utils.TemporaryDBName("mysql"), schema.DB.Name) + require.Equal(t, utils.WithTemporaryDBNamePrefix("mysql"), schema.DB.Name) require.Equal(t, true, strings.HasPrefix(schema.Info.Name.O, tablePrefix)) } } diff --git a/br/pkg/checkpoint/checkpoint_test.go b/br/pkg/checkpoint/checkpoint_test.go index 
b70348aaa5fd9..3facea55229f3 100644 --- a/br/pkg/checkpoint/checkpoint_test.go +++ b/br/pkg/checkpoint/checkpoint_test.go @@ -105,12 +105,12 @@ func TestCheckpointMetaForRestore(t *testing.T) { exists := checkpoint.ExistsCheckpointProgress(ctx, dom) require.False(t, exists) err = checkpoint.SaveCheckpointProgress(ctx, se, &checkpoint.CheckpointProgress{ - Progress: checkpoint.InLogRestoreAndIdMapPersist, + Progress: checkpoint.InLogRestoreAndIdMapPersisted, }) require.NoError(t, err) progress, err := checkpoint.LoadCheckpointProgress(ctx, se.GetSessionCtx().GetRestrictedSQLExecutor()) require.NoError(t, err) - require.Equal(t, checkpoint.InLogRestoreAndIdMapPersist, progress.Progress) + require.Equal(t, checkpoint.InLogRestoreAndIdMapPersisted, progress.Progress) taskInfo, err := checkpoint.TryToGetCheckpointTaskInfo(ctx, s.Mock.Domain, se.GetSessionCtx().GetRestrictedSQLExecutor()) require.NoError(t, err) @@ -120,7 +120,7 @@ func TestCheckpointMetaForRestore(t *testing.T) { require.Equal(t, uint64(333), taskInfo.Metadata.RewriteTS) require.Equal(t, "1.0", taskInfo.Metadata.GcRatio) require.Equal(t, true, taskInfo.HasSnapshotMetadata) - require.Equal(t, checkpoint.InLogRestoreAndIdMapPersist, taskInfo.Progress) + require.Equal(t, checkpoint.InLogRestoreAndIdMapPersisted, taskInfo.Progress) exists = checkpoint.ExistsCheckpointIngestIndexRepairSQLs(ctx, dom) require.False(t, exists) diff --git a/br/pkg/checkpoint/log_restore.go b/br/pkg/checkpoint/log_restore.go index 0fd046b67ad7c..047814079d794 100644 --- a/br/pkg/checkpoint/log_restore.go +++ b/br/pkg/checkpoint/log_restore.go @@ -194,14 +194,14 @@ func ExistsLogRestoreCheckpointMetadata( TableExists(pmodel.NewCIStr(LogRestoreCheckpointDatabaseName), pmodel.NewCIStr(checkpointMetaTableName)) } -// A progress type for snapshot + log restore. +// RestoreProgress is a progress type for snapshot + log restore. 
// -// Before the id-maps is persist into external storage, the snapshot restore and -// id-maps constructure can be repeated. So if the progress is in `InSnapshotRestore`, +// Before the id-maps are persisted into external storage, the snapshot restore and +// id-maps building can be retried. So if the progress is in `InSnapshotRestore`, // it can retry from snapshot restore. // -// After the id-maps is persist into external storage, there are some meta-kvs has -// been restored into the cluster, such as `rename ddl`. Where would be a situation: +// After the id-maps are persisted into external storage, some meta-kvs have already +// been restored into the cluster, such as `rename ddl`. A situation could be: // // the first execution: // @@ -209,7 +209,7 @@ func ExistsLogRestoreCheckpointMetadata( // table A (id 80) --------------> table B (id 80) // ( snapshot restore ) ( log restore ) // -// the second execution if don't skip snasphot restore: +// the second execution, if snapshot restore is not skipped: // // table A is created again in snapshot restore, because there is no table named A // table A (id 81) --------------> [not in id-maps, so ignored] @@ -221,8 +221,8 @@ type RestoreProgress int const ( InSnapshotRestore RestoreProgress = iota - // Only when the id-maps is persist, status turns into it. - InLogRestoreAndIdMapPersist + // The status turns into this only after the id-maps are persisted. + InLogRestoreAndIdMapPersisted ) type CheckpointProgress struct { @@ -254,8 +254,8 @@ func ExistsCheckpointProgress( TableExists(pmodel.NewCIStr(LogRestoreCheckpointDatabaseName), pmodel.NewCIStr(checkpointProgressTableName)) } -// CheckpointTaskInfo is unique information within the same cluster id. It represents the last -// restore task executed for this cluster. +// CheckpointTaskInfoForLogRestore is tied to a specific cluster. +// It represents the last restore task executed in this cluster.
type CheckpointTaskInfoForLogRestore struct { Metadata *CheckpointMetadataForLogRestore HasSnapshotMetadata bool diff --git a/br/pkg/restore/import_mode_switcher.go b/br/pkg/restore/import_mode_switcher.go index 0bec6a4d1e384..5f796292c8879 100644 --- a/br/pkg/restore/import_mode_switcher.go +++ b/br/pkg/restore/import_mode_switcher.go @@ -176,7 +176,7 @@ func (switcher *ImportModeSwitcher) GoSwitchToImportMode( return nil } -// RestorePreWork executes some prepare work before restore. +// RestorePreWork switches to import mode and removes pd schedulers if needed // TODO make this function returns a restore post work. func RestorePreWork( ctx context.Context, diff --git a/br/pkg/restore/log_client/BUILD.bazel b/br/pkg/restore/log_client/BUILD.bazel index 7fb781e7ad0ef..3930edd209e96 100644 --- a/br/pkg/restore/log_client/BUILD.bazel +++ b/br/pkg/restore/log_client/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "log_client", srcs = [ + "batch_meta_processor.go", "client.go", "compacted_file_strategy.go", "import.go", @@ -36,6 +37,7 @@ go_library( "//br/pkg/stream", "//br/pkg/summary", "//br/pkg/utils", + "//br/pkg/utils/consts", "//br/pkg/utils/iter", "//br/pkg/version", "//pkg/ddl/util", @@ -47,7 +49,6 @@ go_library( "//pkg/util/codec", "//pkg/util/redact", "//pkg/util/sqlexec", - "//pkg/util/table-filter", "@com_github_fatih_color//:color", "@com_github_gogo_protobuf//proto", "@com_github_opentracing_opentracing_go//:opentracing-go", @@ -71,7 +72,6 @@ go_library( "@org_golang_x_sync//errgroup", "@org_uber_go_multierr//:multierr", "@org_uber_go_zap//:zap", - "@org_uber_go_zap//zapcore", ], ) @@ -103,6 +103,7 @@ go_test( "//br/pkg/storage", "//br/pkg/stream", "//br/pkg/utils", + "//br/pkg/utils/consts", "//br/pkg/utils/iter", "//br/pkg/utiltest", "//pkg/domain", @@ -117,7 +118,6 @@ go_test( "//pkg/util/chunk", "//pkg/util/codec", "//pkg/util/sqlexec", - "//pkg/util/table-filter", 
"@com_github_docker_go_units//:go-units", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", diff --git a/br/pkg/restore/log_client/batch_meta_processor.go b/br/pkg/restore/log_client/batch_meta_processor.go new file mode 100644 index 0000000000000..b0e908b2c5d78 --- /dev/null +++ b/br/pkg/restore/log_client/batch_meta_processor.go @@ -0,0 +1,228 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package logclient + +import ( + "context" + "encoding/json" + + "github.com/pingcap/errors" + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/log" + "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/pkg/meta" + "github.com/pingcap/tidb/pkg/meta/model" + "go.uber.org/zap" +) + +// BatchMetaKVProcessor defines how to process a batch of files +type BatchMetaKVProcessor interface { + // ProcessBatch processes a batch of files and with a filterTS and return what's not processed for next iteration + ProcessBatch( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*KvEntryWithTS, + filterTS uint64, + cf string, + ) ([]*KvEntryWithTS, error) +} + +// RestoreMetaKVProcessor implements BatchMetaKVProcessor for restoring files in batches +type RestoreMetaKVProcessor struct { + client *LogClient + schemasReplace *stream.SchemasReplace + updateStats func(kvCount uint64, size uint64) + progressInc func() +} + +func 
NewRestoreMetaKVProcessor(client *LogClient, schemasReplace *stream.SchemasReplace, + updateStats func(kvCount uint64, size uint64), + progressInc func()) *RestoreMetaKVProcessor { + return &RestoreMetaKVProcessor{ + client: client, + schemasReplace: schemasReplace, + updateStats: updateStats, + progressInc: progressInc, + } +} + +// RestoreAndRewriteMetaKVFiles tries to restore files about meta kv-event from stream-backup. +func (rp *RestoreMetaKVProcessor) RestoreAndRewriteMetaKVFiles( + ctx context.Context, + files []*backuppb.DataFileInfo, +) error { + // starts gc row collector + rp.client.RunGCRowsLoader(ctx) + + // separate the files by CF and sort each group by TS + filesInDefaultCF, filesInWriteCF := SeparateAndSortFilesByCF(files) + + log.Info("start to restore meta files", + zap.Int("total files", len(files)), + zap.Int("default files", len(filesInDefaultCF)), + zap.Int("write files", len(filesInWriteCF))) + + if err := LoadAndProcessMetaKVFilesInBatch( + ctx, + filesInDefaultCF, + filesInWriteCF, + rp, + ); err != nil { + return errors.Trace(err) + } + + // Update the global schema version to trigger a full reload so every TiDB node in the cluster will get synced with + // the latest schema update. + if err := rp.client.UpdateSchemaVersionFullReload(ctx); err != nil { + return errors.Trace(err) + } + return nil +} + +func (rp *RestoreMetaKVProcessor) ProcessBatch( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*KvEntryWithTS, + filterTS uint64, + cf string, +) ([]*KvEntryWithTS, error) { + return rp.client.RestoreBatchMetaKVFiles( + ctx, files, rp.schemasReplace, entries, + filterTS, rp.updateStats, rp.progressInc, cf, + ) +} + +// MetaKVInfoProcessor implements BatchMetaKVProcessor to iterate meta kv and collect information. +// +// 1. It collects table renaming information.
The table rename operation will not change the table id, and the process +// will drop the original table and create a new one with the same table id, so in DDL history there will be two events +// that corresponds to the same table id. +// +// 2. It builds the id mapping from upstream to downstream. This logic was nested into table rewrite previously and now +// separated out to its own component. +type MetaKVInfoProcessor struct { + client *LogClient + tableHistoryManager *stream.LogBackupTableHistoryManager + tableMappingManager *stream.TableMappingManager +} + +func NewMetaKVInfoProcessor(client *LogClient) *MetaKVInfoProcessor { + return &MetaKVInfoProcessor{ + client: client, + tableHistoryManager: stream.NewTableHistoryManager(), + tableMappingManager: stream.NewTableMappingManager(), + } +} + +func (mp *MetaKVInfoProcessor) ReadMetaKVFilesAndBuildInfo( + ctx context.Context, + files []*backuppb.DataFileInfo, +) error { + // separate the files by CF and sort each group by TS + filesInDefaultCF, filesInWriteCF := SeparateAndSortFilesByCF(files) + + if err := LoadAndProcessMetaKVFilesInBatch( + ctx, + filesInDefaultCF, + filesInWriteCF, + mp, + ); err != nil { + return errors.Trace(err) + } + return nil +} + +func (mp *MetaKVInfoProcessor) ProcessBatch( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*KvEntryWithTS, + filterTS uint64, + cf string, +) ([]*KvEntryWithTS, error) { + curSortedEntries, filteredEntries, err := mp.client.filterAndSortKvEntriesFromFiles(ctx, files, entries, filterTS) + if err != nil { + return nil, errors.Trace(err) + } + + // process entries to collect table IDs + for _, entry := range curSortedEntries { + // get value from default cf and get the short value if possible from write cf + value, err := stream.ExtractValue(&entry.E, cf) + if err != nil { + return nil, errors.Trace(err) + } + + // write cf doesn't have short value in it + if value == nil { + continue + } + + if utils.IsMetaDBKey(entry.E.Key) { + 
rawKey, err := stream.ParseTxnMetaKeyFrom(entry.E.Key) + if err != nil { + return nil, errors.Trace(err) + } + + if meta.IsDBkey(rawKey.Field) { + var dbInfo model.DBInfo + if err := json.Unmarshal(value, &dbInfo); err != nil { + return nil, errors.Trace(err) + } + + // collect db id -> name mapping during log backup, it will contain information about newly created db + mp.tableHistoryManager.RecordDBIdToName(dbInfo.ID, dbInfo.Name.O) + + // update the id map + if err = mp.tableMappingManager.ProcessDBValueAndUpdateIdMapping(dbInfo); err != nil { + return nil, errors.Trace(err) + } + } else if !meta.IsDBkey(rawKey.Key) { + // also see RewriteMetaKvEntry + continue + } + + // collect table history indexed by table id, same id may have different table names in history + if meta.IsTableKey(rawKey.Field) { + var tableInfo model.TableInfo + if err := json.Unmarshal(value, &tableInfo); err != nil { + return nil, errors.Trace(err) + } + // cannot use the db id in the parsed table info because it might not be set and thus default to 0 + dbID, err := meta.ParseDBKey(rawKey.Key) + if err != nil { + return nil, errors.Trace(err) + } + + // add to table rename history + mp.tableHistoryManager.AddTableHistory(tableInfo.ID, tableInfo.Name.String(), dbID) + + // update the id map + if err = mp.tableMappingManager.ProcessTableValueAndUpdateIdMapping(dbID, tableInfo); err != nil { + return nil, errors.Trace(err) + } + } + } + } + return filteredEntries, nil +} + +func (mp *MetaKVInfoProcessor) GetTableMappingManager() *stream.TableMappingManager { + return mp.tableMappingManager +} + +func (mp *MetaKVInfoProcessor) GetTableHistoryManager() *stream.LogBackupTableHistoryManager { + return mp.tableHistoryManager +} diff --git a/br/pkg/restore/log_client/client.go b/br/pkg/restore/log_client/client.go index 474578a05e8dc..fce397fb3ea8e 100644 --- a/br/pkg/restore/log_client/client.go +++ b/br/pkg/restore/log_client/client.go @@ -55,6 +55,7 @@ import ( "github.com/pingcap/tidb/br/pkg/stream"
"github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/br/pkg/version" ddlutil "github.com/pingcap/tidb/pkg/ddl/util" @@ -64,13 +65,11 @@ import ( "github.com/pingcap/tidb/pkg/meta/model" tidbutil "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/sqlexec" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/tikv/client-go/v2/config" kvutil "github.com/tikv/client-go/v2/util" pd "github.com/tikv/pd/client" pdhttp "github.com/tikv/pd/client/http" "go.uber.org/zap" - "go.uber.org/zap/zapcore" "golang.org/x/sync/errgroup" "google.golang.org/grpc/keepalive" ) @@ -211,8 +210,8 @@ type LogClient struct { useCheckpoint bool } -// NewRestoreClient returns a new RestoreClient. -func NewRestoreClient( +// NewLogClient returns a new LogClient. +func NewLogClient( pdClient pd.Client, pdHTTPCli pdhttp.Client, tlsConf *tls.Config, @@ -230,24 +229,27 @@ func NewRestoreClient( // Close a client. func (rc *LogClient) Close(ctx context.Context) { - defer func() { - if rc.logRestoreManager != nil { - rc.logRestoreManager.Close(ctx) - } - if rc.sstRestoreManager != nil { - rc.sstRestoreManager.Close(ctx) - } - }() - - // close the connection, and it must be succeed when in SQL mode. + // close the connection, and it must be succeeded when in SQL mode. 
if rc.unsafeSession != nil { rc.unsafeSession.Close() } + if rc.LogFileManager != nil { + rc.LogFileManager.Close() + } + if rc.rawKVClient != nil { rc.rawKVClient.Close() } - log.Info("Restore client closed") + + if rc.logRestoreManager != nil { + rc.logRestoreManager.Close(ctx) + } + + if rc.sstRestoreManager != nil { + rc.sstRestoreManager.Close(ctx) + } + log.Info("Log client closed") } func (rc *LogClient) RestoreCompactedSstFiles( @@ -473,7 +475,7 @@ func (rc *LogClient) InitCheckpointMetadataForCompactedSstRestore( return sstCheckpointSets, nil } -func (rc *LogClient) InitCheckpointMetadataForLogRestore( +func (rc *LogClient) LoadOrCreateCheckpointMetadataForLogRestore( ctx context.Context, startTS, restoredTS uint64, gcRatio string, @@ -490,7 +492,8 @@ func (rc *LogClient) InitCheckpointMetadataForLogRestore( return "", errors.Trace(err) } - log.Info("reuse gc ratio from checkpoint metadata", zap.String("gc-ratio", gcRatio)) + log.Info("reuse gc ratio from checkpoint metadata", zap.String("old-gc-ratio", gcRatio), + zap.String("checkpoint-gc-ratio", meta.GcRatio)) return meta.GcRatio, nil } @@ -606,7 +609,7 @@ func ApplyKVFilesWithBatchMethod( } fs.deleteFiles = append(fs.deleteFiles, f) } else { - if f.GetCf() == stream.DefaultCF { + if f.GetCf() == consts.DefaultCF { if fs.defaultFiles == nil { fs.defaultFiles = make([]*LogDataFileInfo, 0, batchCount) } @@ -739,6 +742,17 @@ func (rc *LogClient) RestoreKVFiles( var applyWg sync.WaitGroup eg, ectx := errgroup.WithContext(ctx) + //log.Info("################ rewrite rules", zap.Any("rules", rules)) + //for _, schema := range rc.dom.InfoSchema().AllSchemas() { + // log.Info("############### schema", zap.Any("schema", schema)) + // info, _ := rc.dom.InfoSchema().SchemaTableInfos(ctx, schema.Name) + // log.Info("################ schema table size", zap.Any("size", len(info))) + // if len(info) < 100 { + // for _, i := range info { + // log.Info("################ tables", zap.Any("tables", i.Name.O), 
zap.Any("id", i.ID)) + // } + // } + //} applyFunc := func(files []*LogDataFileInfo, kvCount int64, size uint64) { if len(files) == 0 { return @@ -809,7 +823,7 @@ func (rc *LogClient) RestoreKVFiles( return errors.Trace(err) } -func (rc *LogClient) initSchemasMap( +func (rc *LogClient) loadSchemasMap( ctx context.Context, restoreTS uint64, ) ([]*backuppb.PitrDBMap, error) { @@ -849,10 +863,11 @@ func (rc *LogClient) initSchemasMap( return backupMeta.GetDbMaps(), nil } -func initFullBackupTables( +func readFilteredFullBackupTables( ctx context.Context, s storage.ExternalStorage, - tableFilter filter.Filter, + tableFilter *utils.CombinedFilter, + piTRTableFilter *utils.PiTRTableFilter, cipherInfo *backuppb.CipherInfo, ) (map[int64]*metautil.Table, error) { metaData, err := s.ReadFile(ctx, metautil.MetaFile) @@ -881,24 +896,32 @@ func initFullBackupTables( tables := make(map[int64]*metautil.Table) for _, db := range databases { dbName := db.Info.Name.O - if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok { - dbName = name - } - - if !tableFilter.MatchSchema(dbName) { + if !tableFilter.MatchSchema(dbName) && !(piTRTableFilter != nil && piTRTableFilter.ContainsDB(db.Info.ID)) { continue } + tableAdded := false for _, table := range db.Tables { // check this db is empty. if table.Info == nil { tables[db.Info.ID] = table + tableAdded = true continue } - if !tableFilter.MatchTable(dbName, table.Info.Name.O) { + if !tableFilter.MatchTable(dbName, table.Info.Name.O) && + !(piTRTableFilter != nil && piTRTableFilter.ContainsTable(db.Info.ID, table.Info.ID)) { continue } tables[table.Info.ID] = table + tableAdded = true + } + // all tables in this db are filtered out, but we still need to keep this db since it passed the filter check + // and tables might get created later during log backup, if not keeping this db, those tables will be mapped to + // a new db id and thus will become data corruption. 
+ if !tableAdded { + tables[db.Info.ID] = &metautil.Table{ + DB: db.Info, + } } } @@ -910,22 +933,25 @@ type FullBackupStorageConfig struct { Opts *storage.ExternalStorageOptions } -type BuildTableMappingManagerConfig struct { +type GetIDMapConfig struct { // required - CurrentIdMapSaved bool - TableFilter filter.Filter + LoadSavedIDMap bool + TableFilter *utils.CombinedFilter // original table filter from user // optional FullBackupStorage *FullBackupStorageConfig CipherInfo *backuppb.CipherInfo - Files []*backuppb.DataFileInfo + PiTRTableFilter *utils.PiTRTableFilter // generated table filter that contains all the table ids that need to be restored } const UnsafePITRLogRestoreStartBeforeAnyUpstreamUserDDL = "UNSAFE_PITR_LOG_RESTORE_START_BEFORE_ANY_UPSTREAM_USER_DDL" +// generateDBReplacesFromFullBackupStorage reads the full backup schema and creates the mapping from upstream table id +// to downstream table id. The downstream tables have been created in the previous snapshot restore step, so we +// can build the mapping by looking at the table names. The current table information is in domain.InfoSchema.
func (rc *LogClient) generateDBReplacesFromFullBackupStorage( ctx context.Context, - cfg *BuildTableMappingManagerConfig, + cfg *GetIDMapConfig, ) (map[stream.UpstreamID]*stream.DBReplace, error) { dbReplaces := make(map[stream.UpstreamID]*stream.DBReplace) if cfg.FullBackupStorage == nil { @@ -940,15 +966,15 @@ func (rc *LogClient) generateDBReplacesFromFullBackupStorage( if err != nil { return nil, errors.Trace(err) } - fullBackupTables, err := initFullBackupTables(ctx, s, cfg.TableFilter, cfg.CipherInfo) + filteredFullBackupTables, err := readFilteredFullBackupTables(ctx, s, cfg.TableFilter, cfg.PiTRTableFilter, cfg.CipherInfo) if err != nil { return nil, errors.Trace(err) } - for _, t := range fullBackupTables { + for _, t := range filteredFullBackupTables { dbName, _ := utils.GetSysDBCIStrName(t.DB.Name) newDBInfo, exist := rc.dom.InfoSchema().SchemaByName(dbName) if !exist { - log.Info("db not existed", zap.String("dbname", dbName.String())) + log.Info("db does not exist", zap.String("dbName", dbName.String())) continue } @@ -964,7 +990,7 @@ func (rc *LogClient) generateDBReplacesFromFullBackupStorage( } newTableInfo, err := restore.GetTableSchema(rc.GetDomain(), dbName, t.Info.Name) if err != nil { - log.Info("table not existed", zap.String("tablename", dbName.String()+"."+t.Info.Name.String())) + log.Info("table doesn't exist", zap.String("tableName", dbName.String()+"."+t.Info.Name.String())) continue } @@ -978,26 +1004,25 @@ func (rc *LogClient) generateDBReplacesFromFullBackupStorage( return dbReplaces, nil } -// BuildTableMappingManager builds the table mapping manager. It reads the full backup storage to get the full backup -// table info to initialize the manager, or it reads the id map from previous task, -// or it loads the saved mapping from last time of run of the same task. -func (rc *LogClient) BuildTableMappingManager( +// GetBaseIDMap gets the base id map in one of the following ways: +// 1.
from previously saved id map if the same task has been running and built/saved id map already but failed later +// 2. from previous different task. A PiTR job might be split into multiple runs/tasks and each task only restores +// a subset of the entire job. +// 3. from full backup snapshot if specified. +func (rc *LogClient) GetBaseIDMap( ctx context.Context, - cfg *BuildTableMappingManagerConfig, -) (*stream.TableMappingManager, error) { + cfg *GetIDMapConfig, +) (map[stream.UpstreamID]*stream.DBReplace, error) { var ( - err error - dbMaps []*backuppb.PitrDBMap - // the id map doesn't need to construct only when it is not the first execution - needConstructIdMap bool - dbReplaces map[stream.UpstreamID]*stream.DBReplace + err error + dbMaps []*backuppb.PitrDBMap + dbReplaces map[stream.UpstreamID]*stream.DBReplace ) // this is a retry, id map saved last time, load it from external storage - if cfg.CurrentIdMapSaved { + if cfg.LoadSavedIDMap { log.Info("try to load previously saved pitr id maps") - needConstructIdMap = false - dbMaps, err = rc.initSchemasMap(ctx, rc.restoreTS) + dbMaps, err = rc.loadSchemasMap(ctx, rc.restoreTS) if err != nil { return nil, errors.Trace(err) } @@ -1007,8 +1032,7 @@ func (rc *LogClient) BuildTableMappingManager( // schemas map whose `restore-ts`` is the task's `start-ts`. 
if len(dbMaps) <= 0 && cfg.FullBackupStorage == nil { log.Info("try to load pitr id maps of the previous task", zap.Uint64("start-ts", rc.startTS)) - needConstructIdMap = true - dbMaps, err = rc.initSchemasMap(ctx, rc.startTS) + dbMaps, err = rc.loadSchemasMap(ctx, rc.startTS) if err != nil { return nil, errors.Trace(err) } @@ -1026,7 +1050,6 @@ func (rc *LogClient) BuildTableMappingManager( if len(dbMaps) <= 0 { log.Info("no id maps, build the table replaces from cluster and full backup schemas") - needConstructIdMap = true dbReplaces, err = rc.generateDBReplacesFromFullBackupStorage(ctx, cfg) if err != nil { return nil, errors.Trace(err) @@ -1035,33 +1058,9 @@ func (rc *LogClient) BuildTableMappingManager( dbReplaces = stream.FromDBMapProto(dbMaps) } - for oldDBID, dbReplace := range dbReplaces { - log.Info("base replace info", func() []zapcore.Field { - fields := make([]zapcore.Field, 0, (len(dbReplace.TableMap)+1)*3) - fields = append(fields, - zap.String("dbName", dbReplace.Name), - zap.Int64("oldID", oldDBID), - zap.Int64("newID", dbReplace.DbID)) - for oldTableID, tableReplace := range dbReplace.TableMap { - fields = append(fields, - zap.String("table", tableReplace.Name), - zap.Int64("oldID", oldTableID), - zap.Int64("newID", tableReplace.TableID)) - } - return fields - }()...) 
- } - - tableMappingManager := stream.NewTableMappingManager(dbReplaces, rc.GenGlobalID) - - // not loaded from previously saved, need to iter meta kv and build and save the map - if needConstructIdMap { - if err = rc.IterMetaKVToBuildAndSaveIdMap(ctx, tableMappingManager, cfg.Files); err != nil { - return nil, errors.Trace(err) - } - } + stream.LogDBReplaceMap("base db replace info", dbReplaces) - return tableMappingManager, nil + return dbReplaces, nil } func SortMetaKVFiles(files []*backuppb.DataFileInfo) []*backuppb.DataFileInfo { @@ -1077,14 +1076,24 @@ func SortMetaKVFiles(files []*backuppb.DataFileInfo) []*backuppb.DataFileInfo { return files } -// RestoreAndRewriteMetaKVFiles tries to restore files about meta kv-event from stream-backup. -func (rc *LogClient) RestoreAndRewriteMetaKVFiles( - ctx context.Context, - files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, - updateStats func(kvCount uint64, size uint64), - progressInc func(), -) error { +// validateNoTiFlashReplica makes sure no table contains TiFlash replica +func (rc *LogClient) validateNoTiFlashReplica() error { + existTiFlashTable := false + rc.dom.InfoSchema().ListTablesWithSpecialAttribute(func(tableInfo *model.TableInfo) bool { + if tableInfo.TiFlashReplica != nil && tableInfo.TiFlashReplica.Count > 0 { + existTiFlashTable = true + } + return false + }) + if existTiFlashTable { + return errors.Errorf("exist table(s) have tiflash replica, please remove it before restore") + } + return nil +} + +// SeparateAndSortFilesByCF filters and sorts files by column family. +// It separates files into write CF and default CF groups and then sorts them within each CF group. 
+func SeparateAndSortFilesByCF(files []*backuppb.DataFileInfo) ([]*backuppb.DataFileInfo, []*backuppb.DataFileInfo) { filesInWriteCF := make([]*backuppb.DataFileInfo, 0, len(files)) filesInDefaultCF := make([]*backuppb.DataFileInfo, 0, len(files)) @@ -1092,70 +1101,15 @@ func (rc *LogClient) RestoreAndRewriteMetaKVFiles( // The error of transactions of meta could happen if restore write CF events successfully, // but failed to restore default CF events. for _, f := range files { - if f.Cf == stream.WriteCF { + if f.Cf == consts.WriteCF { filesInWriteCF = append(filesInWriteCF, f) continue } if f.Type == backuppb.FileType_Delete { - // this should happen abnormally. - // only do some preventive checks here. - log.Warn("detected delete file of meta key, skip it", zap.Any("file", f)) - continue - } - if f.Cf == stream.DefaultCF { - filesInDefaultCF = append(filesInDefaultCF, f) - } - } - filesInDefaultCF = SortMetaKVFiles(filesInDefaultCF) - filesInWriteCF = SortMetaKVFiles(filesInWriteCF) - - log.Info("start to restore meta files", - zap.Int("total files", len(files)), - zap.Int("default files", len(filesInDefaultCF)), - zap.Int("write files", len(filesInWriteCF))) - - // run the rewrite and restore meta-kv into TiKV cluster. - if err := RestoreMetaKVFilesWithBatchMethod( - ctx, - filesInDefaultCF, - filesInWriteCF, - schemasReplace, - updateStats, - progressInc, - rc.RestoreBatchMetaKVFiles, - ); err != nil { - return errors.Trace(err) - } - - // Update global schema version and report all of TiDBs. - if err := rc.UpdateSchemaVersion(ctx); err != nil { - return errors.Trace(err) - } - return nil -} - -// IterMetaKVToBuildAndSaveIdMap iterates meta kv and builds id mapping and saves it to storage. 
-func (rc *LogClient) IterMetaKVToBuildAndSaveIdMap( - ctx context.Context, - tableMappingManager *stream.TableMappingManager, - files []*backuppb.DataFileInfo, -) error { - filesInDefaultCF := make([]*backuppb.DataFileInfo, 0, len(files)) - // need to look at write cf for "short value", which inlines the actual values without redirecting to default cf - filesInWriteCF := make([]*backuppb.DataFileInfo, 0, len(files)) - - for _, f := range files { - if f.Type == backuppb.FileType_Delete { - // it should not happen - // only do some preventive checks here. log.Warn("internal error: detected delete file of meta key, skip it", zap.Any("file", f)) continue } - if f.Cf == stream.WriteCF { - filesInWriteCF = append(filesInWriteCF, f) - continue - } - if f.Cf == stream.DefaultCF { + if f.Cf == consts.DefaultCF { filesInDefaultCF = append(filesInDefaultCF, f) } } @@ -1163,88 +1117,16 @@ func (rc *LogClient) IterMetaKVToBuildAndSaveIdMap( filesInDefaultCF = SortMetaKVFiles(filesInDefaultCF) filesInWriteCF = SortMetaKVFiles(filesInWriteCF) - failpoint.Inject("failed-before-id-maps-saved", func(_ failpoint.Value) { - failpoint.Return(errors.New("failpoint: failed before id maps saved")) - }) - - log.Info("start to iterate meta kv and build id map", - zap.Int("total files", len(files)), - zap.Int("default files", len(filesInDefaultCF)), - zap.Int("write files", len(filesInWriteCF))) - - // build the map and save it into external storage. - if err := rc.buildAndSaveIDMap( - ctx, - filesInDefaultCF, - filesInWriteCF, - tableMappingManager, - ); err != nil { - return errors.Trace(err) - } - failpoint.Inject("failed-after-id-maps-saved", func(_ failpoint.Value) { - failpoint.Return(errors.New("failpoint: failed after id maps saved")) - }) - return nil -} - -// buildAndSaveIDMap build id mapping and save it. 
-func (rc *LogClient) buildAndSaveIDMap( - ctx context.Context, - fsInDefaultCF []*backuppb.DataFileInfo, - fsInWriteCF []*backuppb.DataFileInfo, - tableMappingManager *stream.TableMappingManager, -) error { - if err := rc.iterAndBuildIDMap(ctx, fsInWriteCF, tableMappingManager); err != nil { - return errors.Trace(err) - } - - if err := rc.iterAndBuildIDMap(ctx, fsInDefaultCF, tableMappingManager); err != nil { - return errors.Trace(err) - } - - if err := rc.saveIDMap(ctx, tableMappingManager); err != nil { - return errors.Trace(err) - } - return nil -} - -func (rc *LogClient) iterAndBuildIDMap( - ctx context.Context, - fs []*backuppb.DataFileInfo, - tableMappingManager *stream.TableMappingManager, -) error { - for _, f := range fs { - entries, _, err := rc.ReadAllEntries(ctx, f, math.MaxUint64) - if err != nil { - return errors.Trace(err) - } - - for _, entry := range entries { - if err := tableMappingManager.ParseMetaKvAndUpdateIdMapping(&entry.E, f.GetCf()); err != nil { - return errors.Trace(err) - } - } - } - return nil + return filesInDefaultCF, filesInWriteCF } -func RestoreMetaKVFilesWithBatchMethod( +// LoadAndProcessMetaKVFilesInBatch restores meta kv files to TiKV in strict TS order. It does so in batch and after +// success it triggers an update so every TiDB node can pick up the restored content. 
+func LoadAndProcessMetaKVFilesInBatch( ctx context.Context, defaultFiles []*backuppb.DataFileInfo, writeFiles []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - restoreBatch func( - ctx context.Context, - files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, - kvEntries []*KvEntryWithTS, - filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*KvEntryWithTS, error), + processor BatchMetaKVProcessor, ) error { // the average size of each KV is 2560 Bytes // kvEntries is kvs left by the previous batch @@ -1275,7 +1157,7 @@ func RestoreMetaKVFilesWithBatchMethod( } else { // Either f.MinTS > rangeMax or f.MinTs is the filterTs we need. // So it is ok to pass f.MinTs as filterTs. - defaultKvEntries, err = restoreBatch(ctx, defaultFiles[defaultIdx:i], schemasReplace, defaultKvEntries, f.MinTs, updateStats, progressInc, stream.DefaultCF) + defaultKvEntries, err = processor.ProcessBatch(ctx, defaultFiles[defaultIdx:i], defaultKvEntries, f.MinTs, consts.DefaultCF) if err != nil { return errors.Trace(err) } @@ -1292,7 +1174,7 @@ func RestoreMetaKVFilesWithBatchMethod( break } } - writeKvEntries, err = restoreBatch(ctx, writeFiles[writeIdx:toWriteIdx], schemasReplace, writeKvEntries, f.MinTs, updateStats, progressInc, stream.WriteCF) + writeKvEntries, err = processor.ProcessBatch(ctx, writeFiles[writeIdx:toWriteIdx], writeKvEntries, f.MinTs, consts.WriteCF) if err != nil { return errors.Trace(err) } @@ -1304,11 +1186,11 @@ func RestoreMetaKVFilesWithBatchMethod( // restore the left meta kv files and entries // Notice: restoreBatch needs to realize the parameter `files` and `kvEntries` might be empty // Assert: defaultIdx <= len(defaultFiles) && writeIdx <= len(writeFiles) - _, err = restoreBatch(ctx, defaultFiles[defaultIdx:], schemasReplace, defaultKvEntries, math.MaxUint64, updateStats, progressInc, 
stream.DefaultCF) + _, err = processor.ProcessBatch(ctx, defaultFiles[defaultIdx:], defaultKvEntries, math.MaxUint64, consts.DefaultCF) if err != nil { return errors.Trace(err) } - _, err = restoreBatch(ctx, writeFiles[writeIdx:], schemasReplace, writeKvEntries, math.MaxUint64, updateStats, progressInc, stream.WriteCF) + _, err = processor.ProcessBatch(ctx, writeFiles[writeIdx:], writeKvEntries, math.MaxUint64, consts.WriteCF) if err != nil { return errors.Trace(err) } @@ -1316,6 +1198,9 @@ func RestoreMetaKVFilesWithBatchMethod( return nil } +// RestoreBatchMetaKVFiles tries to restore and rewrite meta kv to TiKV from external storage. It reads out entries +// from the given files and only restores the ones that are in the filter range, then it returns the entries outside the filter +// range back to the caller for the next iteration of restore. func (rc *LogClient) RestoreBatchMetaKVFiles( ctx context.Context, files []*backuppb.DataFileInfo, @@ -1326,10 +1211,38 @@ func (rc *LogClient) RestoreBatchMetaKVFiles( progressInc func(), cf string, ) ([]*KvEntryWithTS, error) { - nextKvEntries := make([]*KvEntryWithTS, 0) + curSortedKvEntries, filteredOutKvEntries, err := rc.filterAndSortKvEntriesFromFiles(ctx, files, kvEntries, filterTS) + if err != nil { + return nil, errors.Trace(err) + } + + if len(curSortedKvEntries) == 0 { + return filteredOutKvEntries, nil + } + + // restore and rewrite these entries to TiKV with rawPut() method. 
+ kvCount, size, err := rc.restoreAndRewriteMetaKvEntries(ctx, schemasReplace, curSortedKvEntries, cf) + if err != nil { + return nil, errors.Trace(err) + } + + updateStats(kvCount, size) + for i := 0; i < len(files); i++ { + progressInc() + } + return filteredOutKvEntries, nil +} + +func (rc *LogClient) filterAndSortKvEntriesFromFiles( + ctx context.Context, + files []*backuppb.DataFileInfo, + kvEntries []*KvEntryWithTS, + filterTS uint64, +) ([]*KvEntryWithTS, []*KvEntryWithTS, error) { + filteredOutKvEntries := make([]*KvEntryWithTS, 0) curKvEntries := make([]*KvEntryWithTS, 0) if len(files) == 0 && len(kvEntries) == 0 { - return nextKvEntries, nil + return curKvEntries, filteredOutKvEntries, nil } // filter the kv from kvEntries again. @@ -1337,40 +1250,29 @@ func (rc *LogClient) RestoreBatchMetaKVFiles( if kv.Ts < filterTS { curKvEntries = append(curKvEntries, kv) } else { - nextKvEntries = append(nextKvEntries, kv) + filteredOutKvEntries = append(filteredOutKvEntries, kv) } } - // read all of entries from files. + // read all entries from files. for _, f := range files { - es, nextEs, err := rc.ReadAllEntries(ctx, f, filterTS) + es, filteredOutEs, err := rc.ReadFilteredEntriesFromFiles(ctx, f, filterTS) if err != nil { - return nextKvEntries, errors.Trace(err) + return nil, nil, errors.Trace(err) } curKvEntries = append(curKvEntries, es...) - nextKvEntries = append(nextKvEntries, nextEs...) + filteredOutKvEntries = append(filteredOutKvEntries, filteredOutEs...) } // sort these entries. slices.SortFunc(curKvEntries, func(i, j *KvEntryWithTS) int { return cmp.Compare(i.Ts, j.Ts) }) - - // restore these entries with rawPut() method. 
- kvCount, size, err := rc.restoreMetaKvEntries(ctx, schemasReplace, curKvEntries, cf) - if err != nil { - return nextKvEntries, errors.Trace(err) - } - - updateStats(kvCount, size) - for i := 0; i < len(files); i++ { - progressInc() - } - return nextKvEntries, nil + return curKvEntries, filteredOutKvEntries, nil } -func (rc *LogClient) restoreMetaKvEntries( +func (rc *LogClient) restoreAndRewriteMetaKvEntries( ctx context.Context, sr *stream.SchemasReplace, entries []*KvEntryWithTS, @@ -1384,10 +1286,10 @@ func (rc *LogClient) restoreMetaKvEntries( rc.rawKVClient.SetColumnFamily(columnFamily) for _, entry := range entries { - log.Debug("before rewrte entry", zap.Uint64("key-ts", entry.Ts), zap.Int("key-len", len(entry.E.Key)), + log.Debug("before rewriting entry", zap.Uint64("key-ts", entry.Ts), zap.Int("key-len", len(entry.E.Key)), zap.Int("value-len", len(entry.E.Value)), zap.ByteString("key", entry.E.Key)) - newEntry, err := sr.RewriteKvEntry(&entry.E, columnFamily) + newEntry, err := sr.RewriteMetaKvEntry(&entry.E, columnFamily) if err != nil { log.Error("rewrite txn entry failed", zap.Int("klen", len(entry.E.Key)), logutil.Key("txn-key", entry.E.Key)) @@ -1457,8 +1359,8 @@ func (rc *LogClient) GenGlobalIDs(ctx context.Context, n int) ([]int64, error) { return ids, err } -// UpdateSchemaVersion updates schema version by transaction way. -func (rc *LogClient) UpdateSchemaVersion(ctx context.Context) error { +// UpdateSchemaVersionFullReload updates schema version to trigger a full reload in transaction way. 
+func (rc *LogClient) UpdateSchemaVersionFullReload(ctx context.Context) error { storage := rc.GetDomain().Store() var schemaVersion int64 @@ -1813,6 +1715,24 @@ func (rc *LogClient) GetGCRows() []*stream.PreDelRangeQuery { const PITRIdMapBlockSize int = 524288 +func (rc *LogClient) SaveIdMapWithFailPoints( + ctx context.Context, + manager *stream.TableMappingManager, +) error { + failpoint.Inject("failed-before-id-maps-saved", func(_ failpoint.Value) { + failpoint.Return(errors.New("failpoint: failed before id maps saved")) + }) + + if err := rc.saveIDMap(ctx, manager); err != nil { + return errors.Trace(err) + } + + failpoint.Inject("failed-after-id-maps-saved", func(_ failpoint.Value) { + failpoint.Return(errors.New("failpoint: failed after id maps saved")) + }) + return nil +} + // saveIDMap saves the id mapping information. func (rc *LogClient) saveIDMap( ctx context.Context, @@ -1844,7 +1764,7 @@ func (rc *LogClient) saveIDMap( if rc.useCheckpoint { log.Info("save checkpoint task info with InLogRestoreAndIdMapPersist status") if err := checkpoint.SaveCheckpointProgress(ctx, rc.unsafeSession, &checkpoint.CheckpointProgress{ - Progress: checkpoint.InLogRestoreAndIdMapPersist, + Progress: checkpoint.InLogRestoreAndIdMapPersisted, }); err != nil { return errors.Trace(err) } diff --git a/br/pkg/restore/log_client/client_test.go b/br/pkg/restore/log_client/client_test.go index 1b16b25ecfa46..504643c6aac3c 100644 --- a/br/pkg/restore/log_client/client_test.go +++ b/br/pkg/restore/log_client/client_test.go @@ -35,6 +35,7 @@ import ( "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/br/pkg/utiltest" "github.com/pingcap/tidb/pkg/domain" @@ -47,7 +48,6 @@ import ( "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/codec" 
"github.com/pingcap/tidb/pkg/util/sqlexec" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/stretchr/testify/require" "google.golang.org/grpc/keepalive" ) @@ -95,7 +95,7 @@ func TestDeleteRangeQueryExec(t *testing.T) { ctx := context.Background() m := mc g := gluetidb.New() - client := logclient.NewRestoreClient( + client := logclient.NewLogClient( split.NewFakePDClient(nil, false, nil), nil, nil, keepalive.ClientParameters{}) err := client.Init(ctx, g, m.Storage) require.NoError(t, err) @@ -114,7 +114,7 @@ func TestDeleteRangeQuery(t *testing.T) { m := mc g := gluetidb.New() - client := logclient.NewRestoreClient( + client := logclient.NewLogClient( split.NewFakePDClient(nil, false, nil), nil, nil, keepalive.ClientParameters{}) err := client.Init(ctx, g, m.Storage) require.NoError(t, err) @@ -136,19 +136,8 @@ func TestDeleteRangeQuery(t *testing.T) { } } -func MockEmptySchemasReplace() *stream.SchemasReplace { - dbMap := make(map[stream.UpstreamID]*stream.DBReplace) - return stream.NewSchemasReplace( - dbMap, - nil, - 1, - filter.All(), - nil, - ) -} - func TestRestoreBatchMetaKVFiles(t *testing.T) { - client := logclient.NewRestoreClient(nil, nil, nil, keepalive.ClientParameters{}) + client := logclient.NewLogClient(nil, nil, nil, keepalive.ClientParameters{}) files := []*backuppb.DataFileInfo{} // test empty files and entries next, err := client.RestoreBatchMetaKVFiles(context.Background(), files[0:], nil, make([]*logclient.KvEntryWithTS, 0), math.MaxUint64, nil, nil, "") @@ -157,41 +146,35 @@ func TestRestoreBatchMetaKVFiles(t *testing.T) { } func TestRestoreMetaKVFilesWithBatchMethod1(t *testing.T) { - files_default := []*backuppb.DataFileInfo{} - files_write := []*backuppb.DataFileInfo{} + var filesDefault []*backuppb.DataFileInfo + var filesWrite []*backuppb.DataFileInfo batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - 
nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { require.Equal(t, 0, len(entries)) require.Equal(t, 0, len(files)) batchCount++ return nil, nil }, + } + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 2) } func TestRestoreMetaKVFilesWithBatchMethod2_default_empty(t *testing.T) { - files_default := []*backuppb.DataFileInfo{} - files_write := []*backuppb.DataFileInfo{ + var filesDefault []*backuppb.DataFileInfo + filesWrite := []*backuppb.DataFileInfo{ { Path: "f1", MinTs: 100, @@ -200,89 +183,78 @@ func TestRestoreMetaKVFilesWithBatchMethod2_default_empty(t *testing.T) { } batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { - require.Equal(t, stream.DefaultCF, cf) + require.Equal(t, consts.DefaultCF, cf) batchCount++ } else { require.Equal(t, 0, len(entries)) require.Equal(t, 1, len(files)) require.Equal(t, uint64(100), files[0].MinTs) - require.Equal(t, stream.WriteCF, cf) + require.Equal(t, 
consts.WriteCF, cf) } require.Equal(t, uint64(math.MaxUint64), filterTS) return nil, nil }, + } + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 1) } func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_1(t *testing.T) { - files_default := []*backuppb.DataFileInfo{ + filesDefault := []*backuppb.DataFileInfo{ { Path: "f1", MinTs: 100, MaxTs: 120, }, } - files_write := []*backuppb.DataFileInfo{} + var filesWrite []*backuppb.DataFileInfo batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { - require.Equal(t, stream.WriteCF, cf) + require.Equal(t, consts.WriteCF, cf) batchCount++ } else { require.Equal(t, 0, len(entries)) require.Equal(t, 1, len(files)) require.Equal(t, uint64(100), files[0].MinTs) - require.Equal(t, stream.DefaultCF, cf) + require.Equal(t, consts.DefaultCF, cf) } require.Equal(t, uint64(math.MaxUint64), filterTS) return nil, nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 1) } func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { - files_default := []*backuppb.DataFileInfo{ + filesDefault := []*backuppb.DataFileInfo{ { Path: "f1", MinTs: 100, @@ -296,31 +268,19 @@ func 
TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { Length: logclient.MetaKVBatchSize, }, } - files_write := []*backuppb.DataFileInfo{} + var filesWrite []*backuppb.DataFileInfo emptyCount := 0 batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { // write - write - require.Equal(t, stream.WriteCF, cf) + require.Equal(t, consts.WriteCF, cf) emptyCount++ if emptyCount == 1 { require.Equal(t, uint64(110), filterTS) @@ -331,7 +291,7 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { // default - default batchCount++ require.Equal(t, 1, len(files)) - require.Equal(t, stream.DefaultCF, cf) + require.Equal(t, consts.DefaultCF, cf) if batchCount == 1 { require.Equal(t, uint64(100), files[0].MinTs) require.Equal(t, uint64(110), filterTS) @@ -341,6 +301,13 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { } return nil, nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 2) @@ -348,7 +315,7 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { } func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { - files_default := []*backuppb.DataFileInfo{ + filesDefault := []*backuppb.DataFileInfo{ { Path: "f1", MinTs: 100, @@ -362,31 +329,19 @@ func 
TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { Length: logclient.MetaKVBatchSize, }, } - files_write := []*backuppb.DataFileInfo{} + var filesWrite []*backuppb.DataFileInfo emptyCount := 0 batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { // write - write - require.Equal(t, stream.WriteCF, cf) + require.Equal(t, consts.WriteCF, cf) emptyCount++ if emptyCount == 1 { require.Equal(t, uint64(110), filterTS) @@ -397,7 +352,7 @@ func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { // default - default batchCount++ require.Equal(t, 1, len(files)) - require.Equal(t, stream.DefaultCF, cf) + require.Equal(t, consts.DefaultCF, cf) if batchCount == 1 { require.Equal(t, uint64(100), files[0].MinTs) require.Equal(t, uint64(110), filterTS) @@ -407,6 +362,13 @@ func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { } return nil, nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 2) @@ -473,31 +435,27 @@ func TestRestoreMetaKVFilesWithBatchMethod3(t *testing.T) { result := make(map[int][]*backuppb.DataFileInfo) resultKV := make(map[int]int) - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - defaultFiles, - writeFiles, - sr, - nil, - nil, - func( - ctx 
context.Context, - fs []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, + files []*backuppb.DataFileInfo, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { - result[batchCount] = fs + cf string) ([]*logclient.KvEntryWithTS, error) { + result[batchCount] = files t.Log(filterTS) resultKV[batchCount] = len(entries) batchCount++ return make([]*logclient.KvEntryWithTS, batchCount), nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + defaultFiles, + writeFiles, + mockProcessor, ) + require.Nil(t, err) require.Equal(t, len(result), 4) require.Equal(t, result[0], defaultFiles[0:3]) @@ -559,29 +517,25 @@ func TestRestoreMetaKVFilesWithBatchMethod4(t *testing.T) { batchCount := 0 result := make(map[int][]*backuppb.DataFileInfo) - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - defaultFiles, - writeFiles, - sr, - nil, - nil, - func( - ctx context.Context, - fs []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, + files []*backuppb.DataFileInfo, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { - result[batchCount] = fs + cf string) ([]*logclient.KvEntryWithTS, error) { + result[batchCount] = files batchCount++ return nil, nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + defaultFiles, + writeFiles, + mockProcessor, ) + require.Nil(t, err) require.Equal(t, len(result), 4) require.Equal(t, result[0], defaultFiles[0:2]) @@ -639,28 +593,22 @@ func TestRestoreMetaKVFilesWithBatchMethod5(t 
*testing.T) { batchCount := 0 result := make(map[int][]*backuppb.DataFileInfo) - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - defaultFiles, - writeFiles, - sr, - nil, - nil, - func( - ctx context.Context, - fs []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, + files []*backuppb.DataFileInfo, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { - result[batchCount] = fs + cf string) ([]*logclient.KvEntryWithTS, error) { + result[batchCount] = files batchCount++ return nil, nil }, + } + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + defaultFiles, + writeFiles, + mockProcessor, ) require.Nil(t, err) require.Equal(t, len(result), 4) @@ -736,30 +684,24 @@ func TestRestoreMetaKVFilesWithBatchMethod6(t *testing.T) { result := make(map[int][]*backuppb.DataFileInfo) resultKV := make(map[int]int) - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - defaultFiles, - writeFiles, - sr, - nil, - nil, - func( - ctx context.Context, - fs []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, + files []*backuppb.DataFileInfo, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { - result[batchCount] = fs + cf string) ([]*logclient.KvEntryWithTS, error) { + result[batchCount] = files t.Log(filterTS) resultKV[batchCount] = len(entries) batchCount++ return make([]*logclient.KvEntryWithTS, batchCount), nil }, + } + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + 
defaultFiles, + writeFiles, + mockProcessor, ) require.Nil(t, err) require.Equal(t, len(result), 6) @@ -839,20 +781,20 @@ func TestApplyKVFilesWithSingelMethod(t *testing.T) { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, }, { Path: "log1", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, }, } @@ -894,28 +836,28 @@ func TestApplyKVFilesWithBatchMethod1(t *testing.T) { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log1", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -923,7 +865,7 @@ func TestApplyKVFilesWithBatchMethod1(t *testing.T) { Path: "log2", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -977,35 +919,35 @@ func TestApplyKVFilesWithBatchMethod2(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: 
backuppb.FileType_Put, RegionId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -1013,7 +955,7 @@ func TestApplyKVFilesWithBatchMethod2(t *testing.T) { Path: "log6", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -1068,28 +1010,28 @@ func TestApplyKVFilesWithBatchMethod3(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 3, }, @@ -1097,7 +1039,7 @@ func TestApplyKVFilesWithBatchMethod3(t *testing.T) { Path: "log6", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 3, }, @@ -1151,35 +1093,35 @@ func TestApplyKVFilesWithBatchMethod4(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, TableId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 2, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: 
backuppb.FileType_Put, TableId: 2, }, @@ -1229,35 +1171,35 @@ func TestApplyKVFilesWithBatchMethod5(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, TableId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 2, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, TableId: 2, }, @@ -1384,16 +1326,16 @@ func TestInitSchemasReplaceForDDL(t *testing.T) { { client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), fakeSession{}) - cfg := &logclient.BuildTableMappingManagerConfig{CurrentIdMapSaved: false} - _, err := client.BuildTableMappingManager(ctx, cfg) + cfg := &logclient.GetIDMapConfig{LoadSavedIDMap: false} + _, err := client.GetBaseIDMap(ctx, cfg) require.Error(t, err) require.Regexp(t, "failed to get pitr id map from mysql.tidb_pitr_id_map.* [2, 1]", err.Error()) } { client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), fakeSession{}) - cfg := &logclient.BuildTableMappingManagerConfig{CurrentIdMapSaved: true} - _, err := client.BuildTableMappingManager(ctx, cfg) + cfg := &logclient.GetIDMapConfig{LoadSavedIDMap: true} + _, err := client.GetBaseIDMap(ctx, cfg) require.Error(t, err) require.Regexp(t, "failed to get pitr id map from mysql.tidb_pitr_id_map.* [1, 1]", err.Error()) } @@ -1406,8 +1348,8 @@ func TestInitSchemasReplaceForDDL(t *testing.T) { se, err := g.CreateSession(s.Mock.Storage) require.NoError(t, err) client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), se) - cfg := 
&logclient.BuildTableMappingManagerConfig{CurrentIdMapSaved: true} - _, err = client.BuildTableMappingManager(ctx, cfg) + cfg := &logclient.GetIDMapConfig{LoadSavedIDMap: true} + _, err = client.GetBaseIDMap(ctx, cfg) require.Error(t, err) require.Contains(t, err.Error(), "miss upstream table information at `start-ts`(1) but the full backup path is not specified") } @@ -1478,7 +1420,7 @@ func TestPITRIDMap(t *testing.T) { require.NoError(t, err) client := logclient.TEST_NewLogClient(123, 1, 2, 3, nil, se) baseTableMappingManager := &stream.TableMappingManager{ - DbReplaceMap: getDBMap(), + DBReplaceMap: getDBMap(), } err = client.TEST_saveIDMap(ctx, baseTableMappingManager) require.NoError(t, err) @@ -1492,9 +1434,9 @@ func TestPITRIDMap(t *testing.T) { newSchemaReplaces, err = client.TEST_initSchemasMap(ctx, 2) require.NoError(t, err) - require.Equal(t, len(baseTableMappingManager.DbReplaceMap), len(newSchemaReplaces)) + require.Equal(t, len(baseTableMappingManager.DBReplaceMap), len(newSchemaReplaces)) for _, dbMap := range newSchemaReplaces { - baseDbMap := baseTableMappingManager.DbReplaceMap[dbMap.IdMap.UpstreamId] + baseDbMap := baseTableMappingManager.DBReplaceMap[dbMap.IdMap.UpstreamId] require.NotNil(t, baseDbMap) require.Equal(t, baseDbMap.DbID, dbMap.IdMap.DownstreamId) require.Equal(t, baseDbMap.Name, dbMap.Name) @@ -1986,3 +1928,23 @@ func fakeRowKey(tableID, rowID int64) kv.Key { func fakeRowRawKey(tableID, rowID int64) kv.Key { return tablecodec.EncodeRecordKey(tablecodec.GenTableRecordPrefix(tableID), kv.IntHandle(rowID)) } + +type mockBatchProcessor struct { + processFunc func( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*logclient.KvEntryWithTS, + filterTS uint64, + cf string, + ) ([]*logclient.KvEntryWithTS, error) +} + +func (m *mockBatchProcessor) ProcessBatch( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*logclient.KvEntryWithTS, + filterTS uint64, + cf string, +) ([]*logclient.KvEntryWithTS, 
error) { + return m.processFunc(ctx, files, entries, filterTS, cf) +} diff --git a/br/pkg/restore/log_client/export_test.go b/br/pkg/restore/log_client/export_test.go index f78a54bf50c8a..0f9be1d696f65 100644 --- a/br/pkg/restore/log_client/export_test.go +++ b/br/pkg/restore/log_client/export_test.go @@ -65,19 +65,19 @@ func (rc *LogClient) TEST_saveIDMap( ctx context.Context, m *stream.TableMappingManager, ) error { - return rc.saveIDMap(ctx, m) + return rc.SaveIdMapWithFailPoints(ctx, m) } func (rc *LogClient) TEST_initSchemasMap( ctx context.Context, restoreTS uint64, ) ([]*backuppb.PitrDBMap, error) { - return rc.initSchemasMap(ctx, restoreTS) + return rc.loadSchemasMap(ctx, restoreTS) } // readStreamMetaByTS is used for streaming task. collect all meta file by TS, it is for test usage. -func (rc *LogFileManager) ReadStreamMeta(ctx context.Context) ([]*MetaName, error) { - metas, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) ReadStreamMeta(ctx context.Context) ([]*MetaName, error) { + metas, err := lm.streamingMeta(ctx) if err != nil { return nil, err } diff --git a/br/pkg/restore/log_client/import.go b/br/pkg/restore/log_client/import.go index a4dbf4ca73839..0b5581c706fa2 100644 --- a/br/pkg/restore/log_client/import.go +++ b/br/pkg/restore/log_client/import.go @@ -35,9 +35,9 @@ import ( importclient "github.com/pingcap/tidb/br/pkg/restore/internal/import_client" "github.com/pingcap/tidb/br/pkg/restore/split" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" - "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/kv" pd "github.com/tikv/pd/client" "go.uber.org/multierr" @@ -253,7 +253,7 @@ func (importer *LogFileImporter) downloadAndApplyKVFile( RangeLength: file.RangeLength, IsDelete: file.Type == backuppb.FileType_Delete, StartTs: func() uint64 { - if file.Cf == stream.DefaultCF { + if file.Cf == 
consts.DefaultCF { return shiftStartTS } return startTS diff --git a/br/pkg/restore/log_client/log_file_manager.go b/br/pkg/restore/log_client/log_file_manager.go index 4c2992467a2ab..ae44b3c4fbb4e 100644 --- a/br/pkg/restore/log_client/log_file_manager.go +++ b/br/pkg/restore/log_client/log_file_manager.go @@ -19,6 +19,8 @@ import ( berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/util/codec" @@ -84,6 +86,7 @@ type streamMetadataHelper interface { encryptionInfo *encryptionpb.FileEncryptionInfo, ) ([]byte, error) ParseToMetadata(rawMetaData []byte) (*backuppb.Metadata, error) + Close() } // LogFileManager is the manager for log files of a certain restoration, @@ -103,8 +106,8 @@ type LogFileManager struct { storage storage.ExternalStorage helper streamMetadataHelper - withMigraionBuilder *WithMigrationsBuilder - withMigrations *WithMigrations + withMigrationBuilder *WithMigrationsBuilder + withMigrations *WithMigrations metadataDownloadBatchSize uint } @@ -130,12 +133,12 @@ type DDLMetaGroup struct { // Generally the config cannot be changed during its lifetime. 
func CreateLogFileManager(ctx context.Context, init LogFileManagerInit) (*LogFileManager, error) { fm := &LogFileManager{ - startTS: init.StartTS, - restoreTS: init.RestoreTS, - storage: init.Storage, - helper: stream.NewMetadataHelper(stream.WithEncryptionManager(init.EncryptionManager)), - withMigraionBuilder: init.MigrationsBuilder, - withMigrations: init.Migrations, + startTS: init.StartTS, + restoreTS: init.RestoreTS, + storage: init.Storage, + helper: stream.NewMetadataHelper(stream.WithEncryptionManager(init.EncryptionManager)), + withMigrationBuilder: init.MigrationsBuilder, + withMigrations: init.Migrations, metadataDownloadBatchSize: init.MetadataDownloadBatchSize, } @@ -146,30 +149,30 @@ func CreateLogFileManager(ctx context.Context, init LogFileManagerInit) (*LogFil return fm, nil } -func (rc *LogFileManager) BuildMigrations(migs []*backuppb.Migration) { - w := rc.withMigraionBuilder.Build(migs) - rc.withMigrations = &w +func (lm *LogFileManager) BuildMigrations(migs []*backuppb.Migration) { + w := lm.withMigrationBuilder.Build(migs) + lm.withMigrations = &w } -func (rc *LogFileManager) ShiftTS() uint64 { - return rc.shiftStartTS +func (lm *LogFileManager) ShiftTS() uint64 { + return lm.shiftStartTS } -func (rc *LogFileManager) loadShiftTS(ctx context.Context) error { +func (lm *LogFileManager) loadShiftTS(ctx context.Context) error { shiftTS := struct { sync.Mutex value uint64 exists bool }{} - err := stream.FastUnmarshalMetaData(ctx, rc.storage, rc.metadataDownloadBatchSize, func(path string, raw []byte) error { - m, err := rc.helper.ParseToMetadata(raw) + err := stream.FastUnmarshalMetaData(ctx, lm.storage, lm.metadataDownloadBatchSize, func(path string, raw []byte) error { + m, err := lm.helper.ParseToMetadata(raw) if err != nil { return err } log.Info("read meta from storage and parse", zap.String("path", path), zap.Uint64("min-ts", m.MinTs), zap.Uint64("max-ts", m.MaxTs), zap.Int32("meta-version", int32(m.MetaVersion))) - ts, ok := 
stream.UpdateShiftTS(m, rc.startTS, rc.restoreTS) + ts, ok := stream.UpdateShiftTS(m, lm.startTS, lm.restoreTS) shiftTS.Lock() if ok && (!shiftTS.exists || shiftTS.value > ts) { shiftTS.value = ts @@ -183,31 +186,31 @@ func (rc *LogFileManager) loadShiftTS(ctx context.Context) error { return err } if !shiftTS.exists { - rc.shiftStartTS = rc.startTS - rc.withMigraionBuilder.SetShiftStartTS(rc.shiftStartTS) + lm.shiftStartTS = lm.startTS + lm.withMigrationBuilder.SetShiftStartTS(lm.shiftStartTS) return nil } - rc.shiftStartTS = shiftTS.value - rc.withMigraionBuilder.SetShiftStartTS(rc.shiftStartTS) + lm.shiftStartTS = shiftTS.value + lm.withMigrationBuilder.SetShiftStartTS(lm.shiftStartTS) return nil } -func (rc *LogFileManager) streamingMeta(ctx context.Context) (MetaNameIter, error) { - return rc.streamingMetaByTS(ctx, rc.restoreTS) +func (lm *LogFileManager) streamingMeta(ctx context.Context) (MetaNameIter, error) { + return lm.streamingMetaByTS(ctx, lm.restoreTS) } -func (rc *LogFileManager) streamingMetaByTS(ctx context.Context, restoreTS uint64) (MetaNameIter, error) { - it, err := rc.createMetaIterOver(ctx, rc.storage) +func (lm *LogFileManager) streamingMetaByTS(ctx context.Context, restoreTS uint64) (MetaNameIter, error) { + it, err := lm.createMetaIterOver(ctx, lm.storage) if err != nil { return nil, err } filtered := iter.FilterOut(it, func(metaname *MetaName) bool { - return restoreTS < metaname.meta.MinTs || metaname.meta.MaxTs < rc.shiftStartTS + return restoreTS < metaname.meta.MinTs || metaname.meta.MaxTs < lm.shiftStartTS }) return filtered, nil } -func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.ExternalStorage) (MetaNameIter, error) { +func (lm *LogFileManager) createMetaIterOver(ctx context.Context, s storage.ExternalStorage) (MetaNameIter, error) { opt := &storage.WalkOption{SubDir: stream.GetStreamBackupMetaPrefix()} names := []string{} err := s.WalkDir(ctx, opt, func(path string, size int64) error { @@ -226,7 +229,7 
@@ func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.Exte if err != nil { return nil, errors.Annotatef(err, "failed during reading file %s", name) } - meta, err := rc.helper.ParseToMetadata(f) + meta, err := lm.helper.ParseToMetadata(f) if err != nil { return nil, errors.Annotatef(err, "failed to parse metadata of file %s", name) } @@ -235,12 +238,12 @@ func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.Exte // TODO: maybe we need to be able to adjust the concurrency to download files, // which currently is the same as the chunk size reader := iter.Transform(namesIter, readMeta, - iter.WithChunkSize(rc.metadataDownloadBatchSize), iter.WithConcurrency(rc.metadataDownloadBatchSize)) + iter.WithChunkSize(lm.metadataDownloadBatchSize), iter.WithConcurrency(lm.metadataDownloadBatchSize)) return reader, nil } -func (rc *LogFileManager) FilterDataFiles(m MetaNameIter) LogIter { - ms := rc.withMigrations.Metas(m) +func (lm *LogFileManager) FilterDataFiles(m MetaNameIter) LogIter { + ms := lm.withMigrations.Metas(m) return iter.FlatMap(ms, func(m *MetaWithMigrations) LogIter { gs := m.Physicals(iter.Enumerate(iter.FromSlice(m.meta.FileGroups))) return iter.FlatMap(gs, func(gim *PhysicalWithMigrations) LogIter { @@ -251,7 +254,7 @@ func (rc *LogFileManager) FilterDataFiles(m MetaNameIter) LogIter { if m.meta.MetaVersion > backuppb.MetaVersion_V1 { di.Item.Path = gim.physical.Item.Path } - return di.Item.IsMeta || rc.ShouldFilterOut(di.Item) + return di.Item.IsMeta || lm.ShouldFilterOutByTs(di.Item) }) return iter.Map(fs, func(di FileIndex) *LogDataFileInfo { return &LogDataFileInfo{ @@ -270,14 +273,14 @@ func (rc *LogFileManager) FilterDataFiles(m MetaNameIter) LogIter { }) } -// ShouldFilterOut checks whether a file should be filtered out via the current client. 
-func (rc *LogFileManager) ShouldFilterOut(d *backuppb.DataFileInfo) bool { - return d.MinTs > rc.restoreTS || - (d.Cf == stream.WriteCF && d.MaxTs < rc.startTS) || - (d.Cf == stream.DefaultCF && d.MaxTs < rc.shiftStartTS) +// ShouldFilterOutByTs checks whether a file should be filtered out via the current client. +func (lm *LogFileManager) ShouldFilterOutByTs(d *backuppb.DataFileInfo) bool { + return d.MinTs > lm.restoreTS || + (d.Cf == consts.WriteCF && d.MaxTs < lm.startTS) || + (d.Cf == consts.DefaultCF && d.MaxTs < lm.shiftStartTS) } -func (rc *LogFileManager) collectDDLFilesAndPrepareCache( +func (lm *LogFileManager) collectDDLFilesAndPrepareCache( ctx context.Context, files MetaGroupIter, ) ([]Log, error) { @@ -291,38 +294,38 @@ func (rc *LogFileManager) collectDDLFilesAndPrepareCache( dataFileInfos := make([]*backuppb.DataFileInfo, 0) for _, g := range fs.Item { - rc.helper.InitCacheEntry(g.Path, len(g.FileMetas)) + lm.helper.InitCacheEntry(g.Path, len(g.FileMetas)) dataFileInfos = append(dataFileInfos, g.FileMetas...) } return dataFileInfos, nil } -// LoadDDLFilesAndCountDMLFiles loads all DDL files needs to be restored in the restoration. +// LoadDDLFiles loads all DDL files needs to be restored in the restoration. // This function returns all DDL files needing directly because we need sort all of them. -func (rc *LogFileManager) LoadDDLFilesAndCountDMLFiles(ctx context.Context) ([]Log, error) { - m, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) LoadDDLFiles(ctx context.Context) ([]Log, error) { + m, err := lm.streamingMeta(ctx) if err != nil { return nil, err } - mg := rc.FilterMetaFiles(m) + mg := lm.FilterMetaFiles(m) - return rc.collectDDLFilesAndPrepareCache(ctx, mg) + return lm.collectDDLFilesAndPrepareCache(ctx, mg) } // LoadDMLFiles loads all DML files needs to be restored in the restoration. // This function returns a stream, because there are usually many DML files need to be restored. 
-func (rc *LogFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { - m, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { + m, err := lm.streamingMeta(ctx) if err != nil { return nil, err } - l := rc.FilterDataFiles(m) + l := lm.FilterDataFiles(m) return l, nil } -func (rc *LogFileManager) FilterMetaFiles(ms MetaNameIter) MetaGroupIter { +func (lm *LogFileManager) FilterMetaFiles(ms MetaNameIter) MetaGroupIter { return iter.FlatMap(ms, func(m *MetaName) MetaGroupIter { return iter.Map(iter.FromSlice(m.meta.FileGroups), func(g *backuppb.DataFileGroup) DDLMetaGroup { metas := iter.FilterOut(iter.FromSlice(g.DataFilesInfo), func(d Log) bool { @@ -330,7 +333,7 @@ func (rc *LogFileManager) FilterMetaFiles(ms MetaNameIter) MetaGroupIter { if m.meta.MetaVersion > backuppb.MetaVersion_V1 { d.Path = g.Path } - if rc.ShouldFilterOut(d) { + if lm.ShouldFilterOutByTs(d) { return true } // count the progress @@ -346,12 +349,12 @@ func (rc *LogFileManager) FilterMetaFiles(ms MetaNameIter) MetaGroupIter { }) } -// Fetch compactions that may contain file less than the TS. -func (rc *LogFileManager) GetCompactionIter(ctx context.Context) iter.TryNextor[*backuppb.LogFileSubcompaction] { - return rc.withMigrations.Compactions(ctx, rc.storage) +// GetCompactionIter fetches compactions that may contain file less than the TS. +func (lm *LogFileManager) GetCompactionIter(ctx context.Context) iter.TryNextor[*backuppb.LogFileSubcompaction] { + return lm.withMigrations.Compactions(ctx, lm.storage) } -// the kv entry with ts, the ts is decoded from entry. +// KvEntryWithTS is kv entry with ts, the ts is decoded from entry. type KvEntryWithTS struct { E kv.Entry Ts uint64 @@ -367,17 +370,17 @@ func getKeyTS(key []byte) (uint64, error) { return ts, err } -// ReadAllEntries loads content of a log file, with filtering out no needed entries. 
-func (rc *LogFileManager) ReadAllEntries( +// ReadFilteredEntriesFromFiles loads content of a log file from external storage, and filter out entries based on TS. +func (lm *LogFileManager) ReadFilteredEntriesFromFiles( ctx context.Context, file Log, filterTS uint64, ) ([]*KvEntryWithTS, []*KvEntryWithTS, error) { kvEntries := make([]*KvEntryWithTS, 0) - nextKvEntries := make([]*KvEntryWithTS, 0) + filteredOutKvEntries := make([]*KvEntryWithTS, 0) - buff, err := rc.helper.ReadFile(ctx, file.Path, file.RangeOffset, file.RangeLength, file.CompressionType, - rc.storage, file.FileEncryptionInfo) + buff, err := lm.helper.ReadFile(ctx, file.Path, file.RangeOffset, file.RangeLength, file.CompressionType, + lm.storage, file.FileEncryptionInfo) if err != nil { return nil, nil, errors.Trace(err) } @@ -396,7 +399,7 @@ func (rc *LogFileManager) ReadAllEntries( txnEntry := kv.Entry{Key: iter.Key(), Value: iter.Value()} - if !stream.MaybeDBOrDDLJobHistoryKey(txnEntry.Key) { + if !utils.IsDBOrDDLJobHistoryKey(txnEntry.Key) { // only restore mDB and mDDLHistory continue } @@ -408,11 +411,11 @@ func (rc *LogFileManager) ReadAllEntries( // The commitTs in write CF need be limited on [startTs, restoreTs]. // We can restore more key-value in default CF. 
- if ts > rc.restoreTS { + if ts > lm.restoreTS { continue - } else if file.Cf == stream.WriteCF && ts < rc.startTS { + } else if file.Cf == consts.WriteCF && ts < lm.startTS { continue - } else if file.Cf == stream.DefaultCF && ts < rc.shiftStartTS { + } else if file.Cf == consts.DefaultCF && ts < lm.shiftStartTS { continue } @@ -428,11 +431,17 @@ func (rc *LogFileManager) ReadAllEntries( if ts < filterTS { kvEntries = append(kvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) } else { - nextKvEntries = append(nextKvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) + filteredOutKvEntries = append(filteredOutKvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) } } - return kvEntries, nextKvEntries, nil + return kvEntries, filteredOutKvEntries, nil +} + +func (lm *LogFileManager) Close() { + if lm.helper != nil { + lm.helper.Close() + } } func Subcompactions(ctx context.Context, prefix string, s storage.ExternalStorage) SubCompactionIter { diff --git a/br/pkg/restore/log_client/log_file_manager_test.go b/br/pkg/restore/log_client/log_file_manager_test.go index 79813d6ef78f2..490c26f3ad6dc 100644 --- a/br/pkg/restore/log_client/log_file_manager_test.go +++ b/br/pkg/restore/log_client/log_file_manager_test.go @@ -23,6 +23,7 @@ import ( logclient "github.com/pingcap/tidb/br/pkg/restore/log_client" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/util/codec" @@ -55,7 +56,7 @@ func wr(start, end uint64, minBegin uint64) *backuppb.DataFileInfo { MinTs: start, MaxTs: end, MinBeginTsInDefaultCf: minBegin, - Cf: stream.WriteCF, + Cf: consts.WriteCF, } } @@ -66,7 +67,7 @@ func dr(start, end uint64) *backuppb.DataFileInfo { Path: fmt.Sprintf("write-%06d", id), MinTs: start, MaxTs: end, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, } } @@ -491,7 +492,7 @@ func testFileManagerWithMeta(t *testing.T, m 
metaMaker) { ), ).Item } else { - data, err := fm.LoadDDLFilesAndCountDMLFiles(ctx) + data, err := fm.LoadDDLFiles(ctx) req.NoError(err) r = data } @@ -618,8 +619,8 @@ func TestReadAllEntries(t *testing.T) { data, file := generateKvData() fm := logclient.TEST_NewLogFileManager(35, 75, 25, &logclient.FakeStreamMetadataHelper{Data: data}) { - file.Cf = stream.WriteCF - kvEntries, nextKvEntries, err := fm.ReadAllEntries(ctx, file, 50) + file.Cf = consts.WriteCF + kvEntries, nextKvEntries, err := fm.ReadFilteredEntriesFromFiles(ctx, file, 50) require.NoError(t, err) require.Equal(t, []*logclient.KvEntryWithTS{ encodekvEntryWithTS("mDDL", 37), @@ -631,8 +632,8 @@ func TestReadAllEntries(t *testing.T) { }, nextKvEntries) } { - file.Cf = stream.DefaultCF - kvEntries, nextKvEntries, err := fm.ReadAllEntries(ctx, file, 50) + file.Cf = consts.DefaultCF + kvEntries, nextKvEntries, err := fm.ReadFilteredEntriesFromFiles(ctx, file, 50) require.NoError(t, err) require.Equal(t, []*logclient.KvEntryWithTS{ encodekvEntryWithTS("mDDL", 27), diff --git a/br/pkg/restore/snap_client/BUILD.bazel b/br/pkg/restore/snap_client/BUILD.bazel index 0a1a31cbd01c0..128579806f6a6 100644 --- a/br/pkg/restore/snap_client/BUILD.bazel +++ b/br/pkg/restore/snap_client/BUILD.bazel @@ -45,7 +45,6 @@ go_library( "//pkg/util/codec", "//pkg/util/engine", "//pkg/util/redact", - "//pkg/util/table-filter", "@com_github_google_uuid//:uuid", "@com_github_opentracing_opentracing_go//:opentracing-go", "@com_github_pingcap_errors//:errors", diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index ae878b0e9e0ca..5dde9958d5572 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -422,8 +422,8 @@ func makeDBPool(size uint, dbFactory func() (*tidallocdb.DB, error)) ([]*tidallo return dbPool, nil } -// Init create db connection and domain for storage. 
-func (rc *SnapClient) Init(g glue.Glue, store kv.Storage) error { +// InitConnections create db connection and domain for storage. +func (rc *SnapClient) InitConnections(g glue.Glue, store kv.Storage) error { // setDB must happen after set PolicyMode. // we will use policyMode to set session variables. var err error @@ -565,7 +565,7 @@ func (rc *SnapClient) initClients(ctx context.Context, backend *backuppb.Storage return nil } -func (rc *SnapClient) needLoadSchemas(backupMeta *backuppb.BackupMeta) bool { +func needLoadSchemas(backupMeta *backuppb.BackupMeta) bool { return !(backupMeta.IsRawKv || backupMeta.IsTxnKv) } @@ -579,7 +579,7 @@ func (rc *SnapClient) LoadSchemaIfNeededAndInitClient( RawStartKey []byte, RawEndKey []byte, ) error { - if rc.needLoadSchemas(backupMeta) { + if needLoadSchemas(backupMeta) { databases, err := metautil.LoadBackupTables(c, reader, loadStats) if err != nil { return errors.Trace(err) @@ -685,12 +685,21 @@ func (rc *SnapClient) GetDatabases() []*metautil.Database { return dbs } +// GetDatabaseMap returns all databases in a map indexed by db id +func (rc *SnapClient) GetDatabaseMap() map[int64]*metautil.Database { + dbMap := make(map[int64]*metautil.Database) + for _, db := range rc.databases { + dbMap[db.Info.ID] = db + } + return dbMap +} + // HasBackedUpSysDB whether we have backed up system tables // br backs system tables up since 5.1.0 func (rc *SnapClient) HasBackedUpSysDB() bool { sysDBs := []string{"mysql", "sys"} for _, db := range sysDBs { - temporaryDB := utils.TemporaryDBName(db) + temporaryDB := utils.WithTemporaryDBNamePrefix(db) _, backedUp := rc.databases[temporaryDB.O] if backedUp { return true diff --git a/br/pkg/restore/snap_client/client_test.go b/br/pkg/restore/snap_client/client_test.go index 4b96877949e23..ded9e6ea24ed6 100644 --- a/br/pkg/restore/snap_client/client_test.go +++ b/br/pkg/restore/snap_client/client_test.go @@ -49,7 +49,7 @@ func TestCreateTables(t *testing.T) { m := mc g := gluetidb.New() client 
:= snapclient.NewRestoreClient(m.PDClient, m.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, m.Storage) + err := client.InitConnections(g, m.Storage) require.NoError(t, err) info, err := m.Domain.GetSnapshotInfoSchema(math.MaxUint64) @@ -120,7 +120,7 @@ func TestNeedCheckTargetClusterFresh(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) // not set filter and first run with checkpoint @@ -150,7 +150,7 @@ func TestCheckTargetClusterFresh(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) ctx := context.Background() @@ -167,7 +167,7 @@ func TestCheckTargetClusterFreshWithTable(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) ctx := context.Background() @@ -202,7 +202,7 @@ func TestInitFullClusterRestore(t *testing.T) { cluster := mc g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) // explicit filter diff --git a/br/pkg/restore/snap_client/systable_restore.go b/br/pkg/restore/snap_client/systable_restore.go index 47c963d5f38d2..44444627734f9 100644 --- a/br/pkg/restore/snap_client/systable_restore.go +++ b/br/pkg/restore/snap_client/systable_restore.go @@ -19,7 +19,6 @@ import ( "github.com/pingcap/tidb/pkg/meta/model" 
pmodel "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/parser/mysql" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "go.uber.org/multierr" "go.uber.org/zap" ) @@ -98,7 +97,7 @@ func isStatsTable(schemaName string, tableName string) bool { // RestoreSystemSchemas restores the system schema(i.e. the `mysql` schema). // Detail see https://github.com/pingcap/br/issues/679#issuecomment-762592254. -func (rc *SnapClient) RestoreSystemSchemas(ctx context.Context, f filter.Filter) (rerr error) { +func (rc *SnapClient) RestoreSystemSchemas(ctx context.Context, f *utils.CombinedFilter) (rerr error) { sysDBs := []string{mysql.SystemDB, mysql.SysDB} for _, sysDB := range sysDBs { err := rc.restoreSystemSchema(ctx, f, sysDB) @@ -111,17 +110,16 @@ func (rc *SnapClient) RestoreSystemSchemas(ctx context.Context, f filter.Filter) // restoreSystemSchema restores a system schema(i.e. the `mysql` or `sys` schema). // Detail see https://github.com/pingcap/br/issues/679#issuecomment-762592254. -func (rc *SnapClient) restoreSystemSchema(ctx context.Context, f filter.Filter, sysDB string) (rerr error) { - temporaryDB := utils.TemporaryDBName(sysDB) +func (rc *SnapClient) restoreSystemSchema(ctx context.Context, f *utils.CombinedFilter, sysDB string) (rerr error) { + temporaryDB := utils.WithTemporaryDBNamePrefix(sysDB) defer func() { // Don't clean the temporary database for next restore with checkpoint. 
if rerr == nil { rc.cleanTemporaryDatabase(ctx, sysDB) } }() - if !f.MatchSchema(sysDB) || !rc.withSysTable { - log.Debug("system database filtered out", zap.String("database", sysDB)) + log.Info("system database filtered out", zap.String("database", sysDB)) return nil } originDatabase, ok := rc.databases[temporaryDB.O] @@ -177,7 +175,7 @@ func (rc *SnapClient) getSystemDatabaseByName(ctx context.Context, name string) db := &database{ ExistingTables: map[string]*model.TableInfo{}, Name: pmodel.NewCIStr(name), - TemporaryName: utils.TemporaryDBName(name), + TemporaryName: utils.WithTemporaryDBNamePrefix(name), } // It's OK to get all the tables from system tables. tableInfos, err := infoSchema.SchemaTableInfos(ctx, schema.Name) @@ -299,7 +297,7 @@ func (rc *SnapClient) replaceTemporaryTableToSystable(ctx context.Context, ti *m } func (rc *SnapClient) cleanTemporaryDatabase(ctx context.Context, originDB string) { - database := utils.TemporaryDBName(originDB) + database := utils.WithTemporaryDBNamePrefix(originDB) log.Debug("dropping temporary database", zap.Stringer("database", database)) sql := fmt.Sprintf("DROP DATABASE IF EXISTS %s", utils.EncloseName(database.L)) if err := rc.db.Session().Execute(ctx, sql); err != nil { diff --git a/br/pkg/restore/snap_client/systable_restore_test.go b/br/pkg/restore/snap_client/systable_restore_test.go index cc95160482d45..ecfee669cae2b 100644 --- a/br/pkg/restore/snap_client/systable_restore_test.go +++ b/br/pkg/restore/snap_client/systable_restore_test.go @@ -36,7 +36,7 @@ func TestCheckSysTableCompatibility(t *testing.T) { cluster := mc g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) info, err := cluster.Domain.GetSnapshotInfoSchema(math.MaxUint64) @@ -44,7 +44,7 @@ func TestCheckSysTableCompatibility(t *testing.T) { dbSchema, 
isExist := info.SchemaByName(pmodel.NewCIStr(mysql.SystemDB)) require.True(t, isExist) tmpSysDB := dbSchema.Clone() - tmpSysDB.Name = utils.TemporaryDBName(mysql.SystemDB) + tmpSysDB.Name = utils.WithTemporaryDBNamePrefix(mysql.SystemDB) sysDB := pmodel.NewCIStr(mysql.SystemDB) userTI, err := restore.GetTableSchema(cluster.Domain, sysDB, pmodel.NewCIStr("user")) require.NoError(t, err) diff --git a/br/pkg/restore/tiflashrec/tiflash_recorder.go b/br/pkg/restore/tiflashrec/tiflash_recorder.go index c87f0372f86a6..8cdcfa10182f5 100644 --- a/br/pkg/restore/tiflashrec/tiflash_recorder.go +++ b/br/pkg/restore/tiflashrec/tiflash_recorder.go @@ -130,15 +130,15 @@ func (r *TiFlashRecorder) GenerateResetAlterTableDDLs(info infoschema.InfoSchema func (r *TiFlashRecorder) GenerateAlterTableDDLs(info infoschema.InfoSchema) []string { items := make([]string, 0, len(r.items)) - r.Iterate(func(id int64, replica model.TiFlashReplicaInfo) { - table, ok := info.TableByID(context.Background(), id) + r.Iterate(func(tableId int64, replica model.TiFlashReplicaInfo) { + table, ok := info.TableByID(context.Background(), tableId) if !ok { - log.Warn("Table do not exist, skipping", zap.Int64("id", id)) + log.Warn("Table does not exist, might get filtered out if a custom filter is specified, skipping", zap.Int64("tableId", tableId)) return } schema, ok := infoschema.SchemaByTable(info, table.Meta()) if !ok { - log.Warn("Schema do not exist, skipping", zap.Int64("id", id), zap.Stringer("table", table.Meta().Name)) + log.Warn("Schema do not exist, skipping", zap.Int64("tableId", tableId), zap.Stringer("table", table.Meta().Name)) return } altTableSpec, err := alterTableSpecOf(replica, false) diff --git a/br/pkg/stream/BUILD.bazel b/br/pkg/stream/BUILD.bazel index 252f789c78b75..c50ee85f63aa7 100644 --- a/br/pkg/stream/BUILD.bazel +++ b/br/pkg/stream/BUILD.bazel @@ -4,14 +4,15 @@ go_library( name = "stream", srcs = [ "decode_kv.go", + "logging_helper.go", "meta_kv.go", "rewrite_meta_rawkv.go", 
"search.go", "stream_metas.go", "stream_mgr.go", "stream_status.go", + "table_history.go", "table_mapping.go", - "util.go", ], importpath = "github.com/pingcap/tidb/br/pkg/stream", visibility = ["//visibility:public"], @@ -25,6 +26,8 @@ go_library( "//br/pkg/restore/tiflashrec", "//br/pkg/storage", "//br/pkg/streamhelper", + "//br/pkg/utils", + "//br/pkg/utils/consts", "//br/pkg/utils/iter", "//pkg/ddl", "//pkg/kv", @@ -34,7 +37,6 @@ go_library( "//pkg/util", "//pkg/util/codec", "//pkg/util/mathutil", - "//pkg/util/table-filter", "//pkg/util/versioninfo", "@com_github_docker_go_units//:go-units", "@com_github_fatih_color//:color", @@ -49,6 +51,7 @@ go_library( "@org_golang_x_sync//errgroup", "@org_uber_go_multierr//:multierr", "@org_uber_go_zap//:zap", + "@org_uber_go_zap//zapcore", ], ) @@ -63,14 +66,15 @@ go_test( "stream_metas_test.go", "stream_misc_test.go", "table_mapping_test.go", - "util_test.go", ], embed = [":stream"], flaky = True, - shard_count = 47, + shard_count = 48, deps = [ "//br/pkg/storage", "//br/pkg/streamhelper", + "//br/pkg/utils", + "//br/pkg/utils/consts", "//pkg/ddl", "//pkg/meta", "//pkg/meta/model", @@ -81,14 +85,12 @@ go_test( "//pkg/types", "//pkg/util/codec", "//pkg/util/intest", - "//pkg/util/table-filter", "@com_github_fsouza_fake_gcs_server//fakestorage", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_pingcap_log//:log", "@com_github_stretchr_testify//require", - "@com_github_tikv_client_go_v2//oracle", "@org_golang_x_exp//maps", "@org_uber_go_multierr//:multierr", "@org_uber_go_zap//:zap", diff --git a/br/pkg/stream/logging_helper.go b/br/pkg/stream/logging_helper.go new file mode 100644 index 0000000000000..b67571aa77a47 --- /dev/null +++ b/br/pkg/stream/logging_helper.go @@ -0,0 +1,40 @@ +// Copyright 2024 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stream + +import ( + "github.com/pingcap/log" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +func LogDBReplaceMap(title string, dbReplaces map[UpstreamID]*DBReplace) { + for upstreamDbId, dbReplace := range dbReplaces { + log.Info(title, func() []zapcore.Field { + fields := make([]zapcore.Field, 0, (len(dbReplace.TableMap)+1)*3) + fields = append(fields, + zap.String("dbName", dbReplace.Name), + zap.Int64("upstreamId", upstreamDbId), + zap.Int64("downstreamId", dbReplace.DbID)) + for upstreamTableID, tableReplace := range dbReplace.TableMap { + fields = append(fields, + zap.String("table", tableReplace.Name), + zap.Int64("upstreamId", upstreamTableID), + zap.Int64("downstreamId", tableReplace.TableID)) + } + return fields + }()...) 
+ } +} diff --git a/br/pkg/stream/meta_kv_test.go b/br/pkg/stream/meta_kv_test.go index 0ac5b54763022..9f64d69b3f5f6 100644 --- a/br/pkg/stream/meta_kv_test.go +++ b/br/pkg/stream/meta_kv_test.go @@ -6,18 +6,12 @@ import ( "bytes" "testing" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/pkg/meta" - "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) -func encodeTxnMetaKey(key []byte, field []byte, ts uint64) []byte { - k := tablecodec.EncodeMetaKey(key, field) - txnKey := codec.EncodeBytes(nil, k) - return codec.EncodeUintDesc(txnKey, ts) -} - func TestRawMetaKeyForDB(t *testing.T) { var ( dbID int64 = 1 @@ -25,7 +19,7 @@ func TestRawMetaKeyForDB(t *testing.T) { mDbs = []byte("DBs") ) - txnKey := encodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) + txnKey := utils.EncodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) rawMetaKey, err := ParseTxnMetaKeyFrom(txnKey) require.NoError(t, err) @@ -44,7 +38,7 @@ func TestRawMetaKeyForTable(t *testing.T) { tableID int64 = 57 ts uint64 = 400036290571534337 ) - txnKey := encodeTxnMetaKey(meta.DBkey(dbID), meta.TableKey(tableID), ts) + txnKey := utils.EncodeTxnMetaKey(meta.DBkey(dbID), meta.TableKey(tableID), ts) rawMetakey, err := ParseTxnMetaKeyFrom(txnKey) require.NoError(t, err) diff --git a/br/pkg/stream/rewrite_meta_rawkv.go b/br/pkg/stream/rewrite_meta_rawkv.go index 94e05221b2424..b2a3a3fb9e63e 100644 --- a/br/pkg/stream/rewrite_meta_rawkv.go +++ b/br/pkg/stream/rewrite_meta_rawkv.go @@ -24,24 +24,20 @@ import ( berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/restore/ingestrec" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/meta" "github.com/pingcap/tidb/pkg/meta/model" - filter "github.com/pingcap/tidb/pkg/util/table-filter" 
"go.uber.org/zap" ) -// Default columnFamily and write columnFamily -const ( - DefaultCF = "default" - WriteCF = "write" -) - +type RewriteStatus int type UpstreamID = int64 type DownstreamID = int64 -// TableReplace specifies table information mapping from up-stream cluster to up-stream cluster. +// TableReplace specifies table information mapping from up-stream cluster to down-stream cluster. type TableReplace struct { Name string TableID DownstreamID @@ -49,24 +45,23 @@ type TableReplace struct { IndexMap map[UpstreamID]DownstreamID } -// DBReplace specifies database information mapping from up-stream cluster to up-stream cluster. +// DBReplace specifies database information mapping from up-stream cluster to down-stream cluster. type DBReplace struct { Name string DbID DownstreamID TableMap map[UpstreamID]*TableReplace } -// SchemasReplace specifies schemas information mapping from up-stream cluster to up-stream cluster. +// SchemasReplace specifies schemas information mapping from up-stream cluster to down-stream cluster. type SchemasReplace struct { - DbMap map[UpstreamID]*DBReplace + DbReplaceMap map[UpstreamID]*DBReplace delRangeRecorder *brDelRangeExecWrapper ingestRecorder *ingestrec.IngestRecorder TiflashRecorder *tiflashrec.TiFlashRecorder - RewriteTS uint64 // used to rewrite commit ts in meta kv. - TableFilter filter.Filter // used to filter schema/table + RewriteTS uint64 // used to rewrite commit ts in meta kv. - AfterTableRewritten func(deleted bool, tableInfo *model.TableInfo) + AfterTableRewrittenFn func(deleted bool, tableInfo *model.TableInfo) } // NewTableReplace creates a TableReplace struct. @@ -90,14 +85,13 @@ func NewDBReplace(name string, newID DownstreamID) *DBReplace { // NewSchemasReplace creates a SchemasReplace struct. 
func NewSchemasReplace( - dbMap map[UpstreamID]*DBReplace, + dbReplaceMap map[UpstreamID]*DBReplace, tiflashRecorder *tiflashrec.TiFlashRecorder, restoreTS uint64, - tableFilter filter.Filter, recordDeleteRange func(*PreDelRangeQuery), ) *SchemasReplace { globalTableIdMap := make(map[UpstreamID]DownstreamID) - for _, dr := range dbMap { + for _, dr := range dbReplaceMap { for tblID, tr := range dr.TableMap { globalTableIdMap[tblID] = tr.TableID for oldpID, newpID := range tr.PartitionMap { @@ -107,12 +101,11 @@ func NewSchemasReplace( } return &SchemasReplace{ - DbMap: dbMap, + DbReplaceMap: dbReplaceMap, delRangeRecorder: newDelRangeExecWrapper(globalTableIdMap, recordDeleteRange), ingestRecorder: ingestrec.New(), TiflashRecorder: tiflashRecorder, RewriteTS: restoreTS, - TableFilter: tableFilter, } } @@ -127,14 +120,14 @@ func (sr *SchemasReplace) rewriteKeyForDB(key []byte, cf string) ([]byte, error) return nil, errors.Trace(err) } - dbMap, exist := sr.DbMap[dbID] + dbMap, exist := sr.DbReplaceMap[dbID] if !exist { // db filtered out return nil, nil } rawMetaKey.UpdateField(meta.DBkey(dbMap.DbID)) - if cf == WriteCF { + if cf == consts.WriteCF { rawMetaKey.UpdateTS(sr.RewriteTS) } return rawMetaKey.EncodeMetaKey(), nil @@ -146,7 +139,7 @@ func (sr *SchemasReplace) rewriteDBInfo(value []byte) ([]byte, error) { return nil, errors.Trace(err) } - dbMap, exist := sr.DbMap[dbInfo.ID] + dbMap, exist := sr.DbReplaceMap[dbInfo.ID] if !exist { // db filtered out return nil, nil @@ -206,7 +199,7 @@ func (sr *SchemasReplace) rewriteKeyForTable( return nil, errors.Trace(err) } - dbReplace, exist := sr.DbMap[dbID] + dbReplace, exist := sr.DbReplaceMap[dbID] if !exist { // db filtered out return nil, nil @@ -217,10 +210,9 @@ func (sr *SchemasReplace) rewriteKeyForTable( // table filtered out return nil, nil } - rawMetaKey.UpdateKey(meta.DBkey(dbReplace.DbID)) rawMetaKey.UpdateField(encodeField(tableReplace.TableID)) - if cf == WriteCF { + if cf == consts.WriteCF { 
rawMetaKey.UpdateTS(sr.RewriteTS) } return rawMetaKey.EncodeMetaKey(), nil @@ -239,7 +231,7 @@ func (sr *SchemasReplace) rewriteTableInfo(value []byte, dbID int64) ([]byte, er } // construct or find the id map. - dbReplace, exist = sr.DbMap[dbID] + dbReplace, exist = sr.DbReplaceMap[dbID] if !exist { // db filtered out return nil, nil @@ -269,8 +261,8 @@ func (sr *SchemasReplace) rewriteTableInfo(value []byte, dbID int64) ([]byte, er if tableInfo.TTLInfo != nil { tableInfo.TTLInfo.Enable = false } - if sr.AfterTableRewritten != nil { - sr.AfterTableRewritten(false, &tableInfo) + if sr.AfterTableRewrittenFn != nil { + sr.AfterTableRewrittenFn(false, &tableInfo) } // marshal to json @@ -311,8 +303,8 @@ func (sr *SchemasReplace) rewriteEntryForTable(e *kv.Entry, cf string) (*kv.Entr // for now, we rewrite key and value separately hence we cannot // get a view of (is_delete, table_id, table_info) at the same time :(. // Maybe we can extract the rewrite part from rewriteTableInfo. - if result.Deleted && sr.AfterTableRewritten != nil { - sr.AfterTableRewritten(true, &model.TableInfo{ID: newTableID}) + if result.Deleted && sr.AfterTableRewrittenFn != nil { + sr.AfterTableRewrittenFn(true, &model.TableInfo{ID: newTableID}) } return &kv.Entry{Key: newKey, Value: result.NewValue}, nil @@ -380,10 +372,10 @@ type rewriteResult struct { } // rewriteValue rewrite the value if cf is "default", or rewrite the shortValue if cf is "write". 
-func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]byte) ([]byte, error)) (rewriteResult, error) { +func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewriteFunc func([]byte) ([]byte, error)) (rewriteResult, error) { switch cf { - case DefaultCF: - newValue, err := rewrite(value) + case consts.DefaultCF: + newValue, err := rewriteFunc(value) if err != nil { return rewriteResult{}, errors.Trace(err) } @@ -391,7 +383,7 @@ func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]b NewValue: newValue, Deleted: false, }, nil - case WriteCF: + case consts.WriteCF: rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(value); err != nil { return rewriteResult{}, errors.Trace(err) @@ -415,7 +407,7 @@ func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]b }, nil } - shortValue, err := rewrite(rawWriteCFValue.GetShortValue()) + shortValue, err := rewriteFunc(rawWriteCFValue.GetShortValue()) if err != nil { log.Info("failed to rewrite short value", zap.ByteString("write-type", []byte{rawWriteCFValue.GetWriteType()}), @@ -434,11 +426,12 @@ func (sr *SchemasReplace) GetIngestRecorder() *ingestrec.IngestRecorder { return sr.ingestRecorder } -// RewriteKvEntry uses to rewrite tableID/dbID in entry.key and entry.value -func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, error) { - // skip mDDLJob - if !IsMetaDBKey(e.Key) { - if cf == DefaultCF && IsMetaDDLJobHistoryKey(e.Key) { // mDDLJobHistory +// RewriteMetaKvEntry uses to rewrite tableID/dbID in entry.key and entry.value +func (sr *SchemasReplace) RewriteMetaKvEntry(e *kv.Entry, cf string) (*kv.Entry, error) { + if !utils.IsMetaDBKey(e.Key) { + // need to special handle ddl job history during actual restore phase. The job history contains index ingestion + // and range deletion that need to be handled separately after restore. 
+ if cf == consts.DefaultCF && utils.IsMetaDDLJobHistoryKey(e.Key) { // mDDLJobHistory job := &model.Job{} if err := job.Decode(e.Value); err != nil { log.Debug("failed to decode the job", @@ -448,7 +441,7 @@ func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, err return nil, nil } - return nil, sr.restoreFromHistory(job) + return nil, sr.processIngestIndexAndDeleteRangeFromJob(job) } return nil, nil } @@ -463,6 +456,7 @@ func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, err } else if !meta.IsDBkey(rawKey.Key) { return nil, nil } + if meta.IsTableKey(rawKey.Field) { return sr.rewriteEntryForTable(e, cf) } else if meta.IsAutoIncrementIDKey(rawKey.Field) { @@ -492,7 +486,10 @@ func (sr *SchemasReplace) tryRecordIngestIndex(job *model.Job) error { return nil } -func (sr *SchemasReplace) restoreFromHistory(job *model.Job) error { +// processIngestIndexAndDeleteRangeFromJob handles two special cases during log backup meta key replay. +// 1. index ingestion is not captured by the log backup, thus we need to restore them manually later +// 2. 
delete range also needs to be handled to clean up dropped table since it was previously relying on GC to clean it up +func (sr *SchemasReplace) processIngestIndexAndDeleteRangeFromJob(job *model.Job) error { if ddl.JobNeedGC(job) { if err := ddl.AddDelRangeJobInternal(context.TODO(), sr.delRangeRecorder, job); err != nil { return err @@ -549,7 +546,8 @@ func (bdr *brDelRangeExecWrapper) PrepareParamsList(sz int) { func (bdr *brDelRangeExecWrapper) RewriteTableID(tableID int64) (int64, bool) { newTableID, exists := bdr.globalTableIdMap[tableID] if !exists { - log.Warn("failed to find the downstream id when rewrite delete range", zap.Int64("old tableID", tableID)) + log.Warn("failed to find the downstream id when rewrite delete range, "+ + "it might due to table has been filtered out if filters have been specified", zap.Int64("old tableID", tableID)) } return newTableID, exists } diff --git a/br/pkg/stream/rewrite_meta_rawkv_test.go b/br/pkg/stream/rewrite_meta_rawkv_test.go index 81d21e5e5b5a7..de505b5c48dd3 100644 --- a/br/pkg/stream/rewrite_meta_rawkv_test.go +++ b/br/pkg/stream/rewrite_meta_rawkv_test.go @@ -7,6 +7,8 @@ import ( "encoding/json" "testing" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/meta" "github.com/pingcap/tidb/pkg/meta/model" @@ -15,7 +17,6 @@ import ( "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/types" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/stretchr/testify/require" ) @@ -30,7 +31,6 @@ func MockEmptySchemasReplace(midr *mockInsertDeleteRange, dbMap map[UpstreamID]* dbMap, nil, 9527, - filter.All(), midr.mockRecordDeleteRange, ) } @@ -60,7 +60,7 @@ func TestRewriteKeyForDB(t *testing.T) { mDbs = []byte("DBs") ) - encodedKey := encodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) + encodedKey := utils.EncodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) dbMap 
:= make(map[UpstreamID]*DBReplace) downstreamID := dbID + 100 @@ -70,7 +70,7 @@ func TestRewriteKeyForDB(t *testing.T) { sr := MockEmptySchemasReplace(nil, dbMap) // set restoreKV status and rewrite it. - newKey, err := sr.rewriteKeyForDB(encodedKey, DefaultCF) + newKey, err := sr.rewriteKeyForDB(encodedKey, consts.DefaultCF) require.Nil(t, err) decodedKey, err := ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -80,7 +80,7 @@ func TestRewriteKeyForDB(t *testing.T) { require.Equal(t, newDBID, downstreamID) // rewrite it again, and get the same result. - newKey, err = sr.rewriteKeyForDB(encodedKey, WriteCF) + newKey, err = sr.rewriteKeyForDB(encodedKey, consts.WriteCF) require.Nil(t, err) decodedKey, err = ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -111,16 +111,16 @@ func TestRewriteDBInfo(t *testing.T) { require.Nil(t, err) err = json.Unmarshal(newValue, &DBInfo) require.Nil(t, err) - require.Equal(t, DBInfo.ID, sr.DbMap[dbID].DbID) + require.Equal(t, DBInfo.ID, sr.DbReplaceMap[dbID].DbID) // rewrite again, and get the same result. - newId := sr.DbMap[dbID].DbID + newId := sr.DbReplaceMap[dbID].DbID newValue, err = sr.rewriteDBInfo(value) require.Nil(t, err) err = json.Unmarshal(newValue, &DBInfo) require.Nil(t, err) - require.Equal(t, DBInfo.ID, sr.DbMap[dbID].DbID) - require.Equal(t, newId, sr.DbMap[dbID].DbID) + require.Equal(t, DBInfo.ID, sr.DbReplaceMap[dbID].DbID) + require.Equal(t, newId, sr.DbReplaceMap[dbID].DbID) } func TestRewriteKeyForTable(t *testing.T) { @@ -158,7 +158,7 @@ func TestRewriteKeyForTable(t *testing.T) { } for _, ca := range cases { - encodedKey := encodeTxnMetaKey(meta.DBkey(dbID), ca.encodeTableFn(tableID), ts) + encodedKey := utils.EncodeTxnMetaKey(meta.DBkey(dbID), ca.encodeTableFn(tableID), ts) dbMap := make(map[UpstreamID]*DBReplace) downStreamDbID := dbID + 100 @@ -170,7 +170,7 @@ func TestRewriteKeyForTable(t *testing.T) { sr := MockEmptySchemasReplace(nil, dbMap) // set restoreKV status and rewrite it. 
- newKey, err := sr.rewriteKeyForTable(encodedKey, DefaultCF, ca.decodeTableFn, ca.encodeTableFn) + newKey, err := sr.rewriteKeyForTable(encodedKey, consts.DefaultCF, ca.decodeTableFn, ca.encodeTableFn) require.Nil(t, err) decodedKey, err := ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -184,7 +184,7 @@ func TestRewriteKeyForTable(t *testing.T) { require.Equal(t, newTblID, downStreamTblID) // rewrite it again, and get the same result. - newKey, err = sr.rewriteKeyForTable(encodedKey, WriteCF, ca.decodeTableFn, ca.encodeTableFn) + newKey, err = sr.rewriteKeyForTable(encodedKey, consts.WriteCF, ca.decodeTableFn, ca.encodeTableFn) require.Nil(t, err) decodedKey, err = ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -218,7 +218,7 @@ func TestRewriteTableInfo(t *testing.T) { // create schemasReplace. sr := MockEmptySchemasReplace(nil, dbMap) tableCount := 0 - sr.AfterTableRewritten = func(deleted bool, tableInfo *model.TableInfo) { + sr.AfterTableRewrittenFn = func(deleted bool, tableInfo *model.TableInfo) { tableCount++ tableInfo.TiFlashReplica = &model.TiFlashReplicaInfo{ Count: 1, @@ -230,17 +230,17 @@ func TestRewriteTableInfo(t *testing.T) { require.Nil(t, err) err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.EqualValues(t, tableInfo.TiFlashReplica.Count, 1) // rewrite it again and get the same result. 
- newID := sr.DbMap[dbId].TableMap[tableID].TableID + newID := sr.DbReplaceMap[dbId].TableMap[tableID].TableID newValue, err = sr.rewriteTableInfo(value, dbId) require.Nil(t, err) err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) - require.Equal(t, newID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) + require.Equal(t, newID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.EqualValues(t, tableCount, 2) } @@ -292,7 +292,6 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { dbMap, nil, 0, - filter.All(), nil, ) @@ -302,11 +301,11 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) require.Equal(t, tableInfo.Name.String(), tableName) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.Equal( t, tableInfo.Partition.Definitions[0].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID], ) require.Equal( t, @@ -316,7 +315,7 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { require.Equal( t, tableInfo.Partition.Definitions[1].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID], ) require.Equal( t, @@ -325,8 +324,8 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { ) // rewrite it aggin, and get the same result. 
- newID1 := sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID] - newID2 := sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID] + newID1 := sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID] + newID2 := sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID] newValue, err = sr.rewriteTableInfo(value, dbId) require.Nil(t, err) @@ -336,13 +335,13 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { require.Equal( t, tableInfo.Partition.Definitions[0].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID], ) require.Equal(t, tableInfo.Partition.Definitions[0].ID, newID1) require.Equal( t, tableInfo.Partition.Definitions[1].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID], ) require.Equal(t, tableInfo.Partition.Definitions[1].ID, newID2) } @@ -402,7 +401,8 @@ func TestRewriteTableInfoForExchangePartition(t *testing.T) { dbMap[dbID2] = NewDBReplace(db2.Name.O, dbID2+100) dbMap[dbID2].TableMap[tableID2] = NewTableReplace(t2.Name.O, tableID2+100) - tc := NewTableMappingManager(dbMap, mockGenGenGlobalID) + tc := NewTableMappingManager() + tc.MergeBaseDBReplace(dbMap) //exchange partition, t1 partition0 with the t2 t1Copy := t1.Clone() @@ -412,14 +412,13 @@ func TestRewriteTableInfoForExchangePartition(t *testing.T) { value, err := json.Marshal(&t1Copy) require.Nil(t, err) - err = tc.parseTableValueAndUpdateIdMapping(dbID1, value) + err = tc.ProcessTableValueAndUpdateIdMapping(dbID1, *t1Copy) require.Nil(t, err) sr := NewSchemasReplace( - tc.DbReplaceMap, + tc.DBReplaceMap, nil, 0, - filter.All(), nil, ) @@ -435,7 +434,7 @@ func TestRewriteTableInfoForExchangePartition(t *testing.T) { // rewrite no partition table value, err = json.Marshal(&t2Copy) require.Nil(t, err) - err = tc.parseTableValueAndUpdateIdMapping(dbID2, value) + err = tc.ProcessTableValueAndUpdateIdMapping(dbID2, *t2Copy) require.Nil(t, err) 
value, err = sr.rewriteTableInfo(value, dbID2) require.Nil(t, err) @@ -489,7 +488,7 @@ func TestRewriteTableInfoForTTLTable(t *testing.T) { err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) require.Equal(t, tableInfo.Name.String(), tableName) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.NotNil(t, tableInfo.TTLInfo) require.Equal(t, colName, tableInfo.TTLInfo.ColumnName.O) require.Equal(t, "1", tableInfo.TTLInfo.IntervalExprStr) @@ -706,7 +705,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { var qargs *PreDelRangeQuery // drop schema - err := schemaReplace.restoreFromHistory(dropSchemaJob) + err := schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewTableIDSet)) @@ -716,7 +715,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop table0 - err = schemaReplace.restoreFromHistory(dropTable0Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewPartitionIDSet)) @@ -729,42 +728,42 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobTable0NewID)) // drop table1 - err = schemaReplace.restoreFromHistory(dropTable1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobTable1NewID)) // drop table partition1 - err = schemaReplace.restoreFromHistory(dropTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, 
len(qargs.ParamsList), 1) require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobPartition1NewID)) // reorganize table partition1 - err = schemaReplace.restoreFromHistory(reorganizeTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(reorganizeTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // remove table partition1 - err = schemaReplace.restoreFromHistory(removeTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(removeTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // alter table partition1 - err = schemaReplace.restoreFromHistory(alterTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(alterTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // roll back add index for table0 - err = schemaReplace.restoreFromHistory(rollBackTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(rollBackTable0IndexJob) require.NoError(t, err) oldPartitionIDMap := make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -785,7 +784,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // roll back add index for table1 - err = schemaReplace.restoreFromHistory(rollBackTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(rollBackTable1IndexJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 2) @@ -795,7 +794,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, 
int64(tablecodec.TempIndexPrefix|2)), qargs.ParamsList[1].StartKey) // drop index for table0 - err = schemaReplace.restoreFromHistory(dropTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0IndexJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -809,14 +808,14 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop index for table1 - err = schemaReplace.restoreFromHistory(dropTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1IndexJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(2)), qargs.ParamsList[0].StartKey) // add index for table 0 - err = schemaReplace.restoreFromHistory(addTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(addTable0IndexJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -830,14 +829,14 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // add index for table 1 - err = schemaReplace.restoreFromHistory(addTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(addTable1IndexJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, tempIndex2), qargs.ParamsList[0].StartKey) // drop column for table0 - err = schemaReplace.restoreFromHistory(dropTable0ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0ColumnJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -858,7 +857,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop column for table1 - err = schemaReplace.restoreFromHistory(dropTable1ColumnJob) + err = 
schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1ColumnJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLIndexesIDSet)) @@ -868,7 +867,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(3)), qargs.ParamsList[1].StartKey) // modify column for table0 - err = schemaReplace.restoreFromHistory(modifyTable0ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(modifyTable0ColumnJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -889,7 +888,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // modify column for table1 - err = schemaReplace.restoreFromHistory(modifyTable1ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(modifyTable1ColumnJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLIndexesIDSet)) @@ -899,7 +898,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(3)), qargs.ParamsList[1].StartKey) // drop indexes(multi-schema-change) for table0 - err = schemaReplace.restoreFromHistory(multiSchemaChangeJob0) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(multiSchemaChangeJob0) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for l := 0; l < 2; l++ { @@ -915,7 +914,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop indexes(multi-schema-change) for table1 - err = schemaReplace.restoreFromHistory(multiSchemaChangeJob1) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(multiSchemaChangeJob1) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) @@ -953,7 +952,7 @@ func TestDeleteRangeForMDDLJob2(t *testing.T) { }) var qargs *PreDelRangeQuery // drop schema - err := 
schemaReplace.restoreFromHistory(dropSchemaJob) + err := schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewTableIDSet)) @@ -971,7 +970,7 @@ func TestDeleteRangeForMDDLJob2(t *testing.T) { schemaReplace = MockEmptySchemasReplace(midr, map[int64]*DBReplace{ mDDLJobDBOldID: dbReplace, }) - err = schemaReplace.restoreFromHistory(dropSchemaJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewPartitionIDSet)+1) diff --git a/br/pkg/stream/search.go b/br/pkg/stream/search.go index 7cf940a42f135..ea3664739576c 100644 --- a/br/pkg/stream/search.go +++ b/br/pkg/stream/search.go @@ -16,6 +16,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/codec" "go.uber.org/zap" @@ -193,9 +194,9 @@ func (s *StreamBackupSearch) Search(ctx context.Context) ([]*StreamKVInfo, error writeCFEntries := make(map[string]*StreamKVInfo, 64) for entry := range entriesCh { - if entry.CFName == WriteCF { + if entry.CFName == consts.WriteCF { writeCFEntries[entry.EncodedKey] = entry - } else if entry.CFName == DefaultCF { + } else if entry.CFName == consts.DefaultCF { defaultCFEntries[entry.EncodedKey] = entry } } @@ -241,7 +242,7 @@ func (s *StreamBackupSearch) searchFromDataFile( return errors.Annotatef(err, "decode raw key error, file: %s", dataFile.Path) } - if dataFile.Cf == WriteCF { + if dataFile.Cf == consts.WriteCF { rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(v); err != nil { return errors.Annotatef(err, "parse raw write cf value error, file: %s", dataFile.Path) @@ -262,7 +263,7 @@ func (s *StreamBackupSearch) 
searchFromDataFile( ShortValue: valueStr, } ch <- kvInfo - } else if dataFile.Cf == DefaultCF { + } else if dataFile.Cf == consts.DefaultCF { kvInfo := &StreamKVInfo{ CFName: dataFile.Cf, StartTs: ts, diff --git a/br/pkg/stream/search_test.go b/br/pkg/stream/search_test.go index 224beb5ac7403..2cf3b74b8efe0 100644 --- a/br/pkg/stream/search_test.go +++ b/br/pkg/stream/search_test.go @@ -13,6 +13,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) @@ -121,7 +122,7 @@ func fakeDataFile(t *testing.T, s storage.ExternalStorage) (defaultCFDataFile, w defaultCFCheckSum := sha256.Sum256(defaultCFBuf.Bytes()) defaultCFDataFile = &backuppb.DataFileInfo{ Path: defaultCFFile, - Cf: DefaultCF, + Cf: consts.DefaultCF, Sha256: defaultCFCheckSum[:], } @@ -135,7 +136,7 @@ func fakeDataFile(t *testing.T, s storage.ExternalStorage) (defaultCFDataFile, w writeCFCheckSum := sha256.Sum256(writeCFBuf.Bytes()) writeCFDataFile = &backuppb.DataFileInfo{ Path: writeCFFile, - Cf: WriteCF, + Cf: consts.WriteCF, Sha256: writeCFCheckSum[:], } @@ -178,7 +179,7 @@ func TestMergeCFEntries(t *testing.T) { Key: hex.EncodeToString([]byte(defaultCF.key)), EncodedKey: encodedKey, StartTs: uint64(defaultCF.startTs), - CFName: DefaultCF, + CFName: consts.DefaultCF, Value: defaultCF.val, } } @@ -189,7 +190,7 @@ func TestMergeCFEntries(t *testing.T) { EncodedKey: encodedKey, StartTs: uint64(writeCF.startTs), CommitTs: uint64(writeCF.commitTS), - CFName: WriteCF, + CFName: consts.WriteCF, Value: writeCF.val, } } diff --git a/br/pkg/stream/stream_metas.go b/br/pkg/stream/stream_metas.go index 6801035ce7214..064ec6579a4b7 100644 --- a/br/pkg/stream/stream_metas.go +++ b/br/pkg/stream/stream_metas.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/br/pkg/glue" "github.com/pingcap/tidb/br/pkg/logutil" 
"github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/mathutil" @@ -295,7 +296,7 @@ func UpdateShiftTS(m *pb.Metadata, startTS uint64, restoreTS uint64) (uint64, bo for _, ds := range m.FileGroups { for _, d := range ds.DataFilesInfo { - if d.Cf == DefaultCF || d.MinBeginTsInDefaultCf == 0 { + if d.Cf == consts.DefaultCF || d.MinBeginTsInDefaultCf == 0 { continue } if d.MinTs > restoreTS || d.MaxTs < startTS { diff --git a/br/pkg/stream/stream_metas_test.go b/br/pkg/stream/stream_metas_test.go index c0fcbbae623ce..b7a2acdf35e89 100644 --- a/br/pkg/stream/stream_metas_test.go +++ b/br/pkg/stream/stream_metas_test.go @@ -22,6 +22,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" + . "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util/intest" "github.com/stretchr/testify/require" "go.uber.org/multierr" diff --git a/br/pkg/stream/stream_mgr.go b/br/pkg/stream/stream_mgr.go index a9dffb23f017d..3c0a6f7d2d22b 100644 --- a/br/pkg/stream/stream_mgr.go +++ b/br/pkg/stream/stream_mgr.go @@ -27,12 +27,12 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/encryption" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/meta" "github.com/pingcap/tidb/pkg/meta/model" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "go.uber.org/zap" "golang.org/x/sync/errgroup" ) @@ -81,7 +81,7 @@ func buildObserveTableRange(table *model.TableInfo) []kv.KeyRange { // buildObserveTableRanges builds key ranges to observe table kv-events. 
func buildObserveTableRanges( storage kv.Storage, - tableFilter filter.Filter, + tableFilter *utils.CombinedFilter, backupTS uint64, ) ([]kv.KeyRange, error) { snapshot := storage.GetSnapshot(kv.NewVersion(backupTS)) @@ -131,11 +131,10 @@ func buildObserverAllRange() []kv.KeyRange { // BuildObserveDataRanges builds key ranges to observe data KV. func BuildObserveDataRanges( storage kv.Storage, - filterStr []string, - tableFilter filter.Filter, + tableFilter *utils.CombinedFilter, backupTS uint64, ) ([]kv.KeyRange, error) { - if len(filterStr) == 1 && filterStr[0] == string("*.*") { + if tableFilter.IsAcceptAll() { return buildObserverAllRange(), nil } // TODO: currently it's a dead code, the iterator metakvs can be optimized @@ -356,6 +355,15 @@ func (*MetadataHelper) Marshal(meta *backuppb.Metadata) ([]byte, error) { return meta.Marshal() } +func (m *MetadataHelper) Close() { + if m.decoder != nil { + m.decoder.Close() + } + if m.encryptionManager != nil { + m.encryptionManager.Close() + } +} + // FastUnmarshalMetaData used a 128 worker pool to speed up // read metadata content from external_storage. func FastUnmarshalMetaData( diff --git a/br/pkg/stream/stream_status.go b/br/pkg/stream/stream_status.go index f22487d2f19ef..2909b8ded8c49 100644 --- a/br/pkg/stream/stream_status.go +++ b/br/pkg/stream/stream_status.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/storage" . 
"github.com/pingcap/tidb/br/pkg/streamhelper" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/tikv/client-go/v2/oracle" pd "github.com/tikv/pd/client" "go.uber.org/zap" @@ -130,9 +131,9 @@ func (p *printByTable) AddTask(task TaskStatus) { table := p.console.CreateTable() table.Add("name", task.Info.Name) table.Add("status", task.colorfulStatusString()) - table.Add("start", fmt.Sprint(FormatDate(oracle.GetTimeFromTS(task.Info.StartTs)))) + table.Add("start", fmt.Sprint(utils.FormatDate(oracle.GetTimeFromTS(task.Info.StartTs)))) if task.Info.EndTs > 0 { - table.Add("end", fmt.Sprint(FormatDate(oracle.GetTimeFromTS(task.Info.EndTs)))) + table.Add("end", fmt.Sprint(utils.FormatDate(oracle.GetTimeFromTS(task.Info.EndTs)))) } s := storage.FormatBackendURL(task.Info.GetStorage()) table.Add("storage", s.String()) @@ -146,7 +147,7 @@ func (p *printByTable) AddTask(task TaskStatus) { if gap > 10*time.Minute { gapColor = color.New(color.FgRed) } - info := fmt.Sprintf("%s; gap=%s", FormatDate(pTime), gapColor.Sprint(gap)) + info := fmt.Sprintf("%s; gap=%s", utils.FormatDate(pTime), gapColor.Sprint(gap)) return info } table.Add("checkpoint[global]", formatTS(task.globalCheckpoint)) diff --git a/br/pkg/stream/table_history.go b/br/pkg/stream/table_history.go new file mode 100644 index 0000000000000..99a51a8c02f95 --- /dev/null +++ b/br/pkg/stream/table_history.go @@ -0,0 +1,71 @@ +// Copyright 2022-present PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package stream + +// DBIDAndTableName stores db id and the table name to locate the table +type DBIDAndTableName struct { + DbID int64 + TableName string +} + +type LogBackupTableHistoryManager struct { + // maps table ID to its original and current names + // [0] is original location, [1] is current location + tableNameHistory map[int64][2]DBIDAndTableName + // record all the db id to name that were seen during log backup DDL history + dbIdToName map[int64]string + needToBuildIdMap bool +} + +func NewTableHistoryManager() *LogBackupTableHistoryManager { + return &LogBackupTableHistoryManager{ + tableNameHistory: make(map[int64][2]DBIDAndTableName), + dbIdToName: make(map[int64]string), + } +} + +func (info *LogBackupTableHistoryManager) AddTableHistory(tableId int64, tableName string, dbID int64) { + tableLocationInfo := DBIDAndTableName{ + DbID: dbID, + TableName: tableName, + } + names, exists := info.tableNameHistory[tableId] + if !exists { + // first occurrence - store as original name + info.tableNameHistory[tableId] = [2]DBIDAndTableName{tableLocationInfo, tableLocationInfo} + } else { + // update current name while preserving original name + info.tableNameHistory[tableId] = [2]DBIDAndTableName{names[0], tableLocationInfo} + } +} + +func (info *LogBackupTableHistoryManager) RecordDBIdToName(dbId int64, dbName string) { + info.dbIdToName[dbId] = dbName +} + +// GetTableHistory returns information about all tables that have been renamed. 
+// Returns a map of table IDs to their original and current locations +func (info *LogBackupTableHistoryManager) GetTableHistory() map[int64][2]DBIDAndTableName { + return info.tableNameHistory +} + +func (info *LogBackupTableHistoryManager) GetDBNameByID(dbId int64) (string, bool) { + name, ok := info.dbIdToName[dbId] + return name, ok +} + +func (info *LogBackupTableHistoryManager) GetNewlyCreatedDBHistory() map[int64]string { + return info.dbIdToName +} diff --git a/br/pkg/stream/table_mapping.go b/br/pkg/stream/table_mapping.go index ff44f1fdc7b35..bc71865575aa4 100644 --- a/br/pkg/stream/table_mapping.go +++ b/br/pkg/stream/table_mapping.go @@ -16,32 +16,57 @@ package stream import ( "context" - "encoding/json" "fmt" + "sort" "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" - "github.com/pingcap/log" + berrors "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/kv" - "github.com/pingcap/tidb/pkg/meta" "github.com/pingcap/tidb/pkg/meta/model" ) -// TableMappingManager iterates on log backup meta kvs and generate new id for DB, table and partition for +const InitialTempId int64 = 0 + +// TableMappingManager processes each log backup meta kv and generate new id for DB, table and partition for // downstream cluster. It maintains the id mapping and passes down later to the rewrite logic. +// +// The usage in the code base is listed below +// 1. during PiTR, it runs before snapshot restore to collect table id mapping information. For each id to map it +// generates a dummy downstream id first, this is because we can only generate global id after running snapshot restore +// 2. at log restore phase, it merges the db replace map generated from the full backup or previous PiTR task, it will +// replace some dummy id at this step. +// 3. it runs a filter to filter out tables that we don't need +// 4. 
after all above steps, it uses the genGenGlobalIDs method to generate a batch of ids in one call and replace +// the dummy ids, it builds the final state of the db replace map type TableMappingManager struct { - DbReplaceMap map[UpstreamID]*DBReplace - globalIdMap map[UpstreamID]DownstreamID - genGlobalIdFn func(ctx context.Context) (int64, error) + DBReplaceMap map[UpstreamID]*DBReplace + globalIdMap map[UpstreamID]DownstreamID + + // a counter for temporary IDs, need to get real global id + // once full restore completes + tempIDCounter DownstreamID +} + +func NewTableMappingManager() *TableMappingManager { + return &TableMappingManager{ + DBReplaceMap: make(map[UpstreamID]*DBReplace), + globalIdMap: make(map[UpstreamID]DownstreamID), + tempIDCounter: InitialTempId, + } } -func NewTableMappingManager( - dbReplaceMap map[UpstreamID]*DBReplace, - genGlobalIdFn func(ctx context.Context) (int64, error)) *TableMappingManager { +func (tm *TableMappingManager) FromDBReplaceMap(dbReplaceMap map[UpstreamID]*DBReplace) error { + if !tm.IsEmpty() { + return errors.Annotate(berrors.ErrRestoreInvalidRewrite, + "expect table mapping manager empty when need to load ID map") + } + if dbReplaceMap == nil { dbReplaceMap = make(map[UpstreamID]*DBReplace) } - globalTableIdMap := make(map[UpstreamID]DownstreamID) for _, dr := range dbReplaceMap { for tblID, tr := range dr.TableMap { @@ -51,104 +76,45 @@ func NewTableMappingManager( } } } + tm.globalIdMap = globalTableIdMap + tm.DBReplaceMap = dbReplaceMap - return &TableMappingManager{ - DbReplaceMap: dbReplaceMap, - globalIdMap: globalTableIdMap, - genGlobalIdFn: genGlobalIdFn, - } -} - -// ParseMetaKvAndUpdateIdMapping collect table information -func (tc *TableMappingManager) ParseMetaKvAndUpdateIdMapping(e *kv.Entry, cf string) error { - if !IsMetaDBKey(e.Key) { - return nil - } - - rawKey, err := ParseTxnMetaKeyFrom(e.Key) - if err != nil { - return errors.Trace(err) - } - - value, err := extractValue(e, cf) - if err != nil { - 
return errors.Trace(err) - } - // sanity check - if value == nil { - log.Warn("entry suggests having short value but is nil") - return nil - } - - if meta.IsDBkey(rawKey.Field) { - return tc.parseDBValueAndUpdateIdMapping(value) - } else if !meta.IsDBkey(rawKey.Key) { - return nil - } - - if meta.IsTableKey(rawKey.Field) { - dbID, err := ParseDBIDFromTableKey(e.Key) - if err != nil { - return errors.Trace(err) - } - return tc.parseTableValueAndUpdateIdMapping(dbID, value) - } return nil } -func (tc *TableMappingManager) parseDBValueAndUpdateIdMapping(value []byte) error { - dbInfo := new(model.DBInfo) - if err := json.Unmarshal(value, dbInfo); err != nil { - return errors.Trace(err) - } - - if dr, exist := tc.DbReplaceMap[dbInfo.ID]; !exist { - newID, err := tc.genGlobalIdFn(context.Background()) - if err != nil { - return errors.Trace(err) - } - tc.DbReplaceMap[dbInfo.ID] = NewDBReplace(dbInfo.Name.O, newID) - tc.globalIdMap[dbInfo.ID] = newID +func (tm *TableMappingManager) ProcessDBValueAndUpdateIdMapping(dbInfo model.DBInfo) error { + if dr, exist := tm.DBReplaceMap[dbInfo.ID]; !exist { + newID := tm.generateTempID() + tm.DBReplaceMap[dbInfo.ID] = NewDBReplace(dbInfo.Name.O, newID) + tm.globalIdMap[dbInfo.ID] = newID } else { dr.Name = dbInfo.Name.O } return nil } -func (tc *TableMappingManager) parseTableValueAndUpdateIdMapping(dbID int64, value []byte) error { +func (tm *TableMappingManager) ProcessTableValueAndUpdateIdMapping(dbID int64, tableInfo model.TableInfo) error { var ( - tableInfo model.TableInfo - err error exist bool dbReplace *DBReplace tableReplace *TableReplace ) - if err := json.Unmarshal(value, &tableInfo); err != nil { - return errors.Trace(err) - } - // construct or find the id map. 
- dbReplace, exist = tc.DbReplaceMap[dbID] + dbReplace, exist = tm.DBReplaceMap[dbID] if !exist { - newID, err := tc.genGlobalIdFn(context.Background()) - if err != nil { - return errors.Trace(err) - } - tc.globalIdMap[dbID] = newID + newID := tm.generateTempID() + tm.globalIdMap[dbID] = newID dbReplace = NewDBReplace("", newID) - tc.DbReplaceMap[dbID] = dbReplace + tm.DBReplaceMap[dbID] = dbReplace } tableReplace, exist = dbReplace.TableMap[tableInfo.ID] if !exist { - newID, exist := tc.globalIdMap[tableInfo.ID] + newID, exist := tm.globalIdMap[tableInfo.ID] if !exist { - newID, err = tc.genGlobalIdFn(context.Background()) - if err != nil { - return errors.Trace(err) - } - tc.globalIdMap[tableInfo.ID] = newID + newID = tm.generateTempID() + tm.globalIdMap[tableInfo.ID] = newID } tableReplace = NewTableReplace(tableInfo.Name.O, newID) @@ -164,13 +130,10 @@ func (tc *TableMappingManager) parseTableValueAndUpdateIdMapping(dbID int64, val for i, partition := range partitions.Definitions { newID, exist := tableReplace.PartitionMap[partition.ID] if !exist { - newID, exist = tc.globalIdMap[partition.ID] + newID, exist = tm.globalIdMap[partition.ID] if !exist { - newID, err = tc.genGlobalIdFn(context.Background()) - if err != nil { - return errors.Trace(err) - } - tc.globalIdMap[partition.ID] = newID + newID = tm.generateTempID() + tm.globalIdMap[partition.ID] = newID } tableReplace.PartitionMap[partition.ID] = newID } @@ -180,11 +143,179 @@ func (tc *TableMappingManager) parseTableValueAndUpdateIdMapping(dbID int64, val return nil } +func (tm *TableMappingManager) MergeBaseDBReplace(baseMap map[UpstreamID]*DBReplace) { + // update globalIdMap + for upstreamID, dbReplace := range baseMap { + tm.globalIdMap[upstreamID] = dbReplace.DbID + + for tableUpID, tableReplace := range dbReplace.TableMap { + tm.globalIdMap[tableUpID] = tableReplace.TableID + for partUpID, partDownID := range tableReplace.PartitionMap { + tm.globalIdMap[partUpID] = partDownID + } + } + } + + // merge 
baseMap to DBReplaceMap + for upstreamID, baseDBReplace := range baseMap { + if existingDBReplace, exists := tm.DBReplaceMap[upstreamID]; exists { + existingDBReplace.DbID = baseDBReplace.DbID + + for tableUpID, baseTableReplace := range baseDBReplace.TableMap { + if existingTableReplace, tableExists := existingDBReplace.TableMap[tableUpID]; tableExists { + existingTableReplace.TableID = baseTableReplace.TableID + + for partUpID, basePartDownID := range baseTableReplace.PartitionMap { + existingTableReplace.PartitionMap[partUpID] = basePartDownID + } + } else { + existingDBReplace.TableMap[tableUpID] = baseTableReplace + } + } + } else { + tm.DBReplaceMap[upstreamID] = baseDBReplace + } + } +} + +func (tm *TableMappingManager) IsEmpty() bool { + return len(tm.DBReplaceMap) == 0 && len(tm.globalIdMap) == 0 +} + +func (tm *TableMappingManager) ReplaceTemporaryIDs( + ctx context.Context, genGenGlobalIDs func(ctx context.Context, n int) ([]int64, error)) error { + if tm.tempIDCounter == InitialTempId { + // no temporary IDs were allocated + return nil + } + + // find actually used temporary IDs + usedTempIDs := make(map[DownstreamID]struct{}) + + // check DBReplaceMap for used temporary IDs + // any value less than 0 is temporary ID + for _, dr := range tm.DBReplaceMap { + if dr.DbID < 0 { + usedTempIDs[dr.DbID] = struct{}{} + } + for _, tr := range dr.TableMap { + if tr.TableID < 0 { + usedTempIDs[tr.TableID] = struct{}{} + } + for _, partID := range tr.PartitionMap { + if partID < 0 { + usedTempIDs[partID] = struct{}{} + } + } + } + } + + // check in globalIdMap as well just be safe + for _, downID := range tm.globalIdMap { + if downID < 0 { + usedTempIDs[downID] = struct{}{} + } + } + + tempIDs := make([]DownstreamID, 0, len(usedTempIDs)) + // convert to sorted slice + for id := range usedTempIDs { + tempIDs = append(tempIDs, id) + } + + // sort to -1, -2, -4 ... 
etc + sort.Slice(tempIDs, func(i, j int) bool { + return tempIDs[i] > tempIDs[j] + }) + + // early return if no temp id used + if len(tempIDs) == 0 { + tm.tempIDCounter = InitialTempId + return nil + } + + // generate real global IDs only for actually used temporary IDs + newIDs, err := genGenGlobalIDs(ctx, len(tempIDs)) + if err != nil { + return errors.Trace(err) + } + + // create mapping from temp IDs to new IDs + idMapping := make(map[DownstreamID]DownstreamID, len(tempIDs)) + for i, tempID := range tempIDs { + idMapping[tempID] = newIDs[i] + } + + // replace temp id in globalIdMap + for upID, downID := range tm.globalIdMap { + if newID, exists := idMapping[downID]; exists { + tm.globalIdMap[upID] = newID + } + } + + // replace temp id in DBReplaceMap + for _, dr := range tm.DBReplaceMap { + if newID, exists := idMapping[dr.DbID]; exists { + dr.DbID = newID + } + + for _, tr := range dr.TableMap { + if newID, exists := idMapping[tr.TableID]; exists { + tr.TableID = newID + } + + for oldPID, tempPID := range tr.PartitionMap { + if newID, exists := idMapping[tempPID]; exists { + tr.PartitionMap[oldPID] = newID + } + } + } + } + + tm.tempIDCounter = InitialTempId + return nil +} + +func (tm *TableMappingManager) FilterDBReplaceMap(filter *utils.PiTRTableFilter) { + // collect all IDs that should be kept + keepIDs := make(map[UpstreamID]struct{}) + + // iterate through existing DBReplaceMap + for dbID, dbReplace := range tm.DBReplaceMap { + // remove entire database if not in filter + if !filter.ContainsDB(dbID) { + delete(tm.DBReplaceMap, dbID) + continue + } + + keepIDs[dbID] = struct{}{} + + // filter tables in this database + for tableID, tableReplace := range dbReplace.TableMap { + if !filter.ContainsTable(dbID, tableID) { + delete(dbReplace.TableMap, tableID) + } else { + keepIDs[tableID] = struct{}{} + for partitionID := range tableReplace.PartitionMap { + keepIDs[partitionID] = struct{}{} + } + } + } + } + + // remove any ID from globalIdMap that isn't in 
keepIDs + for id := range tm.globalIdMap { + if _, ok := keepIDs[id]; !ok { + delete(tm.globalIdMap, id) + } + } +} + // ToProto produces schemas id maps from up-stream to down-stream. -func (tc *TableMappingManager) ToProto() []*backuppb.PitrDBMap { - dbMaps := make([]*backuppb.PitrDBMap, 0, len(tc.DbReplaceMap)) +func (tm *TableMappingManager) ToProto() []*backuppb.PitrDBMap { + dbMaps := make([]*backuppb.PitrDBMap, 0, len(tm.DBReplaceMap)) - for dbID, dr := range tc.DbReplaceMap { + for dbID, dr := range tm.DBReplaceMap { dbm := backuppb.PitrDBMap{ Name: dr.Name, IdMap: &backuppb.IDMap{ @@ -215,7 +346,6 @@ func (tc *TableMappingManager) ToProto() []*backuppb.PitrDBMap { } dbMaps = append(dbMaps, &dbm) } - return dbMaps } @@ -234,15 +364,14 @@ func FromDBMapProto(dbMaps []*backuppb.PitrDBMap) map[UpstreamID]*DBReplace { } } } - return dbReplaces } -func extractValue(e *kv.Entry, cf string) ([]byte, error) { +func ExtractValue(e *kv.Entry, cf string) ([]byte, error) { switch cf { - case DefaultCF: + case consts.DefaultCF: return e.Value, nil - case WriteCF: + case consts.WriteCF: rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(e.Value); err != nil { return nil, errors.Trace(err) @@ -255,3 +384,8 @@ func extractValue(e *kv.Entry, cf string) ([]byte, error) { panic(fmt.Sprintf("not support cf:%s", cf)) } } + +func (tm *TableMappingManager) generateTempID() DownstreamID { + tm.tempIDCounter-- + return tm.tempIDCounter +} diff --git a/br/pkg/stream/table_mapping_test.go b/br/pkg/stream/table_mapping_test.go index 3f816c5399665..451ced524a616 100644 --- a/br/pkg/stream/table_mapping_test.go +++ b/br/pkg/stream/table_mapping_test.go @@ -16,18 +16,13 @@ package stream import ( "context" + "errors" "testing" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/stretchr/testify/require" ) -var increaseID int64 = 100 - -func mockGenGenGlobalID(_ctx context.Context) (int64, error) { - increaseID++ - return increaseID, nil -} - func TestToProto(t 
*testing.T) { var ( dbName, tblName string = "db1", "t1" @@ -49,9 +44,11 @@ func TestToProto(t *testing.T) { drs[oldDBID] = dr // create schemas replace and test ToProto(). - tc := NewTableMappingManager(drs, mockGenGenGlobalID) + tm := NewTableMappingManager() + err := tm.FromDBReplaceMap(drs) + require.NoError(t, err) - dbMap := tc.ToProto() + dbMap := tm.ToProto() require.Equal(t, len(dbMap), 1) require.Equal(t, dbMap[0].Name, dbName) require.Equal(t, dbMap[0].IdMap.UpstreamId, oldDBID) @@ -80,3 +77,881 @@ func TestToProto(t *testing.T) { drs2 := FromDBMapProto(dbMap) require.Equal(t, drs2, drs) } + +func TestMergeBaseDBReplace(t *testing.T) { + tests := []struct { + name string + existing map[UpstreamID]*DBReplace + base map[UpstreamID]*DBReplace + expected map[UpstreamID]*DBReplace + }{ + { + name: "merge into empty existing map", + existing: map[UpstreamID]*DBReplace{}, + base: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + }, + }, + { + name: "merge empty base map", + existing: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: -10, Name: "table1"}, + }, + }, + }, + base: map[UpstreamID]*DBReplace{}, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: -10, Name: "table1"}, + }, + }, + }, + }, + { + name: "merge new database with partitions", + existing: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: -10, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: -100, + }, + }, + }, + }, + }, + base: map[UpstreamID]*DBReplace{ + 2: { + Name: "db2", + 
DbID: 2000, + TableMap: map[UpstreamID]*TableReplace{ + 20: { + TableID: 2020, + Name: "table2", + PartitionMap: map[UpstreamID]DownstreamID{ + 200: 2200, + }, + }, + }, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: -10, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: -100, + }, + }, + }, + }, + 2: { + Name: "db2", + DbID: 2000, + TableMap: map[UpstreamID]*TableReplace{ + 20: { + TableID: 2020, + Name: "table2", + PartitionMap: map[UpstreamID]DownstreamID{ + 200: 2200, + }, + }, + }, + }, + }, + }, + { + name: "merge existing database with multiple tables", + existing: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: -10, Name: "table1"}, + 11: {TableID: -11, Name: "table2"}, + }, + }, + }, + base: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + 11: {TableID: 1011, Name: "table2"}, + }, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + 11: {TableID: 1011, Name: "table2"}, + }, + }, + }, + }, + { + name: "merge with complex partition updates", + existing: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: -10, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: -100, + 101: -101, + }, + }, + }, + }, + }, + base: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: 1010, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 1100, + 101: 1101, + 102: 1102, // new partition + }, + }, + }, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: 
map[UpstreamID]*TableReplace{ + 10: { + TableID: 1010, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 1100, + 101: 1101, + 102: 1102, + }, + }, + }, + }, + }, + }, + { + name: "merge multiple databases with mixed states", + existing: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: -10, Name: "table1"}, + }, + }, + 2: { + Name: "db2", + DbID: 2000, + TableMap: map[UpstreamID]*TableReplace{ + 20: {TableID: 2020, Name: "table2"}, + }, + }, + }, + base: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + 3: { + Name: "db3", + DbID: 3000, + TableMap: map[UpstreamID]*TableReplace{ + 30: {TableID: 3030, Name: "table3"}, + }, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + 2: { + Name: "db2", + DbID: 2000, + TableMap: map[UpstreamID]*TableReplace{ + 20: {TableID: 2020, Name: "table2"}, + }, + }, + 3: { + Name: "db3", + DbID: 3000, + TableMap: map[UpstreamID]*TableReplace{ + 30: {TableID: 3030, Name: "table3"}, + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tm := NewTableMappingManager() + tm.DBReplaceMap = tt.existing + tm.MergeBaseDBReplace(tt.base) + require.Equal(t, tt.expected, tm.DBReplaceMap) + + // Additional verification for deep equality of nested structures + for dbID, dbReplace := range tt.expected { + require.Contains(t, tm.DBReplaceMap, dbID) + require.Equal(t, dbReplace.Name, tm.DBReplaceMap[dbID].Name) + require.Equal(t, dbReplace.DbID, tm.DBReplaceMap[dbID].DbID) + + for tblID, tblReplace := range dbReplace.TableMap { + require.Contains(t, tm.DBReplaceMap[dbID].TableMap, tblID) + require.Equal(t, tblReplace.Name, tm.DBReplaceMap[dbID].TableMap[tblID].Name) + require.Equal(t, 
tblReplace.TableID, tm.DBReplaceMap[dbID].TableMap[tblID].TableID) + require.Equal(t, tblReplace.PartitionMap, tm.DBReplaceMap[dbID].TableMap[tblID].PartitionMap) + } + } + }) + } +} + +func TestFilterDBReplaceMap(t *testing.T) { + tests := []struct { + name string + initial map[UpstreamID]*DBReplace + filter *utils.PiTRTableFilter + expected map[UpstreamID]*DBReplace + }{ + { + name: "empty filter keeps nothing", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + }, + filter: &utils.PiTRTableFilter{ + DbIdToTable: map[int64]map[int64]struct{}{}, + }, + expected: map[UpstreamID]*DBReplace{}, + }, + { + name: "filter specific database", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + 2: { + Name: "db2", + DbID: 2000, + TableMap: map[UpstreamID]*TableReplace{ + 20: {TableID: 2020, Name: "table2"}, + }, + }, + }, + filter: &utils.PiTRTableFilter{ + DbIdToTable: map[int64]map[int64]struct{}{ + 1: {10: struct{}{}}, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + }, + }, + { + name: "filter specific tables within database", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + 11: {TableID: 1011, Name: "table2"}, + 12: {TableID: 1012, Name: "table3"}, + }, + }, + }, + filter: &utils.PiTRTableFilter{ + DbIdToTable: map[int64]map[int64]struct{}{ + 1: { + 10: struct{}{}, + 12: struct{}{}, + }, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + 12: {TableID: 1012, Name: "table3"}, + }, + }, + }, + }, 
+ { + name: "filter tables with partitions", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: 1010, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 1100, + 101: 1101, + }, + }, + 11: { + TableID: 1011, + Name: "table2", + PartitionMap: map[UpstreamID]DownstreamID{ + 102: 1102, + 103: 1103, + }, + }, + }, + }, + }, + filter: &utils.PiTRTableFilter{ + DbIdToTable: map[int64]map[int64]struct{}{ + 1: {10: struct{}{}}, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: 1010, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 1100, + 101: 1101, + }, + }, + }, + }, + }, + }, + { + name: "filter with multiple databases and tables", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + 11: {TableID: 1011, Name: "table2"}, + }, + }, + 2: { + Name: "db2", + DbID: 2000, + TableMap: map[UpstreamID]*TableReplace{ + 20: {TableID: 2020, Name: "table3"}, + 21: {TableID: 2021, Name: "table4"}, + }, + }, + 3: { + Name: "db3", + DbID: 3000, + TableMap: map[UpstreamID]*TableReplace{ + 30: {TableID: 3030, Name: "table5"}, + }, + }, + }, + filter: &utils.PiTRTableFilter{ + DbIdToTable: map[int64]map[int64]struct{}{ + 1: {10: struct{}{}}, + 2: { + 20: struct{}{}, + 21: struct{}{}, + }, + }, + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + 2: { + Name: "db2", + DbID: 2000, + TableMap: map[UpstreamID]*TableReplace{ + 20: {TableID: 2020, Name: "table3"}, + 21: {TableID: 2021, Name: "table4"}, + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tm := NewTableMappingManager() + tm.DBReplaceMap = tt.initial + + 
// create a copy of globalIdMap before filtering + globalIdMap := make(map[UpstreamID]DownstreamID) + for dbID, dbReplace := range tt.initial { + globalIdMap[dbID] = dbReplace.DbID + for tblID, tblReplace := range dbReplace.TableMap { + globalIdMap[tblID] = tblReplace.TableID + for partID, partDownID := range tblReplace.PartitionMap { + globalIdMap[partID] = partDownID + } + } + } + tm.globalIdMap = globalIdMap + + tm.FilterDBReplaceMap(tt.filter) + + // verify DBReplaceMap is as expected + require.Equal(t, tt.expected, tm.DBReplaceMap) + + // verify globalIdMap is properly filtered as well + for dbID, dbReplace := range tt.expected { + require.Equal(t, dbReplace.DbID, tm.globalIdMap[dbID]) + for tblID, tblReplace := range dbReplace.TableMap { + require.Equal(t, tblReplace.TableID, tm.globalIdMap[tblID]) + for partID, partDownID := range tblReplace.PartitionMap { + require.Equal(t, partDownID, tm.globalIdMap[partID]) + } + } + } + + // verify that filtered IDs are removed from globalIdMap + for upID := range globalIdMap { + found := false + for dbID, dbReplace := range tt.expected { + if upID == dbID { + found = true + break + } + for tblID, tblReplace := range dbReplace.TableMap { + if upID == tblID { + found = true + break + } + for partID := range tblReplace.PartitionMap { + if upID == partID { + found = true + break + } + } + } + } + if !found { + _, exists := tm.globalIdMap[upID] + require.False(t, exists, "ID %d should have been removed from globalIdMap", upID) + } + } + }) + } +} + +func TestReplaceTemporaryIDs(t *testing.T) { + tests := []struct { + name string + initial map[UpstreamID]*DBReplace + tempCounter DownstreamID + genGlobalIDs func(context.Context, int) ([]int64, error) + expected map[UpstreamID]*DBReplace + expectedErr error + }{ + { + name: "no temporary IDs", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + }, + tempCounter: 
InitialTempId, + genGlobalIDs: func(ctx context.Context, n int) ([]int64, error) { + return nil, nil + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: 1010, Name: "table1"}, + }, + }, + }, + expectedErr: nil, + }, + { + name: "replace all temporary IDs", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: -2, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: -3, + }, + }, + }, + }, + }, + tempCounter: -3, + genGlobalIDs: func(ctx context.Context, n int) ([]int64, error) { + return []int64{1000, 1010, 1020}, nil + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: 1010, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 1020, + }, + }, + }, + }, + }, + expectedErr: nil, + }, + { + name: "mixed temporary and global IDs", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: -1, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 1100, + 101: -2, + }, + }, + }, + }, + 2: { + Name: "db2", + DbID: -3, + TableMap: map[UpstreamID]*TableReplace{ + 20: {TableID: 2000, Name: "table2"}, + }, + }, + }, + tempCounter: -3, + genGlobalIDs: func(ctx context.Context, n int) ([]int64, error) { + return []int64{2010, 2020, 2030}, nil + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: 2010, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 1100, + 101: 2020, + }, + }, + }, + }, + 2: { + Name: "db2", + DbID: 2030, + TableMap: map[UpstreamID]*TableReplace{ + 20: {TableID: 2000, Name: "table2"}, + }, + }, + }, + expectedErr: nil, + }, + { + name: "error generating global IDs", + initial: 
map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: -2, Name: "table1"}, + }, + }, + }, + tempCounter: -2, + genGlobalIDs: func(ctx context.Context, n int) ([]int64, error) { + return nil, errors.New("failed to generate global IDs") + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: {TableID: -2, Name: "table1"}, + }, + }, + }, + expectedErr: errors.New("failed to generate global IDs"), + }, + { + name: "complex structure with multiple temporary IDs", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -1, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: -2, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: -3, + 101: -4, + }, + }, + 11: { + TableID: -5, + Name: "table2", + PartitionMap: map[UpstreamID]DownstreamID{ + 102: -6, + }, + }, + }, + }, + 2: { + Name: "db2", + DbID: -7, + TableMap: map[UpstreamID]*TableReplace{ + 20: { + TableID: -8, + Name: "table3", + }, + }, + }, + }, + tempCounter: -8, + genGlobalIDs: func(ctx context.Context, n int) ([]int64, error) { + ids := make([]int64, n) + for i := 0; i < n; i++ { + ids[i] = int64(1000 + i*10) + } + return ids, nil + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 1000, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: 1010, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 1020, + 101: 1030, + }, + }, + 11: { + TableID: 1040, + Name: "table2", + PartitionMap: map[UpstreamID]DownstreamID{ + 102: 1050, + }, + }, + }, + }, + 2: { + Name: "db2", + DbID: 1060, + TableMap: map[UpstreamID]*TableReplace{ + 20: { + TableID: 1070, + Name: "table3", + }, + }, + }, + }, + expectedErr: nil, + }, + { + name: "non-consecutive temporary IDs", + initial: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: -5, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: -2, + Name: 
"table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: -8, + 101: -1, + }, + }, + 11: { + TableID: -15, + Name: "table2", + PartitionMap: map[UpstreamID]DownstreamID{ + 102: -3, + }, + }, + }, + }, + }, + tempCounter: -15, + genGlobalIDs: func(ctx context.Context, n int) ([]int64, error) { + ids := make([]int64, n) + for i := 0; i < n; i++ { + ids[i] = int64(2000 + i*10) + } + return ids, nil + }, + expected: map[UpstreamID]*DBReplace{ + 1: { + Name: "db1", + DbID: 2030, + TableMap: map[UpstreamID]*TableReplace{ + 10: { + TableID: 2010, + Name: "table1", + PartitionMap: map[UpstreamID]DownstreamID{ + 100: 2040, + 101: 2000, + }, + }, + 11: { + TableID: 2050, + Name: "table2", + PartitionMap: map[UpstreamID]DownstreamID{ + 102: 2020, + }, + }, + }, + }, + }, + expectedErr: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tm := NewTableMappingManager() + tm.DBReplaceMap = tt.initial + tm.tempIDCounter = tt.tempCounter + + globalIdMap := make(map[UpstreamID]DownstreamID) + for dbID, dbReplace := range tt.initial { + globalIdMap[dbID] = dbReplace.DbID + for tblID, tblReplace := range dbReplace.TableMap { + globalIdMap[tblID] = tblReplace.TableID + for partID, partDownID := range tblReplace.PartitionMap { + globalIdMap[partID] = partDownID + } + } + } + tm.globalIdMap = globalIdMap + + err := tm.ReplaceTemporaryIDs(context.Background(), tt.genGlobalIDs) + + if tt.expectedErr != nil { + require.Error(t, err) + require.Contains(t, err.Error(), tt.expectedErr.Error()) + return + } + + require.NoError(t, err) + require.Equal(t, tt.expected, tm.DBReplaceMap) + require.Equal(t, InitialTempId, tm.tempIDCounter) + + // verify globalIdMap is properly updated as well + for dbID, dbReplace := range tt.expected { + require.Equal(t, dbReplace.DbID, tm.globalIdMap[dbID]) + for tblID, tblReplace := range dbReplace.TableMap { + require.Equal(t, tblReplace.TableID, tm.globalIdMap[tblID]) + for partID, partDownID := range tblReplace.PartitionMap 
{ + require.Equal(t, partDownID, tm.globalIdMap[partID]) + } + } + } + + // verify no temporary IDs remain + for _, id := range tm.globalIdMap { + require.False(t, id < 0, "temporary ID %d still exists in globalIdMap", id) + } + }) + } +} diff --git a/br/pkg/stream/util.go b/br/pkg/stream/util.go deleted file mode 100644 index 10215a68df61d..0000000000000 --- a/br/pkg/stream/util.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. - -package stream - -import ( - "strings" - "time" -) - -const DATE_FORMAT = "2006-01-02 15:04:05.999999999 -0700" - -func FormatDate(ts time.Time) string { - return ts.Format(DATE_FORMAT) -} - -func IsMetaDBKey(key []byte) bool { - return strings.HasPrefix(string(key), "mDB") -} - -func IsMetaDDLJobHistoryKey(key []byte) bool { - return strings.HasPrefix(string(key), "mDDLJobH") -} - -func MaybeDBOrDDLJobHistoryKey(key []byte) bool { - return strings.HasPrefix(string(key), "mD") -} diff --git a/br/pkg/stream/util_test.go b/br/pkg/stream/util_test.go deleted file mode 100644 index 6dda62a04ad60..0000000000000 --- a/br/pkg/stream/util_test.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. 
- -package stream - -import ( - "testing" - "time" - - "github.com/stretchr/testify/require" - "github.com/tikv/client-go/v2/oracle" -) - -func TestDateFormat(t *testing.T) { - cases := []struct { - ts uint64 - target string - }{ - { - 434604259287760897, - "2022-07-15 19:14:39.534 +0800", - }, - { - 434605479096221697, - "2022-07-15 20:32:12.734 +0800", - }, - { - 434605478903808000, - "2022-07-15 20:32:12 +0800", - }, - } - - timeZone, _ := time.LoadLocation("Asia/Shanghai") - for _, ca := range cases { - date := FormatDate(oracle.GetTimeFromTS(ca.ts).In(timeZone)) - require.Equal(t, ca.target, date) - } -} - -func TestPrefix(t *testing.T) { - require.True(t, IsMetaDBKey([]byte("mDBs"))) - require.False(t, IsMetaDBKey([]byte("mDDL"))) - require.True(t, IsMetaDDLJobHistoryKey([]byte("mDDLJobHistory"))) - require.False(t, IsMetaDDLJobHistoryKey([]byte("mDDL"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDL"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDB:"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDDLHistory"))) - require.False(t, MaybeDBOrDDLJobHistoryKey([]byte("DDL"))) -} diff --git a/br/pkg/task/BUILD.bazel b/br/pkg/task/BUILD.bazel index 12c476f34a544..63a2c62897c68 100644 --- a/br/pkg/task/BUILD.bazel +++ b/br/pkg/task/BUILD.bazel @@ -141,7 +141,6 @@ go_test( "//pkg/tablecodec", "//pkg/testkit", "//pkg/types", - "//pkg/util/table-filter", "@com_github_docker_go_units//:go-units", "@com_github_gogo_protobuf//proto", "@com_github_golang_protobuf//proto", diff --git a/br/pkg/task/backup.go b/br/pkg/task/backup.go index af92518043a90..b7f05f14552fe 100644 --- a/br/pkg/task/backup.go +++ b/br/pkg/task/backup.go @@ -585,8 +585,8 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig }) } - // nothing to backup - if len(ranges) == 0 { + // check on ranges and schemas and if nothing to back up do early return + if len(ranges) == 0 && (schemas == nil || schemas.Len() == 0) { pdAddress := 
strings.Join(cfg.PD, ",") log.Warn("Nothing to backup, maybe connected to cluster for restoring", zap.String("PD address", pdAddress)) diff --git a/br/pkg/task/common.go b/br/pkg/task/common.go index 1813741634609..1246921d34484 100644 --- a/br/pkg/task/common.go +++ b/br/pkg/task/common.go @@ -66,6 +66,7 @@ const ( flagConcurrency = "concurrency" FlagChecksum = "checksum" flagFilter = "filter" + flagSysFilter = "sys-filter" flagCaseSensitive = "case-sensitive" flagRemoveTiFlash = "remove-tiflash" flagCheckRequirement = "check-requirements" @@ -252,8 +253,13 @@ type Config struct { // should be removed after TiDB upgrades the BR dependency. Filter filter.MySQLReplicationRules - FilterStr []string `json:"filter-strings" toml:"filter-strings"` - TableFilter filter.Filter `json:"-" toml:"-"` + FilterStr []string `json:"filter-strings" toml:"filter-strings"` + // generated from FilterStr provides by user + TableFilter *utils.CombinedFilter `json:"-" toml:"-"` + // PiTRTableFilter generated from TableFilter during snapshot restore, it has all the db id and table id that needs + // to be restored + PiTRTableFilter *utils.PiTRTableFilter `json:"-" toml:"-"` + SwitchModeInterval time.Duration `json:"switch-mode-interval" toml:"switch-mode-interval"` // Schemas is a database name set, to check whether the restore database has been backup Schemas map[string]struct{} @@ -401,14 +407,17 @@ func DefineTableFlags(command *cobra.Command) { _ = command.MarkFlagRequired(flagTable) } -// DefineFilterFlags defines the --filter and --case-sensitive flags for `full` subcommand. -func DefineFilterFlags(command *cobra.Command, defaultFilter []string, setHidden bool) { +// DefineFilterFlags defines the --filter and --case-sensitive flags. 
+func DefineFilterFlags(command *cobra.Command, defaultFilter []string, defaultSysFilter []string, setHidden bool) {
 	flags := command.Flags()
 	flags.StringArrayP(flagFilter, "f", defaultFilter, "select tables to process")
-	flags.Bool(flagCaseSensitive, false, "whether the table names used in --filter should be case-sensitive")
+	// doesn't have a good short hand since -s is used already
+	flags.StringArray(flagSysFilter, defaultSysFilter, "select system tables to process")
+	flags.Bool(flagCaseSensitive, false, "whether the table names used in --sys-filter and --filter should be case-sensitive")

 	if setHidden {
 		_ = flags.MarkHidden(flagFilter)
+		_ = flags.MarkHidden(flagSysFilter)
 		_ = flags.MarkHidden(flagCaseSensitive)
 	}
 }
@@ -625,45 +634,17 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error {
 	}
 	cfg.RateLimit = rateLimit * rateLimitUnit

-	cfg.Schemas = make(map[string]struct{})
-	cfg.Tables = make(map[string]struct{})
-	var caseSensitive bool
-	if filterFlag := flags.Lookup(flagFilter); filterFlag != nil {
-		cfg.ExplicitFilter = flags.Changed(flagFilter)
-		cfg.FilterStr = filterFlag.Value.(pflag.SliceValue).GetSlice()
-		cfg.TableFilter, err = filter.Parse(cfg.FilterStr)
-		if err != nil {
-			return errors.Trace(err)
-		}
-		caseSensitive, err = flags.GetBool(flagCaseSensitive)
-		if err != nil {
-			return errors.Trace(err)
-		}
-	} else if dbFlag := flags.Lookup(flagDatabase); dbFlag != nil {
-		db := dbFlag.Value.String()
-		if len(db) == 0 {
-			return errors.Annotate(berrors.ErrInvalidArgument, "empty database name is not allowed")
-		}
-		cfg.Schemas[utils.EncloseName(db)] = struct{}{}
-		if tblFlag := flags.Lookup(flagTable); tblFlag != nil {
-			tbl := tblFlag.Value.String()
-			if len(tbl) == 0 {
-				return errors.Annotate(berrors.ErrInvalidArgument, "empty table name is not allowed")
-			}
-			cfg.Tables[utils.EncloseDBAndTable(db, tbl)] = struct{}{}
-			cfg.TableFilter = filter.NewTablesFilter(filter.Table{
-				Schema: db,
-				Name:   tbl,
-			})
-		} else {
-			cfg.TableFilter = filter.NewSchemasFilter(db)
-		}
-	} else {
-		cfg.TableFilter, _ = filter.Parse([]string{"*.*"})
+	// build table filter
+	err = buildFilterFromTableFlag(cfg, flags)
+	if err != nil {
+		return errors.Trace(err)
 	}
-	if !caseSensitive {
-		cfg.TableFilter = filter.CaseInsensitive(cfg.TableFilter)
+
+	err = buildFilterFromFilterFlag(cfg, flags)
+	if err != nil {
+		return errors.Trace(err)
 	}
+
 	checkRequirements, err := flags.GetBool(flagCheckRequirement)
 	if err != nil {
 		return errors.Trace(err)
@@ -732,6 +713,120 @@ func (cfg *Config) ParseFromFlags(flags *pflag.FlagSet) error {
 	return cfg.normalizePDURLs()
 }

+// buildFilterFromTableFlag builds cfg.TableFilter from the --db and --table flags.
+// cfg.Schemas and cfg.Tables are always re-initialized. When --db is not defined on
+// the command, cfg.TableFilter is left untouched so that buildFilterFromFilterFlag
+// can build it from the filter flags instead. A system database is routed to the
+// system-table side of the combined filter and any other database to the user-table
+// side, while the opposite side rejects everything. The result is case-insensitive,
+// like the --db/--table handling in ParseFromFlags that this function replaces.
+func buildFilterFromTableFlag(cfg *Config, flags *pflag.FlagSet) error {
+	var userFilter filter.Filter
+	var sysFilter filter.Filter
+	cfg.Schemas = make(map[string]struct{})
+	cfg.Tables = make(map[string]struct{})
+
+	if dbFlag := flags.Lookup(flagDatabase); dbFlag != nil {
+		db := dbFlag.Value.String()
+		var f filter.Filter
+		if len(db) == 0 {
+			return errors.Annotate(berrors.ErrInvalidArgument, "empty database name is not allowed")
+		}
+		cfg.Schemas[utils.EncloseName(db)] = struct{}{}
+		if tblFlag := flags.Lookup(flagTable); tblFlag != nil {
+			tbl := tblFlag.Value.String()
+			if len(tbl) == 0 {
+				return errors.Annotate(berrors.ErrInvalidArgument, "empty table name is not allowed")
+			}
+			cfg.Tables[utils.EncloseDBAndTable(db, tbl)] = struct{}{}
+			f = filter.NewTablesFilter(filter.Table{
+				Schema: db,
+				Name:   tbl,
+			})
+		} else {
+			f = filter.NewSchemasFilter(db)
+		}
+		// make it compatible to existing db/table flag
+		if utils.IsSysDB(db) {
+			userFilter = utils.NewRejectAllFilter()
+			sysFilter = f
+		} else {
+			userFilter = f
+			sysFilter = utils.NewRejectAllFilter()
+		}
+		// --db/--table have always been matched case-insensitively, keep that behavior
+		cfg.TableFilter = utils.NewCombinedFilter(userFilter, sysFilter).ToCaseInsensitive()
+	}
+	return nil
+}
+
+// buildFilterFromFilterFlag builds cfg.TableFilter from the --filter, --sys-filter and
+// --case-sensitive flags. It must run after buildFilterFromTableFlag: when that function
+// already produced a filter from --db/--table, this function only verifies that the user
+// did not pass --filter or --sys-filter as well and then keeps the existing filter.
+// A flag that is not defined on the command falls back to an accept-all filter, and the
+// combined filter is made case-insensitive unless --case-sensitive is defined and true.
+// It returns an error when a filter expression cannot be parsed or when filter flags are
+// mixed with --db/--table.
+func buildFilterFromFilterFlag(cfg *Config, flags *pflag.FlagSet) error {
+	if cfg.TableFilter != nil {
+		// The filter was built from --db/--table. Commands that define those flags do not
+		// necessarily define --filter, so only reject an explicit combination of both;
+		// Changed reports false for a flag that is not defined.
+		if flags.Changed(flagFilter) || flags.Changed(flagSysFilter) {
+			return errors.Annotate(berrors.ErrInvalidArgument, "--filter is not compatible with --db and --table")
+		}
+		return nil
+	}
+	var err error
+
+	// get user table filter
+	var userFilter filter.Filter
+	var sysFilter filter.Filter
+
+	if filterFlag := flags.Lookup(flagFilter); filterFlag != nil {
+		cfg.ExplicitFilter = flags.Changed(flagFilter)
+		cfg.FilterStr = filterFlag.Value.(pflag.SliceValue).GetSlice()
+		userFilter, err = filter.Parse(cfg.FilterStr)
+		if err != nil {
+			return errors.Trace(err)
+		}
+	} else {
+		userFilter = utils.NewAcceptAllFilter()
+	}
+
+	// get sys table filter
+	if sysFilterFlag := flags.Lookup(flagSysFilter); sysFilterFlag != nil {
+		// the filter is explicit when either --filter or --sys-filter was passed, so
+		// do not overwrite the value derived from --filter above
+		cfg.ExplicitFilter = cfg.ExplicitFilter || flags.Changed(flagSysFilter)
+		sysFilter, err = filter.Parse(sysFilterFlag.Value.(pflag.SliceValue).GetSlice())
+		if err != nil {
+			return errors.Trace(err)
+		}
+	} else {
+		// compatible to previous default behavior
+		sysFilter = utils.NewAcceptAllFilter()
+	}
+
+	// combine sys table filter and user table filter
+	cfg.TableFilter = utils.NewCombinedFilter(userFilter, sysFilter)
+
+	// table names are matched case-insensitively by default, also for commands that do
+	// not define --case-sensitive at all
+	caseSensitive := false
+	if flags.Lookup(flagCaseSensitive) != nil {
+		caseSensitive, err = flags.GetBool(flagCaseSensitive)
+		if err != nil {
+			return errors.Trace(err)
+		}
+	}
+	if !caseSensitive {
+		cfg.TableFilter = cfg.TableFilter.ToCaseInsensitive()
+	}
+	return nil
+}
+
 func (cfg *Config) parseAndValidateMasterKeyInfo(hasPlaintextKey bool, flags *pflag.FlagSet) error {
 	masterKeyString, err := flags.GetString(flagMasterKeyConfig)
 	if err != nil {
@@ -1000,3 +1095,11 @@ func progressFileWriterRoutine(ctx context.Context, progress glue.Progress, tota
 		}
 	}
 }
+
+// WriteStringToConsole writes msg as-is to the output of the console obtained from g
+// and returns the write error, if any.
+func WriteStringToConsole(g glue.Glue, msg string) error {
+	b := []byte(msg)
+	_, err := glue.GetConsole(g).Out().Write(b)
+	return err
+}
diff --git a/br/pkg/task/common_test.go b/br/pkg/task/common_test.go
index cc0c64e531857..d8cf1f6767dba 100644
--- a/br/pkg/task/common_test.go
+++ b/br/pkg/task/common_test.go
@@ -12,7 +12,6 @@ import (
 	"github.com/pingcap/tidb/br/pkg/storage"
 	"github.com/pingcap/tidb/br/pkg/utils"
"github.com/pingcap/tidb/pkg/config" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/spf13/pflag" "github.com/stretchr/testify/require" ) @@ -272,7 +271,7 @@ func expectedDefaultConfig() Config { SendCreds: true, CheckRequirements: true, FilterStr: []string(nil), - TableFilter: filter.CaseInsensitive(must(filter.Parse([]string{"*.*"}))), + TableFilter: utils.NewCombinedFilterAcceptAll().ToCaseInsensitive(), Schemas: map[string]struct{}{}, Tables: map[string]struct{}{}, SwitchModeInterval: 300000000000, diff --git a/br/pkg/task/config_test.go b/br/pkg/task/config_test.go index 8bd7d72e4f3bb..436791076fe13 100644 --- a/br/pkg/task/config_test.go +++ b/br/pkg/task/config_test.go @@ -194,7 +194,7 @@ func TestCheckRestoreDBAndTable(t *testing.T) { for _, db := range ca.backupDBs { backupDBs = append(backupDBs, db) } - err := CheckRestoreDBAndTable(backupDBs, cfg) + err := VerifyDBAndTableInBackup(backupDBs, cfg) require.NoError(t, err) } } diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 94572ff301ba6..f1061982360f7 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tidb/br/pkg/restore" snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" + "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version" @@ -597,8 +598,8 @@ func CheckNewCollationEnable( return enabled, nil } -// CheckRestoreDBAndTable is used to check whether the restore dbs or tables have been backup -func CheckRestoreDBAndTable(schemas []*metautil.Database, cfg *RestoreConfig) error { +// VerifyDBAndTableInBackup is used to check whether the restore dbs or tables have been backup +func VerifyDBAndTableInBackup(schemas []*metautil.Database, cfg *RestoreConfig) error { if len(cfg.Schemas) == 0 && len(cfg.Tables) == 0 { return nil } @@ -618,6 +619,8 
@@ func CheckRestoreDBAndTable(schemas []*metautil.Database, cfg *RestoreConfig) er tablesMap[utils.EncloseDBAndTable(dbName, table.Info.Name.L)] = struct{}{} } } + + // check on if explicit schema/table filter matches restoreSchemas := cfg.Schemas restoreTables := cfg.Tables for schema := range restoreSchemas { @@ -708,7 +711,10 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if err := version.CheckClusterVersion(c, mgr.GetPDClient(), version.CheckVersionForBR); err != nil { return errors.Trace(err) } - restoreError = runSnapshotRestore(c, mgr, g, cmdName, cfg, nil) + snapshotRestoreConfig := SnapshotRestoreConfig{ + RestoreConfig: cfg, + } + restoreError = runSnapshotRestore(c, mgr, g, cmdName, &snapshotRestoreConfig) } if restoreError != nil { return errors.Trace(restoreError) @@ -745,7 +751,13 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return nil } -func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName string, cfg *RestoreConfig, checkInfo *PiTRTaskInfo) error { +type SnapshotRestoreConfig struct { + *RestoreConfig + piTRTaskInfo *PiTRTaskInfo + logTableHistoryManager *stream.LogBackupTableHistoryManager +} + +func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName string, cfg *SnapshotRestoreConfig) error { cfg.Adjust() defer summary.Summary(cmdName) ctx, cancel := context.WithCancel(c) @@ -758,9 +770,31 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s ctx = opentracing.ContextWithSpan(ctx, span1) } - codec := mgr.GetStorage().GetCodec() + // reads out information from backup meta file and do requirement checking if needed + u, s, backupMeta, err := ReadBackupMeta(ctx, metautil.MetaFile, &cfg.Config) + if err != nil { + return errors.Trace(err) + } + if cfg.CheckRequirements { + log.Info("Checking incompatible TiCDC changefeeds before restoring.", + logutil.ShortError(err), zap.Uint64("restore-ts", 
backupMeta.EndVersion)) + if err := checkIncompatibleChangefeed(ctx, backupMeta.EndVersion, mgr.GetDomain().GetEtcdClient()); err != nil { + return errors.Trace(err) + } - // need retrieve these configs from tikv if not set in command. + backupVersion := version.NormalizeBackupVersion(backupMeta.ClusterVersion) + if backupVersion != nil { + if versionErr := version.CheckClusterVersion(ctx, mgr.GetPDClient(), version.CheckVersionForBackup(backupVersion)); versionErr != nil { + return errors.Trace(versionErr) + } + } + } + if _, err = CheckNewCollationEnable(backupMeta.GetNewCollationsEnabled(), g, mgr.GetStorage(), cfg.CheckRequirements); err != nil { + return errors.Trace(err) + } + + // build restore client + // need to retrieve these configs from tikv if not set in command. kvConfigs := &pconfig.KVConfig{ ImportGoroutines: cfg.ConcurrencyPerStore, MergeRegionSize: cfg.MergeSmallRegionSizeBytes, @@ -779,42 +813,19 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s cfg.ConcurrencyPerStore = kvConfigs.ImportGoroutines // using tikv config to set the concurrency-per-store for client. 
client.SetConcurrencyPerStore(cfg.ConcurrencyPerStore.Value) - err := configureRestoreClient(ctx, client, cfg) + err = configureRestoreClient(ctx, client, cfg.RestoreConfig) if err != nil { return errors.Trace(err) } - // Init DB connection sessions - err = client.Init(g, mgr.GetStorage()) + // InitConnections DB connection sessions + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() - if err != nil { return errors.Trace(err) } - u, s, backupMeta, err := ReadBackupMeta(ctx, metautil.MetaFile, &cfg.Config) - if err != nil { - return errors.Trace(err) - } - if cfg.CheckRequirements { - err := checkIncompatibleChangefeed(ctx, backupMeta.EndVersion, mgr.GetDomain().GetEtcdClient()) - log.Info("Checking incompatible TiCDC changefeeds before restoring.", - logutil.ShortError(err), zap.Uint64("restore-ts", backupMeta.EndVersion)) - if err != nil { - return errors.Trace(err) - } - } - - backupVersion := version.NormalizeBackupVersion(backupMeta.ClusterVersion) - if cfg.CheckRequirements && backupVersion != nil { - if versionErr := version.CheckClusterVersion(ctx, mgr.GetPDClient(), version.CheckVersionForBackup(backupVersion)); versionErr != nil { - return errors.Trace(versionErr) - } - } - if _, err = CheckNewCollationEnable(backupMeta.GetNewCollationsEnabled(), g, mgr.GetStorage(), cfg.CheckRequirements); err != nil { - return errors.Trace(err) - } - reader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) - if err = client.LoadSchemaIfNeededAndInitClient(c, backupMeta, u, reader, cfg.LoadStats, nil, nil); err != nil { + metaReader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) + if err = client.LoadSchemaIfNeededAndInitClient(ctx, backupMeta, u, metaReader, cfg.LoadStats, nil, nil); err != nil { return errors.Trace(err) } @@ -833,15 +844,41 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s existsCheckpointMetadata := checkpoint.ExistsSstRestoreCheckpoint(ctx, mgr.GetDomain(), 
checkpoint.SnapshotRestoreCheckpointDatabaseName) checkpointFirstRun = !existsCheckpointMetadata } - if err = CheckRestoreDBAndTable(client.GetDatabases(), cfg); err != nil { + if err = VerifyDBAndTableInBackup(client.GetDatabases(), cfg.RestoreConfig); err != nil { return err } - files, tables, dbs := filterRestoreFiles(client, cfg) - if len(dbs) == 0 && len(tables) != 0 { - return errors.Annotate(berrors.ErrRestoreInvalidBackup, "contain tables but no databases") + + // filters out db/table/files using filter + fileMap, tableMap, dbMap, err := filterRestoreFiles(client, cfg.RestoreConfig) + if err != nil { + return errors.Trace(err) } + log.Info("found items to restore after filtering", + zap.Int("files", len(fileMap)), + zap.Int("tables", len(tableMap)), + zap.Int("db", len(dbMap))) + + // only run when this full restore is part of the PiTR + if cfg.logTableHistoryManager != nil { + // adjust tables to restore in the snapshot restore phase since it will later be renamed during + // log restore and will fall into or out of the filter range. + err := adjustTablesToRestoreAndCreateFilter(cfg.logTableHistoryManager, cfg.RestoreConfig, client, fileMap, tableMap) + if err != nil { + return errors.Trace(err) + } + + log.Info("adjusted items to restore", + zap.Int("files", len(fileMap)), + zap.Int("tables", len(tableMap)), + zap.Int("db", len(dbMap))) + + // need to update to include all eligible table id from snapshot restore + UpdatePiTRFilter(cfg.RestoreConfig, tableMap) + } + files, tables, dbs := convertMapsToSlices(fileMap, tableMap, dbMap) if cfg.CheckRequirements && checkpointFirstRun { + // after figuring out what files to restore, check if disk has enough space if err := checkDiskSpace(ctx, mgr, files, tables); err != nil { return errors.Trace(err) } @@ -857,28 +894,29 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // for full + log restore. should check the cluster is empty. 
- if client.IsFull() && checkInfo != nil && checkInfo.FullRestoreCheckErr != nil { - return checkInfo.FullRestoreCheckErr + if client.IsFull() && cfg.piTRTaskInfo != nil && cfg.piTRTaskInfo.FullRestoreCheckErr != nil { + return cfg.piTRTaskInfo.FullRestoreCheckErr } importModeSwitcher := restore.NewImportModeSwitcher(mgr.GetPDClient(), cfg.Config.SwitchModeInterval, mgr.GetTLSConfig()) - restoreSchedulers, schedulersConfig, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, true) + restoreSchedulersFunc, schedulersConfig, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, true) if err != nil { return errors.Trace(err) } - schedulersRemovable := false + // need to know whether restore has been completed so can restore schedulers + canRestoreSchedulers := false defer func() { // don't reset pd scheduler if checkpoint mode is used and restored is not finished - if cfg.UseCheckpoint && !schedulersRemovable { - log.Info("skip removing pd schehduler for next retry") + if cfg.UseCheckpoint && !canRestoreSchedulers { + log.Info("skip removing pd scheduler for next retry") return } - log.Info("start to remove the pd scheduler") + log.Info("start to restore pd scheduler") // run the post-work to avoid being stuck in the import // mode or emptied schedulers. 
- restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulers, cfg.Online) - log.Info("finish removing pd scheduler") + restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulersFunc, cfg.Online) + log.Info("finish restoring pd scheduler") }() if isFullRestore(cmdName) { @@ -896,7 +934,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } } else if client.IsFull() && checkpointFirstRun && cfg.CheckRequirements { if err := checkTableExistence(ctx, mgr, tables, g); err != nil { - schedulersRemovable = true + canRestoreSchedulers = true return errors.Trace(err) } } @@ -908,8 +946,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // preallocate the table id, because any ddl job or database creation(include checkpoint) also allocates the global ID - err = client.AllocTableIDs(ctx, tables) - if err != nil { + if err = client.AllocTableIDs(ctx, tables); err != nil { return errors.Trace(err) } @@ -921,7 +958,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } if restoreSchedulersConfigFromCheckpoint != nil { - restoreSchedulers = mgr.MakeUndoFunctionByConfig(*restoreSchedulersConfigFromCheckpoint) + restoreSchedulersFunc = mgr.MakeUndoFunctionByConfig(*restoreSchedulersConfigFromCheckpoint) } checkpointSetWithTableID = sets @@ -929,7 +966,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s // need to flush the whole checkpoint data so that br can quickly jump to // the log kv restore step when the next retry. 
log.Info("wait for flush checkpoint...") - client.WaitForFinishCheckpoint(ctx, len(cfg.FullBackupStorage) > 0 || !schedulersRemovable) + client.WaitForFinishCheckpoint(ctx, len(cfg.FullBackupStorage) > 0 || !canRestoreSchedulers) }() } @@ -1003,8 +1040,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // execute DDL first - err = client.ExecDDLs(ctx, ddlJobs) - if err != nil { + if err = client.ExecDDLs(ctx, ddlJobs); err != nil { return errors.Trace(err) } @@ -1039,6 +1075,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } + codec := mgr.GetStorage().GetCodec() if len(files) == 0 { log.Info("no files, empty databases and tables are restored") summary.SetSuccessStatus(true) @@ -1161,7 +1198,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } - schedulersRemovable = true + canRestoreSchedulers = true // Set task summary to success status. summary.SetSuccessStatus(true) @@ -1376,35 +1413,166 @@ func dropToBlackhole( return outCh } -// filterRestoreFiles filters tables that can't be processed after applying cfg.TableFilter.MatchTable. -// if the db has no table that can be processed, the db will be filtered too. +// filterRestoreFiles filters out dbs and tables. 
func filterRestoreFiles( client *snapclient.SnapClient, cfg *RestoreConfig, -) (files []*backuppb.File, tables []*metautil.Table, dbs []*metautil.Database) { +) (fileMap map[string]*backuppb.File, tableMap map[int64]*metautil.Table, dbMap map[int64]*metautil.Database, err error) { + // Initialize maps + fileMap = make(map[string]*backuppb.File) + tableMap = make(map[int64]*metautil.Table) + dbMap = make(map[int64]*metautil.Database) + for _, db := range client.GetDatabases() { - dbName := db.Info.Name.O - if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok { - dbName = name - } if checkpoint.IsCheckpointDB(db.Info.Name) { continue } + dbName := db.Info.Name.O + log.Info("####### getting db", zap.String("db", dbName)) if !cfg.TableFilter.MatchSchema(dbName) { continue } - dbs = append(dbs, db) + log.Info("####### passing db to restore in full", zap.String("db", dbName)) + dbMap[db.Info.ID] = db for _, table := range db.Tables { if table.Info == nil || !cfg.TableFilter.MatchTable(dbName, table.Info.Name.O) { continue } - files = append(files, table.Files...) 
- tables = append(tables, table) + log.Info("####### passing tables to restore in full", zap.String("table", table.Info.Name.O)) + // Add table to tableMap using table ID as key + tableMap[table.Info.ID] = table + + // Add files to fileMap using file name as key + for _, file := range table.Files { + fileMap[file.Name] = file + } } } + + // sanity check + if len(dbMap) == 0 && len(tableMap) != 0 { + err = errors.Annotate(berrors.ErrRestoreInvalidBackup, "contains tables but no databases") + } return } +func adjustTablesToRestoreAndCreateFilter( + logBackupTableHistory *stream.LogBackupTableHistoryManager, + cfg *RestoreConfig, + client *snapclient.SnapClient, + fileMap map[string]*backuppb.File, + tableMap map[int64]*metautil.Table, +) (err error) { + snapshotDBMap := client.GetDatabaseMap() + + // build filter for pitr restore to use later + piTRTableFilter := utils.NewPiTRTableFilter() + + // put all the newly created db that matches the filter during log backup into the pitr filter + newlyCreatedDBs := logBackupTableHistory.GetNewlyCreatedDBHistory() + for dbId, dbName := range newlyCreatedDBs { + if cfg.TableFilter.MatchSchema(dbName) { + log.Info("######### pitr adjust db ", zap.Any("db", dbName)) + piTRTableFilter.UpdateDB(dbId) + } + } + + // get all the tables seen during the log backup + tableHistory := logBackupTableHistory.GetTableHistory() + + for tableID, dbIDAndTableName := range tableHistory { + start := dbIDAndTableName[0] + end := dbIDAndTableName[1] + + var dbName string + if snapDb, exists := snapshotDBMap[end.DbID]; exists { + dbName = snapDb.Info.Name.O + } else if name, exists := logBackupTableHistory.GetDBNameByID(end.DbID); exists { + // if db id does not exist in the snapshot, meaning it's created during log backup + dbName = name + } else { + log.Warn("did not find db id in full/log backup, "+ + "likely the full&log backup provided have different filters, ignoring this db", + zap.Any("dbId", end.DbID)) + continue + } + + // skip if db 
doesn't match + if !cfg.TableFilter.MatchSchema(dbName) { + continue + } + + // handle in filter range cases + // 1. original == current, didn't have renaming + // 2. original has been renamed and current is in the filter range + // we need to restore original table + if cfg.TableFilter.MatchTable(dbName, end.TableName) { + // put this db/table id into pitr filter as it matches with user's filter + // have to update filter here since table might be empty or not in snapshot so nothing will be returned . + // but we still need to capture this table id to restore during log restore. + piTRTableFilter.UpdateTable(end.DbID, tableID) + + // check if snapshot contains the original db/table + originalDB, exists := snapshotDBMap[start.DbID] + if !exists { + // original db created during log backup, snapshot doesn't have information about this db so doesn't + // need to restore at snapshot + continue + } + + // need to restore the matching table in snapshot restore phase + for _, originalTable := range originalDB.Tables { + if originalTable.Info == nil { + continue + } + if originalTable.Info.ID == tableID { + for _, file := range originalTable.Files { + fileMap[file.Name] = file + } + tableMap[originalTable.Info.ID] = originalTable + // only one table id will match + break + } + } + // handle case where current is not in range and original was in range, we need to remove the original from + // restoring + } else if cfg.TableFilter.MatchTable(dbName, start.TableName) { + // remove it from the filter, will not remove db even table size becomes 0 + _ = piTRTableFilter.Remove(start.DbID, tableID) + + // check if snapshot contains the original db/table + originalDB, exists := snapshotDBMap[start.DbID] + if !exists { + // original db created during log backup, no need to process further + continue + } + for _, originalTable := range originalDB.Tables { + if originalTable.Info == nil { + continue + } + if originalTable.Info.ID == tableID { + for _, file := range 
originalTable.Files { + delete(fileMap, file.Name) + } + delete(tableMap, originalTable.Info.ID) + // only one table id will match + break + } + } + } + } + // store the filter into config + log.Info("pitr table filter", zap.String("map", piTRTableFilter.String())) + cfg.PiTRTableFilter = piTRTableFilter + return +} + +func UpdatePiTRFilter(cfg *RestoreConfig, tableMap map[int64]*metautil.Table) { + for _, table := range tableMap { + cfg.PiTRTableFilter.UpdateTable(table.DB.ID, table.Info.ID) + } +} + // tweakLocalConfForRestore tweaks some of configs of TiDB to make the restore progress go well. // return a function that could restore the config to origin. func tweakLocalConfForRestore() func() { @@ -1655,3 +1823,28 @@ func afterTableRestoredCh(ctx context.Context, createdTables []*snapclient.Creat }() return outCh } + +func convertMapsToSlices( + fileMap map[string]*backuppb.File, + tableMap map[int64]*metautil.Table, + dbMap map[int64]*metautil.Database, +) ([]*backuppb.File, []*metautil.Table, []*metautil.Database) { + files := make([]*backuppb.File, 0, len(fileMap)) + for _, file := range fileMap { + files = append(files, file) + } + + tables := make([]*metautil.Table, 0, len(tableMap)) + for _, table := range tableMap { + tables = append(tables, table) + log.Info("####### after table", zap.Any("table", table.Info.ID), zap.Any("name", table.Info.Name.String())) + } + + dbs := make([]*metautil.Database, 0, len(dbMap)) + for _, db := range dbMap { + dbs = append(dbs, db) + log.Info("####### after db", zap.Any("db", db.Info.ID), zap.Any("name", db.Info.Name.String())) + } + + return files, tables, dbs +} diff --git a/br/pkg/task/restore_raw.go b/br/pkg/task/restore_raw.go index 1680e60472f7b..f38040d569b70 100644 --- a/br/pkg/task/restore_raw.go +++ b/br/pkg/task/restore_raw.go @@ -101,7 +101,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR client.SetRateLimit(cfg.RateLimit) client.SetCrypter(&cfg.CipherInfo) 
client.SetConcurrencyPerStore(cfg.ConcurrencyPerStore.Value) - err = client.Init(g, mgr.GetStorage()) + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() if err != nil { return errors.Trace(err) diff --git a/br/pkg/task/restore_test.go b/br/pkg/task/restore_test.go index 86ceb3755ee09..5cb3c8a67abe7 100644 --- a/br/pkg/task/restore_test.go +++ b/br/pkg/task/restore_test.go @@ -308,6 +308,8 @@ func TestFilterDDLJobs(t *testing.T) { ddlJobs := task.FilterDDLJobs(allDDLJobs, tables) for _, job := range ddlJobs { t.Logf("get ddl job: %s", job.Query) + t.Logf("table name: %s", job.TableName) + t.Logf("dbid: %s", job.SchemaName) } require.Equal(t, 7, len(ddlJobs)) } diff --git a/br/pkg/task/restore_txn.go b/br/pkg/task/restore_txn.go index 16d8e099f659d..720d40dc6bf69 100644 --- a/br/pkg/task/restore_txn.go +++ b/br/pkg/task/restore_txn.go @@ -44,7 +44,7 @@ func RunRestoreTxn(c context.Context, g glue.Glue, cmdName string, cfg *Config) client.SetRateLimit(cfg.RateLimit) client.SetCrypter(&cfg.CipherInfo) client.SetConcurrencyPerStore(uint(cfg.Concurrency)) - err = client.Init(g, mgr.GetStorage()) + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() if err != nil { return errors.Trace(err) diff --git a/br/pkg/task/stream.go b/br/pkg/task/stream.go index d56b9b021354a..a1a3cb3eae479 100644 --- a/br/pkg/task/stream.go +++ b/br/pkg/task/stream.go @@ -417,7 +417,6 @@ func (s *streamMgr) setGCSafePoint(ctx context.Context, sp utils.BRServiceSafePo func (s *streamMgr) buildObserveRanges() ([]kv.KeyRange, error) { dRanges, err := stream.BuildObserveDataRanges( s.mgr.GetStorage(), - s.cfg.FilterStr, s.cfg.TableFilter, s.cfg.StartTS, ) @@ -467,11 +466,11 @@ func (s *streamMgr) checkStreamStartEnable(ctx context.Context) error { return nil } -type RestoreFunc func(string) error +type RestoreGcFunc func(string) error -// KeepGcDisabled keeps GC disabled and return a function that used to gc enabled. 
+// DisableGc disables and returns a function that can enable gc back. // gc.ratio-threshold = "-1.0", which represents disable gc in TiKV. -func KeepGcDisabled(g glue.Glue, store kv.Storage) (RestoreFunc, string, error) { +func DisableGc(g glue.Glue, store kv.Storage) (RestoreGcFunc, string, error) { se, err := g.CreateSession(store) if err != nil { return nil, "", errors.Trace(err) @@ -582,7 +581,7 @@ func RunStreamStart( // locked means this is a stream task restart. Or create a new stream task. if locked { - logInfo, err := getLogRange(ctx, &cfg.Config) + logInfo, err := getLogInfo(ctx, &cfg.Config) if err != nil { return errors.Trace(err) } @@ -696,13 +695,13 @@ func RunStreamMetadata( ctx = opentracing.ContextWithSpan(ctx, span1) } - logInfo, err := getLogRange(ctx, &cfg.Config) + logInfo, err := getLogInfo(ctx, &cfg.Config) if err != nil { return errors.Trace(err) } - logMinDate := stream.FormatDate(oracle.GetTimeFromTS(logInfo.logMinTS)) - logMaxDate := stream.FormatDate(oracle.GetTimeFromTS(logInfo.logMaxTS)) + logMinDate := utils.FormatDate(oracle.GetTimeFromTS(logInfo.logMinTS)) + logMaxDate := utils.FormatDate(oracle.GetTimeFromTS(logInfo.logMaxTS)) summary.Log(cmdName, zap.Uint64("log-min-ts", logInfo.logMinTS), zap.String("log-min-date", logMinDate), zap.Uint64("log-max-ts", logInfo.logMaxTS), @@ -1185,7 +1184,8 @@ func checkIncompatibleChangefeed(ctx context.Context, backupTS uint64, etcdCLI * return nil } -// RunStreamRestore restores stream log. +// RunStreamRestore is the entry point to do PiTR restore. It can optionally start a full/snapshot restore followed +// by the log restore. 
func RunStreamRestore( c context.Context, mgr *conn.Mgr, @@ -1204,7 +1204,7 @@ func RunStreamRestore( if err != nil { return errors.Trace(err) } - logInfo, err := getLogRangeWithStorage(ctx, s) + logInfo, err := getLogInfoFromStorage(ctx, s) if err != nil { return errors.Trace(err) } @@ -1242,7 +1242,7 @@ func RunStreamRestore( return errors.Trace(err) } - checkInfo, err := checkPiTRTaskInfo(ctx, mgr, g, cfg) + taskInfo, err := generatePiTRTaskInfo(ctx, mgr, g, cfg) if err != nil { return errors.Trace(err) } @@ -1251,42 +1251,90 @@ func RunStreamRestore( failpoint.Return(errors.New("failpoint: failed before full restore")) }) - recorder := tiflashrec.New() - cfg.tiflashRecorder = recorder + cfg.tiflashRecorder = tiflashrec.New() + logClient, err := createLogClient(ctx, g, cfg, mgr) + if err != nil { + return errors.Trace(err) + } + defer logClient.Close(ctx) + + ddlFiles, err := logClient.LoadDDLFiles(ctx) + if err != nil { + return errors.Trace(err) + } + // TODO: pitr filtered restore doesn't support restore system table yet, hacky way to override the sys filter here + if cfg.ExplicitFilter { + user, _ := cfg.TableFilter.GetFilters() + cfg.TableFilter = utils.NewCombinedFilterNoSystem(user) + } + metaInfoProcessor := logclient.NewMetaKVInfoProcessor(logClient) + // only doesn't need to build if id map has been saved during log restore + idMapSaved := isCurrentIdMapSaved(taskInfo.CheckpointInfo) + if !idMapSaved { + // we restore additional tables at full snapshot phase when it is renamed into the filter range + // later in log backup. + // we also ignore the tables that currently in filter range but later renamed out of the filter. 
+ log.Info("reading meta kv files to collect table renaming and id mapping information") + err = metaInfoProcessor.ReadMetaKVFilesAndBuildInfo(ctx, ddlFiles) + if err != nil { + return errors.Trace(err) + } + dbReplace := metaInfoProcessor.GetTableMappingManager().DBReplaceMap + stream.LogDBReplaceMap("scanning log meta kv before snapshot restore", dbReplace) + } + // restore full snapshot. - if checkInfo.NeedFullRestore { + if taskInfo.NeedFullRestore { logStorage := cfg.Config.Storage cfg.Config.Storage = cfg.FullBackupStorage + snapshotRestoreConfig := SnapshotRestoreConfig{ + RestoreConfig: cfg, + piTRTaskInfo: taskInfo, + logTableHistoryManager: metaInfoProcessor.GetTableHistoryManager(), + } // TiFlash replica is restored to down-stream on 'pitr' currently. - if err = runSnapshotRestore(ctx, mgr, g, FullRestoreCmd, cfg, checkInfo); err != nil { + if err = runSnapshotRestore(ctx, mgr, g, FullRestoreCmd, &snapshotRestoreConfig); err != nil { return errors.Trace(err) } cfg.Config.Storage = logStorage } else if len(cfg.FullBackupStorage) > 0 { - skipMsg := []byte(fmt.Sprintf("%s command is skipped due to checkpoint mode for restore\n", FullRestoreCmd)) - if _, err := glue.GetConsole(g).Out().Write(skipMsg); err != nil { + if err = WriteStringToConsole(g, fmt.Sprintf("%s is skipped due to checkpoint mode for restore\n", FullRestoreCmd)); err != nil { return errors.Trace(err) } - if checkInfo.CheckpointInfo != nil && checkInfo.CheckpointInfo.Metadata != nil && checkInfo.CheckpointInfo.Metadata.TiFlashItems != nil { + if taskInfo.hasTiFlashItemsInCheckpoint() { log.Info("load tiflash records of snapshot restore from checkpoint") - cfg.tiflashRecorder.Load(checkInfo.CheckpointInfo.Metadata.TiFlashItems) + cfg.tiflashRecorder.Load(taskInfo.CheckpointInfo.Metadata.TiFlashItems) } } // restore log. 
cfg.adjustRestoreConfigForStreamRestore() - if err := restoreStream(ctx, mgr, g, cfg, checkInfo.CheckpointInfo); err != nil { + logRestoreConfig := &LogRestoreConfig{ + RestoreConfig: cfg, + checkpointTaskInfo: taskInfo.CheckpointInfo, + tableMappingManager: metaInfoProcessor.GetTableMappingManager(), + logClient: logClient, + ddlFiles: ddlFiles, + } + if err := restoreStream(ctx, mgr, g, logRestoreConfig); err != nil { return errors.Trace(err) } return nil } -// RunStreamRestore start restore job +type LogRestoreConfig struct { + *RestoreConfig + checkpointTaskInfo *checkpoint.CheckpointTaskInfoForLogRestore + tableMappingManager *stream.TableMappingManager + logClient *logclient.LogClient + ddlFiles []logclient.Log +} + +// restoreStream starts the log restore func restoreStream( c context.Context, mgr *conn.Mgr, g glue.Glue, - cfg *RestoreConfig, - taskInfo *checkpoint.CheckpointTaskInfoForLogRestore, + cfg *LogRestoreConfig, ) (err error) { var ( totalKVCount uint64 @@ -1306,9 +1354,9 @@ func restoreStream( zap.Uint64("source-start-point", cfg.StartTS), zap.Uint64("source-end-point", cfg.RestoreTS), zap.Uint64("target-end-point", currentTS), - zap.String("source-start", stream.FormatDate(oracle.GetTimeFromTS(cfg.StartTS))), - zap.String("source-end", stream.FormatDate(oracle.GetTimeFromTS(cfg.RestoreTS))), - zap.String("target-end", stream.FormatDate(oracle.GetTimeFromTS(currentTS))), + zap.String("source-start", utils.FormatDate(oracle.GetTimeFromTS(cfg.StartTS))), + zap.String("source-end", utils.FormatDate(oracle.GetTimeFromTS(cfg.RestoreTS))), + zap.String("target-end", utils.FormatDate(oracle.GetTimeFromTS(currentTS))), zap.Uint64("total-kv-count", totalKVCount), zap.Uint64("skipped-kv-count-by-checkpoint", checkpointTotalKVCount), zap.String("total-size", units.HumanSize(float64(totalSize))), @@ -1333,38 +1381,31 @@ func restoreStream( ctx = opentracing.ContextWithSpan(ctx, span1) } - client, err := createRestoreClient(ctx, g, cfg, mgr) + client := 
cfg.logClient + ddlFiles := cfg.ddlFiles + tableMappingManager := cfg.tableMappingManager + + currentTS, err = getCurrentTSFromCheckpointOrPD(ctx, mgr, cfg) if err != nil { - return errors.Annotate(err, "failed to create restore client") + return errors.Trace(err) } - defer client.Close(ctx) - if taskInfo != nil && taskInfo.Metadata != nil { - // reuse the task's rewrite ts - log.Info("reuse the task's rewrite ts", zap.Uint64("rewrite-ts", taskInfo.Metadata.RewriteTS)) - currentTS = taskInfo.Metadata.RewriteTS - } else { - currentTS, err = restore.GetTSWithRetry(ctx, mgr.GetPDClient()) - if err != nil { - return errors.Trace(err) - } - } if err := client.SetCurrentTS(currentTS); err != nil { return errors.Trace(err) } importModeSwitcher := restore.NewImportModeSwitcher(mgr.GetPDClient(), cfg.Config.SwitchModeInterval, mgr.GetTLSConfig()) - restoreSchedulers, _, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, false) + restoreSchedulersFunc, _, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, false) if err != nil { return errors.Trace(err) } // Always run the post-work even on error, so we don't stuck in the import // mode or emptied schedulers - defer restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulers, cfg.Online) + defer restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulersFunc, cfg.Online) // It need disable GC in TiKV when PiTR. // because the process of PITR is concurrent and kv events isn't sorted by tso. - restoreGc, oldRatio, err := KeepGcDisabled(g, mgr.GetStorage()) + restoreGcFunc, oldGcRatio, err := DisableGc(g, mgr.GetStorage()) if err != nil { return errors.Trace(err) } @@ -1376,89 +1417,42 @@ func restoreStream( return } - // If the oldRatio is negative, which is not normal status. + // If the oldGcRatio is negative, which is not normal status. // It should set default value "1.1" after PiTR finished. 
- if strings.HasPrefix(oldRatio, "-") { - log.Warn("the original gc-ratio is negative, reset by default value 1.1", zap.String("old-gc-ratio", oldRatio)) - oldRatio = utils.DefaultGcRatioVal + if strings.HasPrefix(oldGcRatio, "-") { + log.Warn("the original gc-ratio is negative, reset by default value 1.1", zap.String("old-gc-ratio", oldGcRatio)) + oldGcRatio = utils.DefaultGcRatioVal } - log.Info("start to restore gc", zap.String("ratio", oldRatio)) - if err := restoreGc(oldRatio); err != nil { - log.Error("failed to set gc enabled", zap.Error(err)) + log.Info("start to restore gc", zap.String("ratio", oldGcRatio)) + if err := restoreGcFunc(oldGcRatio); err != nil { + log.Error("failed to restore gc", zap.Error(err)) } log.Info("finish restoring gc") }() var sstCheckpointSets map[string]struct{} if cfg.UseCheckpoint { - oldRatioFromCheckpoint, err := client.InitCheckpointMetadataForLogRestore(ctx, cfg.StartTS, cfg.RestoreTS, oldRatio, cfg.tiflashRecorder) + gcRatioFromCheckpoint, err := client.LoadOrCreateCheckpointMetadataForLogRestore(ctx, cfg.StartTS, cfg.RestoreTS, oldGcRatio, cfg.tiflashRecorder) if err != nil { return errors.Trace(err) } - oldRatio = oldRatioFromCheckpoint + oldGcRatio = gcRatioFromCheckpoint sstCheckpointSets, err = client.InitCheckpointMetadataForCompactedSstRestore(ctx) if err != nil { return errors.Trace(err) } } - encryptionManager, err := encryption.NewManager(&cfg.LogBackupCipherInfo, &cfg.MasterKeyConfig) - if err != nil { - return errors.Annotate(err, "failed to create encryption manager for log restore") - } - defer encryptionManager.Close() - err = client.InstallLogFileManager(ctx, cfg.StartTS, cfg.RestoreTS, cfg.MetadataDownloadBatchSize, encryptionManager) - if err != nil { - return err - } - migs, err := client.GetMigrations(ctx) - if err != nil { - return errors.Trace(err) - } - client.BuildMigrations(migs) - // get full backup meta storage to generate rewrite rules. 
- fullBackupStorage, err := parseFullBackupTablesStorage(cfg) - if err != nil { + // build and save id map + if err := buildAndSaveIDMapIfNeeded(ctx, client, cfg, tableMappingManager); err != nil { return errors.Trace(err) } - // load the id maps only when the checkpoint mode is used and not the first execution - currentIdMapSaved := false - if taskInfo != nil && taskInfo.Progress == checkpoint.InLogRestoreAndIdMapPersist { - currentIdMapSaved = true - } - - ddlFiles, err := client.LoadDDLFilesAndCountDMLFiles(ctx) - if err != nil { - return err - } - - // get the schemas ID replace information. - // since targeted full backup storage, need to use the full backup cipher - tableMappingManager, err := client.BuildTableMappingManager(ctx, &logclient.BuildTableMappingManagerConfig{ - CurrentIdMapSaved: currentIdMapSaved, - TableFilter: cfg.TableFilter, - FullBackupStorage: fullBackupStorage, - CipherInfo: &cfg.Config.CipherInfo, - Files: ddlFiles, - }) + // build schema replace + schemasReplace, err := buildSchemaReplace(client, cfg, tableMappingManager) if err != nil { return errors.Trace(err) } - schemasReplace := stream.NewSchemasReplace(tableMappingManager.DbReplaceMap, cfg.tiflashRecorder, - client.CurrentTS(), cfg.TableFilter, client.RecordDeleteRange) - schemasReplace.AfterTableRewritten = func(deleted bool, tableInfo *model.TableInfo) { - // When the table replica changed to 0, the tiflash replica might be set to `nil`. - // We should remove the table if we meet. - if deleted || tableInfo.TiFlashReplica == nil { - cfg.tiflashRecorder.DelTable(tableInfo.ID) - return - } - cfg.tiflashRecorder.AddTable(tableInfo.ID, *tableInfo.TiFlashReplica) - // Remove the replica firstly. Let's restore them at the end. 
- tableInfo.TiFlashReplica = nil - } - updateStats := func(kvCount uint64, size uint64) { mu.Lock() defer mu.Unlock() @@ -1467,14 +1461,14 @@ func restoreStream( } pm := g.StartProgress(ctx, "Restore Meta Files", int64(len(ddlFiles)), !cfg.LogProgress) + var rp *logclient.RestoreMetaKVProcessor if err = withProgress(pm, func(p glue.Progress) error { - client.RunGCRowsLoader(ctx) - return client.RestoreAndRewriteMetaKVFiles(ctx, ddlFiles, schemasReplace, updateStats, p.Inc) + rp = logclient.NewRestoreMetaKVProcessor(client, schemasReplace, updateStats, p.Inc) + return rp.RestoreAndRewriteMetaKVFiles(ctx, ddlFiles) }); err != nil { return errors.Annotate(err, "failed to restore meta files") } - - rewriteRules := initRewriteRules(schemasReplace) + rewriteRules := buildRewriteRules(schemasReplace) ingestRecorder := schemasReplace.GetIngestRecorder() if err := rangeFilterFromIngestRecorder(ingestRecorder, rewriteRules); err != nil { @@ -1561,10 +1555,12 @@ func restoreStream( return errors.Annotate(err, "failed to clean up") } + // to delete range(table, schema) that's dropped during log backup if err = client.InsertGCRows(ctx); err != nil { return errors.Annotate(err, "failed to insert rows into gc_delete_range") } + // index ingestion is not captured by regular log backup, so we need to manually ingest again if err = client.RepairIngestIndex(ctx, ingestRecorder, g); err != nil { return errors.Annotate(err, "failed to repair ingest index") } @@ -1600,11 +1596,11 @@ func restoreStream( return nil } -func createRestoreClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, mgr *conn.Mgr) (*logclient.LogClient, error) { +func createLogClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, mgr *conn.Mgr) (*logclient.LogClient, error) { var err error keepaliveCfg := GetKeepalive(&cfg.Config) keepaliveCfg.PermitWithoutStream = true - client := logclient.NewRestoreClient(mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), keepaliveCfg) + client := 
logclient.NewLogClient(mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), keepaliveCfg) err = client.Init(ctx, g, mgr.GetStorage()) if err != nil { @@ -1646,6 +1642,20 @@ func createRestoreClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, m return nil, errors.Trace(err) } + encryptionManager, err := encryption.NewManager(&cfg.LogBackupCipherInfo, &cfg.MasterKeyConfig) + if err != nil { + return nil, errors.Annotate(err, "failed to create encryption manager for log restore") + } + if err = client.InstallLogFileManager(ctx, cfg.StartTS, cfg.RestoreTS, cfg.MetadataDownloadBatchSize, encryptionManager); err != nil { + return nil, errors.Trace(err) + } + + migs, err := client.GetMigrations(ctx) + if err != nil { + return nil, errors.Trace(err) + } + client.BuildMigrations(migs) + return client, nil } @@ -1707,8 +1717,8 @@ type backupLogInfo struct { clusterID uint64 } -// getLogRange gets the log-min-ts and log-max-ts of starting log backup. -func getLogRange( +// getLogInfo gets the log-min-ts and log-max-ts of starting log backup. 
+func getLogInfo( ctx context.Context, cfg *Config, ) (backupLogInfo, error) { @@ -1716,10 +1726,10 @@ func getLogRange( if err != nil { return backupLogInfo{}, errors.Trace(err) } - return getLogRangeWithStorage(ctx, s) + return getLogInfoFromStorage(ctx, s) } -func getLogRangeWithStorage( +func getLogInfoFromStorage( ctx context.Context, s storage.ExternalStorage, ) (backupLogInfo, error) { @@ -1826,20 +1836,11 @@ func parseFullBackupTablesStorage( }, nil } -func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreutils.RewriteRules { +func buildRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreutils.RewriteRules { rules := make(map[int64]*restoreutils.RewriteRules) - filter := schemasReplace.TableFilter - - for _, dbReplace := range schemasReplace.DbMap { - if utils.IsSysDB(dbReplace.Name) || !filter.MatchSchema(dbReplace.Name) { - continue - } + for _, dbReplace := range schemasReplace.DbReplaceMap { for oldTableID, tableReplace := range dbReplace.TableMap { - if !filter.MatchTable(dbReplace.Name, tableReplace.Name) { - continue - } - if _, exist := rules[oldTableID]; !exist { log.Info("add rewrite rule", zap.String("tableName", dbReplace.Name+"."+tableReplace.Name), @@ -1862,7 +1863,7 @@ func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreu } // ShiftTS gets a smaller shiftTS than startTS. -// It has a safe duration between shiftTS and startTS for trasaction. +// It has a safe duration between shiftTS and startTS for transaction. 
func ShiftTS(startTS uint64) uint64 { physical := oracle.ExtractPhysical(startTS) logical := oracle.ExtractLogical(startTS) @@ -1888,7 +1889,11 @@ type PiTRTaskInfo struct { FullRestoreCheckErr error } -func checkPiTRTaskInfo( +func (p *PiTRTaskInfo) hasTiFlashItemsInCheckpoint() bool { + return p.CheckpointInfo != nil && p.CheckpointInfo.Metadata != nil && p.CheckpointInfo.Metadata.TiFlashItems != nil +} + +func generatePiTRTaskInfo( ctx context.Context, mgr *conn.Mgr, g glue.Glue, @@ -1911,7 +1916,7 @@ func checkPiTRTaskInfo( if err != nil { return checkInfo, errors.Trace(err) } - // the log restore checkpoint metadata is persist, so the PITR is in the log restore stage. + // the log restore checkpoint metadata is persisted, so the PITR is in the log restore stage. if curTaskInfo.Metadata != nil { // TODO: check whether user has manually modified the cluster(ddl). If so, regard the behavior // as restore from scratch. (update `curTaskInfo.RewriteTs` to 0 as an uninitial value) @@ -1973,3 +1978,88 @@ func waitUntilSchemaReload(ctx context.Context, client *logclient.LogClient) err log.Info("reloading schema finished", zap.Duration("timeTaken", time.Since(reloadStart))) return nil } + +func isCurrentIdMapSaved(checkpointTaskInfo *checkpoint.CheckpointTaskInfoForLogRestore) bool { + newTask := false + if checkpointTaskInfo != nil && checkpointTaskInfo.Progress == checkpoint.InLogRestoreAndIdMapPersisted { + newTask = true + } + return newTask +} + +func buildSchemaReplace( + client *logclient.LogClient, + cfg *LogRestoreConfig, + tableMappingManager *stream.TableMappingManager) (*stream.SchemasReplace, error) { + schemasReplace := stream.NewSchemasReplace(tableMappingManager.DBReplaceMap, cfg.tiflashRecorder, + client.CurrentTS(), client.RecordDeleteRange) + schemasReplace.AfterTableRewrittenFn = func(deleted bool, tableInfo *model.TableInfo) { + // When the table replica changed to 0, the tiflash replica might be set to `nil`. 
+ // We should remove the table if we meet. + if deleted || tableInfo.TiFlashReplica == nil { + cfg.tiflashRecorder.DelTable(tableInfo.ID) + return + } + cfg.tiflashRecorder.AddTable(tableInfo.ID, *tableInfo.TiFlashReplica) + // Remove the replica first and restore them at the end. + tableInfo.TiFlashReplica = nil + } + return schemasReplace, nil +} + +func buildAndSaveIDMapIfNeeded(ctx context.Context, client *logclient.LogClient, cfg *LogRestoreConfig, + tableMappingManager *stream.TableMappingManager) error { + // get full backup meta storage if needed. + fullBackupStorage, err := parseFullBackupTablesStorage(cfg.RestoreConfig) + if err != nil { + return errors.Trace(err) + } + + // get the schemas ID replace information. + saved := isCurrentIdMapSaved(cfg.checkpointTaskInfo) + dbReplaces, err := client.GetBaseIDMap(ctx, &logclient.GetIDMapConfig{ + LoadSavedIDMap: saved, + TableFilter: cfg.TableFilter, + PiTRTableFilter: cfg.PiTRTableFilter, + FullBackupStorage: fullBackupStorage, + CipherInfo: &cfg.Config.CipherInfo, + }) + if err != nil { + return errors.Trace(err) + } + + if saved { + err := tableMappingManager.FromDBReplaceMap(dbReplaces) + if err != nil { + return errors.Trace(err) + } + stream.LogDBReplaceMap("######## load from saved id map", tableMappingManager.DBReplaceMap) + } else { + stream.LogDBReplaceMap("######## before merge", tableMappingManager.DBReplaceMap) + tableMappingManager.MergeBaseDBReplace(dbReplaces) + tableMappingManager.FilterDBReplaceMap(cfg.PiTRTableFilter) + err = tableMappingManager.ReplaceTemporaryIDs(ctx, client.GenGlobalIDs) + stream.LogDBReplaceMap("######## after filter", tableMappingManager.DBReplaceMap) + if err != nil { + return errors.Trace(err) + } + if err = client.SaveIdMapWithFailPoints(ctx, tableMappingManager); err != nil { + return errors.Trace(err) + } + } + return nil +} + +func getCurrentTSFromCheckpointOrPD(ctx context.Context, mgr *conn.Mgr, cfg *LogRestoreConfig) (uint64, error) { + if 
cfg.checkpointTaskInfo != nil && cfg.checkpointTaskInfo.Metadata != nil { + // reuse the checkpoint task's rewrite ts + rewriteTS := cfg.checkpointTaskInfo.Metadata.RewriteTS + log.Info("reuse the task's rewrite ts", zap.Uint64("rewrite-ts", rewriteTS)) + return rewriteTS, nil + } + currentTS, err := restore.GetTSWithRetry(ctx, mgr.GetPDClient()) + if err != nil { + return 0, errors.Trace(err) + } + return currentTS, nil +} diff --git a/br/pkg/task/stream_test.go b/br/pkg/task/stream_test.go index 847699bc152cd..bdf2fdf41aa80 100644 --- a/br/pkg/task/stream_test.go +++ b/br/pkg/task/stream_test.go @@ -192,7 +192,7 @@ func TestGetLogRangeWithFullBackupDir(t *testing.T) { cfg := Config{ Storage: testDir, } - _, err = getLogRange(context.TODO(), &cfg) + _, err = getLogInfo(context.TODO(), &cfg) require.Error(t, err, errors.Annotate(berrors.ErrStorageUnknown, "the storage has been used for full backup")) } @@ -215,7 +215,7 @@ func TestGetLogRangeWithLogBackupDir(t *testing.T) { cfg := Config{ Storage: testDir, } - logInfo, err := getLogRange(context.TODO(), &cfg) + logInfo, err := getLogInfo(context.TODO(), &cfg) require.Nil(t, err) require.Equal(t, logInfo.logMinTS, startLogBackupTS) } diff --git a/br/pkg/utils/BUILD.bazel b/br/pkg/utils/BUILD.bazel index 424329b7b3134..06827b9435a2a 100644 --- a/br/pkg/utils/BUILD.bazel +++ b/br/pkg/utils/BUILD.bazel @@ -9,6 +9,7 @@ go_library( "dyn_pprof_unix.go", "encryption.go", "error_handling.go", + "filter.go", "json.go", "key.go", "misc.go", @@ -36,10 +37,13 @@ go_library( "//pkg/parser/terror", "//pkg/parser/types", "//pkg/sessionctx", + "//pkg/tablecodec", "//pkg/util", + "//pkg/util/codec", "//pkg/util/encrypt", "//pkg/util/logutil", "//pkg/util/sqlexec", + "//pkg/util/table-filter", "@com_github_cheggaaa_pb_v3//:pb", "@com_github_docker_go_units//:go-units", "@com_github_google_uuid//:uuid", @@ -86,7 +90,7 @@ go_test( ], embed = [":utils"], flaky = True, - shard_count = 34, + shard_count = 36, deps = [ "//br/pkg/errors", 
"//pkg/kv", @@ -104,6 +108,7 @@ go_test( "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_pingcap_kvproto//pkg/errorpb", "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//oracle", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_pd_client//:client", "@io_etcd_go_etcd_tests_v3//integration", diff --git a/br/pkg/utils/consts/BUILD.bazel b/br/pkg/utils/consts/BUILD.bazel new file mode 100644 index 0000000000000..1c9766fe93e1a --- /dev/null +++ b/br/pkg/utils/consts/BUILD.bazel @@ -0,0 +1,8 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "consts", + srcs = ["consts.go"], + importpath = "github.com/pingcap/tidb/br/pkg/utils/consts", + visibility = ["//visibility:public"], +) diff --git a/br/pkg/utils/consts/consts.go b/br/pkg/utils/consts/consts.go new file mode 100644 index 0000000000000..93390bcbacc38 --- /dev/null +++ b/br/pkg/utils/consts/consts.go @@ -0,0 +1,21 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package consts + +// Default columnFamily and write columnFamily +const ( + DefaultCF = "default" + WriteCF = "write" +) diff --git a/br/pkg/utils/filter.go b/br/pkg/utils/filter.go new file mode 100644 index 0000000000000..97c90187c3e2c --- /dev/null +++ b/br/pkg/utils/filter.go @@ -0,0 +1,221 @@ +// Copyright 2024 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "fmt" + "sort" + "strings" + + filter "github.com/pingcap/tidb/pkg/util/table-filter" +) + +var ( + // acceptAllFilter is a singleton filter that accepts all tables + acceptAllFilter = initAcceptAllFilter() + // rejectAllFilter is a singleton filter that rejects all tables + rejectAllFilter = initRejectAllFilter() +) + +// initAcceptAllFilter is the internal initialization function +func initAcceptAllFilter() filter.Filter { + f, _ := filter.Parse([]string{"*.*"}) + return f +} + +// initRejectAllFilter is the internal initialization function +func initRejectAllFilter() filter.Filter { + f, _ := filter.Parse([]string{"!*.*"}) + return f +} + +// NewAcceptAllFilter returns the singleton accept-all filter +func NewAcceptAllFilter() filter.Filter { + return acceptAllFilter +} + +// NewRejectAllFilter returns the singleton reject-all filter +func NewRejectAllFilter() filter.Filter { + return rejectAllFilter +} + +type PiTRTableFilter struct { + DbIdToTable map[int64]map[int64]struct{} +} + +func NewPiTRTableFilter() *PiTRTableFilter { + return &PiTRTableFilter{ + DbIdToTable: make(map[int64]map[int64]struct{}), + } +} + +// UpdateTable adds a table ID to the filter for the given database ID +func (f *PiTRTableFilter) UpdateTable(dbID, tableID int64) { + if f.DbIdToTable == nil { + f.DbIdToTable = make(map[int64]map[int64]struct{}) + } + + if _, ok := f.DbIdToTable[dbID]; !ok { + 
f.DbIdToTable[dbID] = make(map[int64]struct{}) + } + + f.DbIdToTable[dbID][tableID] = struct{}{} +} + +// UpdateDB adds the database id +func (f *PiTRTableFilter) UpdateDB(dbID int64) { + if f.DbIdToTable == nil { + f.DbIdToTable = make(map[int64]map[int64]struct{}) + } + + if _, ok := f.DbIdToTable[dbID]; !ok { + f.DbIdToTable[dbID] = make(map[int64]struct{}) + } +} + +// Remove removes a table ID from the filter for the given database ID. +// Returns true if the table was found and removed, false otherwise. +func (f *PiTRTableFilter) Remove(dbID, tableID int64) bool { + if tables, ok := f.DbIdToTable[dbID]; ok { + if _, exists := tables[tableID]; exists { + delete(tables, tableID) + return true + } + } + return false +} + +// ContainsTable checks if the given database ID and table ID combination exists in the filter +func (f *PiTRTableFilter) ContainsTable(dbID, tableID int64) bool { + if tables, ok := f.DbIdToTable[dbID]; ok { + _, exists := tables[tableID] + return exists + } + return false +} + +// ContainsDB checks if the given database ID exists in the filter +func (f *PiTRTableFilter) ContainsDB(dbID int64) bool { + _, ok := f.DbIdToTable[dbID] + return ok +} + +// String returns a string representation of the PiTRTableFilter for debugging +func (f *PiTRTableFilter) String() string { + if f == nil || f.DbIdToTable == nil { + return "PiTRTableFilter{nil}" + } + + var result strings.Builder + result.WriteString("PiTRTableFilter{\n") + for dbID, tables := range f.DbIdToTable { + result.WriteString(fmt.Sprintf(" DB[%d]: {", dbID)) + tableIDs := make([]int64, 0, len(tables)) + for tableID := range tables { + tableIDs = append(tableIDs, tableID) + } + // Sort for consistent output + sort.Slice(tableIDs, func(i, j int) bool { return tableIDs[i] < tableIDs[j] }) + for i, tableID := range tableIDs { + if i > 0 { + result.WriteString(", ") + } + result.WriteString(fmt.Sprintf("%d", tableID)) + } + result.WriteString("}\n") + } + result.WriteString("}") + return 
result.String() +} + +type CombinedFilter struct { + userFilter filter.Filter + systemFilter filter.Filter +} + +func NewCombinedFilter(userFilter, systemFilter filter.Filter) *CombinedFilter { + return &CombinedFilter{ + userFilter: userFilter, + systemFilter: systemFilter, + } +} + +func NewCombinedFilterAcceptAll() *CombinedFilter { + acceptAll := NewAcceptAllFilter() + return &CombinedFilter{ + userFilter: acceptAll, + systemFilter: acceptAll, + } +} + +func NewCombinedFilterRejectAll() *CombinedFilter { + rejectAll := NewRejectAllFilter() + return &CombinedFilter{ + userFilter: rejectAll, + systemFilter: rejectAll, + } +} + +// NewCombinedFilterNoSystem creates a CombinedFilter where the system filter accepts nothing +// while keeping the user filter as provided +func NewCombinedFilterNoSystem(userFilter filter.Filter) *CombinedFilter { + rejectAll := NewRejectAllFilter() + return &CombinedFilter{ + userFilter: userFilter, + systemFilter: rejectAll, + } +} + +func NewCombinedFilterNoUser(sysFilter filter.Filter) *CombinedFilter { + rejectAll := NewRejectAllFilter() + return &CombinedFilter{ + userFilter: rejectAll, + systemFilter: sysFilter, + } +} + +// MatchSchema detects whether it's a system table or user table and apply corresponding filter +func (f *CombinedFilter) MatchSchema(schemaName string) bool { + schemaName, _ = StripTempTableNamePrefixIfNeeded(schemaName) + if IsSysDB(schemaName) { + return f.systemFilter.MatchSchema(schemaName) + } + return f.userFilter.MatchSchema(schemaName) +} + +func (f *CombinedFilter) MatchTable(schemaName string, tableName string) bool { + schemaName, _ = StripTempTableNamePrefixIfNeeded(schemaName) + if IsSysDB(schemaName) { + return f.systemFilter.MatchTable(schemaName, tableName) + } + return f.userFilter.MatchTable(schemaName, tableName) +} + +func (f *CombinedFilter) ToCaseInsensitive() *CombinedFilter { + return &CombinedFilter{ + userFilter: filter.CaseInsensitive(f.userFilter), + systemFilter: 
filter.CaseInsensitive(f.systemFilter), + } +} + +// IsAcceptAll returns true if both user and system filters are accept-all filters +func (f *CombinedFilter) IsAcceptAll() bool { + return f.userFilter.MatchTable("*", "*") && f.systemFilter.MatchTable("*", "*") +} + +// GetFilters returns the user and system filters from a CombinedFilter +func (f *CombinedFilter) GetFilters() (userFilter, systemFilter filter.Filter) { + return f.userFilter, f.systemFilter +} diff --git a/br/pkg/utils/key.go b/br/pkg/utils/key.go index d8371a023fca7..4eceaf9ea2162 100644 --- a/br/pkg/utils/key.go +++ b/br/pkg/utils/key.go @@ -8,12 +8,15 @@ import ( "fmt" "io" "strings" + "time" "github.com/pingcap/errors" "github.com/pingcap/log" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/pkg/kv" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/pingcap/tidb/pkg/util/codec" "go.uber.org/zap" ) @@ -190,3 +193,27 @@ func IntersectAll(s1 []kv.KeyRange, s2 []kv.KeyRange) []kv.KeyRange { } return rs } + +const DateFormat = "2006-01-02 15:04:05.999999999 -0700" + +func FormatDate(ts time.Time) string { + return ts.Format(DateFormat) +} + +func IsMetaDBKey(key []byte) bool { + return strings.HasPrefix(string(key), "mDB") +} + +func IsMetaDDLJobHistoryKey(key []byte) bool { + return strings.HasPrefix(string(key), "mDDLJobH") +} + +func IsDBOrDDLJobHistoryKey(key []byte) bool { + return strings.HasPrefix(string(key), "mD") +} + +func EncodeTxnMetaKey(key []byte, field []byte, ts uint64) []byte { + k := tablecodec.EncodeMetaKey(key, field) + txnKey := codec.EncodeBytes(nil, k) + return codec.EncodeUintDesc(txnKey, ts) +} diff --git a/br/pkg/utils/key_test.go b/br/pkg/utils/key_test.go index 28e24055443de..86dec2c546609 100644 --- a/br/pkg/utils/key_test.go +++ b/br/pkg/utils/key_test.go @@ -7,9 +7,11 @@ import ( "fmt" "slices" "testing" + "time" "github.com/pingcap/tidb/pkg/kv" "github.com/stretchr/testify/require" + 
"github.com/tikv/client-go/v2/oracle" ) func TestParseKey(t *testing.T) { @@ -177,3 +179,40 @@ func TestClampKeyRanges(t *testing.T) { }) } } + +func TestDateFormat(t *testing.T) { + cases := []struct { + ts uint64 + target string + }{ + { + 434604259287760897, + "2022-07-15 19:14:39.534 +0800", + }, + { + 434605479096221697, + "2022-07-15 20:32:12.734 +0800", + }, + { + 434605478903808000, + "2022-07-15 20:32:12 +0800", + }, + } + + timeZone, _ := time.LoadLocation("Asia/Shanghai") + for _, ca := range cases { + date := FormatDate(oracle.GetTimeFromTS(ca.ts).In(timeZone)) + require.Equal(t, ca.target, date) + } +} + +func TestPrefix(t *testing.T) { + require.True(t, IsMetaDBKey([]byte("mDBs"))) + require.False(t, IsMetaDBKey([]byte("mDDL"))) + require.True(t, IsMetaDDLJobHistoryKey([]byte("mDDLJobHistory"))) + require.False(t, IsMetaDDLJobHistoryKey([]byte("mDDL"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDL"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDB:"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDDLHistory"))) + require.False(t, IsDBOrDDLJobHistoryKey([]byte("DDL"))) +} diff --git a/br/pkg/utils/schema.go b/br/pkg/utils/schema.go index 47ea86dcc9370..67fce9421bd2e 100644 --- a/br/pkg/utils/schema.go +++ b/br/pkg/utils/schema.go @@ -39,20 +39,22 @@ func IsTemplateSysDB(dbname pmodel.CIStr) bool { // IsSysDB tests whether the database is system DB. // Currently, both `mysql` and `sys` are system DB. func IsSysDB(dbLowerName string) bool { + // just in case + dbLowerName = strings.ToLower(dbLowerName) return dbLowerName == mysql.SystemDB || dbLowerName == mysql.SysDB } -// TemporaryDBName makes a 'private' database name. -func TemporaryDBName(db string) pmodel.CIStr { +// WithTemporaryDBNamePrefix makes a 'private' database name. 
+func WithTemporaryDBNamePrefix(db string) pmodel.CIStr { return pmodel.NewCIStr(temporaryDBNamePrefix + db) } -// GetSysDBName get the original name of system DB -func GetSysDBName(tempDB pmodel.CIStr) (string, bool) { - if ok := strings.HasPrefix(tempDB.O, temporaryDBNamePrefix); !ok { - return tempDB.O, false +// StripTempTableNamePrefixIfNeeded get the original name of system DB +func StripTempTableNamePrefixIfNeeded(tempDB string) (string, bool) { + if ok := strings.HasPrefix(tempDB, temporaryDBNamePrefix); !ok { + return tempDB, false } - return tempDB.O[len(temporaryDBNamePrefix):], true + return tempDB[len(temporaryDBNamePrefix):], true } // GetSysDBCIStrName get the CIStr name of system DB diff --git a/br/tests/br_pitr/run.sh b/br/tests/br_pitr/run.sh index 0816537b7b087..d9960961a3073 100644 --- a/br/tests/br_pitr/run.sh +++ b/br/tests/br_pitr/run.sh @@ -100,7 +100,7 @@ check_result() { # start a new cluster restart_services_allowing_huge_index -# non-compliant operation +# non-compliant operation, need full backup specified for the first time PiTR echo "non compliant operation" restore_fail=0 run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --start-ts $current_ts || restore_fail=1 diff --git a/br/tests/br_pitr_table_filter/run.sh b/br/tests/br_pitr_table_filter/run.sh new file mode 100755 index 0000000000000..fcfd9c9338ef4 --- /dev/null +++ b/br/tests/br_pitr_table_filter/run.sh @@ -0,0 +1,429 @@ +#!/bin/sh +# +# Copyright 2024 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux +DB="$TEST_NAME" +CUR=$(cd `dirname $0`; pwd) +TASK_NAME="pitr_table_filter" +. run_services + +# helper methods +create_tables_with_values() { + local prefix=$1 # table name prefix + local count=$2 # number of tables to create + + for i in $(seq 1 $count); do + run_sql "create table $DB.${prefix}_${i}(c int); insert into $DB.${prefix}_${i} values ($i);" + done +} + +verify_tables() { + local prefix=$1 # table name prefix + local count=$2 # number of tables to verify + local should_exist=$3 # true/false - whether tables should exist + + for i in $(seq 1 $count); do + if [ "$should_exist" = "true" ]; then + run_sql "select count(*) = 1 from $DB.${prefix}_${i} where c = $i" || { + echo "Table $DB.${prefix}_${i} doesn't have expected value $i" + exit 1 + } + else + if run_sql "select * from $DB.${prefix}_${i}" 2>/dev/null; then + echo "Table $DB.${prefix}_${i} exists but should not" + exit 1 + fi + fi + done +} + +rename_tables() { + local old_prefix=$1 # original table name prefix + local new_prefix=$2 # new table name prefix + local count=$3 # number of tables to rename + + for i in $(seq 1 $count); do + run_sql "rename table $DB.${old_prefix}_${i} to $DB.${new_prefix}_${i};" + done +} + +drop_tables() { + local prefix=$1 # table name prefix + local count=$2 # number of tables to drop + + for i in $(seq 1 $count); do + run_sql "drop table $DB.${prefix}_${i};" + done +} + +verify_other_db_tables() { + local should_exist=$1 # true/false - whether tables should exist + + if [ "$should_exist" = "true" ]; then + run_sql "select count(*) = 1 from ${DB}_other.test_table where c = 42" || { + echo "Table ${DB}_other.test_table doesn't have expected value 42" + exit 1 + } + else + if run_sql "select * from ${DB}_other.test_table" 2>/dev/null; then + echo "Table ${DB}_other.test_table exists but should not" + exit 1 + fi + fi +} + +test_basic_filter() { + 
restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start basic filter testing" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + run_sql "create schema ${DB}_other;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "table_to_drop" 3 + + run_br backup full -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + run_sql "create table ${DB}_other.test_table(c int); insert into ${DB}_other.test_table values (42);" + create_tables_with_values "log_backup_lower" 3 + create_tables_with_values "LOG_BACKUP_UPPER" 3 + create_tables_with_values "other" 3 + drop_tables "table_to_drop" 3 + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "case 1 sanity check, zero filter" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 true + verify_tables "other" 3 true + verify_tables "table_to_drop" 3 false + verify_other_db_tables true + + echo "case 2 with log backup table filter" + run_sql "drop schema $DB;" + run_sql "drop schema ${DB}_other;" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 false + verify_tables "other" 3 false + verify_tables "table_to_drop" 3 false + verify_other_db_tables false + + echo "case 3 with multiple filters" + run_sql "drop schema $DB;" + run_br --pd 
"$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" -f "$DB.full*" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 true + verify_tables "other" 3 false + verify_tables "table_to_drop" 3 false + verify_other_db_tables false + + echo "case 4 with negative filters" + run_sql "drop schema $DB;" + # have to use a match all filter before using negative filters + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "*.*" -f "!$DB.log*" + + verify_tables "log_backup_lower" 3 false + verify_tables "LOG_BACKUP_UPPER" 3 false + verify_tables "full_backup" 3 true + verify_tables "other" 3 true + verify_tables "table_to_drop" 3 false + verify_other_db_tables true + + echo "case 5 restore dropped table" + run_sql "drop schema $DB;" + run_sql "drop schema ${DB}_other;" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.table*" + + verify_tables "log_backup_lower" 3 false + verify_tables "LOG_BACKUP_UPPER" 3 false + verify_tables "full_backup" 3 false + verify_tables "other" 3 false + verify_tables "table_to_drop" 3 false + verify_other_db_tables false + + echo "case 6 restore only other database" + run_sql "drop schema $DB;" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "${DB}_other.*" + + verify_tables "log_backup_lower" 3 false + verify_tables "LOG_BACKUP_UPPER" 3 false + verify_tables "full_backup" 3 false + verify_tables "other" 3 false + verify_tables "table_to_drop" 3 false + verify_other_db_tables true + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "basic filter test cases passed" +} + +test_table_rename() { + restart_services || { echo "Failed to 
restart services"; exit 1; } + + echo "start table rename with filter testing" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "renamed_in" 3 + create_tables_with_values "log_renamed_out" 3 + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup" 3 + rename_tables "renamed_in" "log_backup_renamed_in" 3 + rename_tables "log_renamed_out" "renamed_out" 3 + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + verify_tables "log_backup" 3 true + verify_tables "log_backup_renamed_in" 3 true + + verify_tables "full_backup" 3 false + # has been renamed, should not visible anymore + verify_tables "renamed_in" 3 false + # also renamed out of filter range, should not be visible for both + verify_tables "renamed_out" 3 false + verify_tables "log_renamed_out" 3 false + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "table rename with filter passed" +} + +test_with_checkpoint() { + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start table filter with checkpoint" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "renamed_in" 3 + create_tables_with_values "log_renamed_out" 3 + + run_br backup full -f "$DB.*" -s 
"local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup" 3 + rename_tables "renamed_in" "log_backup_renamed_in" 3 + rename_tables "log_renamed_out" "renamed_out" 3 + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + # Using single quotes to prevent shell interpretation + export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/restore/snap_client/corrupt-files=return("corrupt-last-table-files")' + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting full backup last table corruption but success' + exit 1 + fi + + # PITR with checkpoint but failed in the log restore metakv stage + export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/restore/snap_client/corrupt-files=return("only-last-table-files");github.com/pingcap/tidb/br/pkg/restore/log_client/failed-after-id-maps-saved=return(true)' + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting failed after id map saved but success' + exit 1 + fi + + # PITR with checkpoint but failed in the log restore datakv stage + # skip the snapshot restore stage + export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/task/corrupt-files=return("corrupt-last-table-files")' + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail 
-ne 1 ]; then + echo 'expecting log restore last table corruption but success' + exit 1 + fi + + # PITR with checkpoint + export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/task/corrupt-files=return("only-last-table-files")' + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" + export GO_FAILPOINTS="" + + verify_tables "log_backup" 3 true + verify_tables "log_backup_renamed_in" 3 true + + verify_tables "full_backup" 3 false + # has been renamed, should not visible anymore + verify_tables "renamed_in" 3 false + # also renamed out of filter range, should not be visible for both + verify_tables "renamed_out" 3 false + verify_tables "log_renamed_out" 3 false + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "table filter checkpoint passed" +} + +test_exchange_partition() { + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start testing exchange partition with filter" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + # create a partitioned table and a normal table for exchange + run_sql "CREATE TABLE $DB.full_partitioned ( + id INT, + value INT + ) PARTITION BY RANGE (id) ( + PARTITION p0 VALUES LESS THAN (100), + PARTITION p1 VALUES LESS THAN (200) + );" + + run_sql "CREATE TABLE $DB.log_table ( + id INT, + value INT + );" + + run_sql "INSERT INTO $DB.full_partitioned VALUES (50, 1), (150, 2);" + run_sql "INSERT INTO $DB.log_table VALUES (75, 3);" + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + # exchange partition and create some new tables with log_ prefix + run_sql "ALTER TABLE $DB.full_partitioned EXCHANGE PARTITION p0 WITH TABLE $DB.log_table;" + # some sanity check + run_sql "CREATE TABLE $DB.log_after_exchange (id INT, value INT);" + run_sql "INSERT INTO $DB.log_after_exchange VALUES (1, 1);" + + . 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + restart_services || { echo "Failed to restart services"; exit 1; } + + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + # verify the results + run_sql "SELECT count(*) = 1 FROM $DB.log_after_exchange WHERE id = 1 AND value = 1" || { + echo "Table log_after_exchange doesn't have expected values" + exit 1 + } + + # table should be exchanged and kept after restore + run_sql "SELECT count(*) = 1 FROM $DB.log_table WHERE id = 50 AND value = 1" || { + echo "log_table doesn't have the exchanged partition data (50,1)" + exit 1 + } + + if run_sql "SELECT * FROM $DB.full_partitioned" 2>/dev/null; then + echo "Table full_partitioned exists but should not" + exit 1 + fi + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "exchange partition with filter test passed" +} + +test_system_tables() { + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start system tables testing" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + # Create and populate a user table for reference + run_sql "create table $DB.user_table(id int primary key);" + run_sql "insert into $DB.user_table values (1);" + + # Make some changes to system tables + run_sql "create user 'test_user'@'%' identified by 'password';" + run_sql "grant select on $DB.* to 'test_user'@'%';" + + run_br backup full -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "make more changes to system tables and wait for log backup" + run_sql "revoke select on $DB.* from 'test_user'@'%';" + run_sql "grant insert on $DB.* to 'test_user'@'%';" + run_sql "alter user 'test_user'@'%' identified by 'newpassword';" + + . 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "restore point-in-time backup including system tables" + run_br --pd "$PD_ADDR" restore point -f "*.*" -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" + + # Verify system table changes were restored + # Check if user exists with correct privileges + run_sql "select count(*) = 1 from mysql.user where User = 'test_user' and Host = '%'" || { + echo "test_user not found in mysql.user table" + exit 1 + } + + # Verify the privileges were restored correctly + run_sql "select count(*) = 1 from mysql.tables_priv where User = 'test_user' and Host = '%' and Table_priv = 'Insert'" || { + echo "Incorrect privileges for test_user" + exit 1 + } + + # cleanup + run_sql "drop user 'test_user'@'%';" + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "system tables test passed" +} + +# run all test cases +test_basic_filter +test_table_rename +test_with_checkpoint +test_exchange_partition + +echo "br pitr table filter all tests passed" diff --git a/br/tests/br_stats/run.sh b/br/tests/br_stats/run.sh index 1e867a12f4421..c162d52de21b7 100644 --- a/br/tests/br_stats/run.sh +++ b/br/tests/br_stats/run.sh @@ -27,7 +27,7 @@ for i in $(seq $DB_COUNT); do done unset BR_LOG_TO_TERM -run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --log-file $LOG --ignore-stats=false --filter "${DB}1.*" --filter "${DB}2.*" || cat $LOG +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" --log-file $LOG --ignore-stats=false --sys-filter "!*.*" --filter "${DB}1.*" --filter "${DB}2.*" || cat $LOG dump_cnt=$(cat $LOG | grep "dump stats to json" | wc -l) dump_db1_cnt=$(cat $LOG | grep "dump stats to json" | grep "${DB}1" | wc -l) dump_db2_cnt=$(cat $LOG | grep "dump stats to json" | grep "${DB}2" | wc -l) @@ -45,7 +45,7 @@ for i in $(seq $DB_COUNT); do done rm -f 
$LOG -run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" --log-file $LOG --filter "${DB}1.*" || cat $LOG +run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" --log-file $LOG --sys-filter "!*.*" --filter "${DB}1.*" || cat $LOG load_cnt=$(cat $LOG | grep "restore statistic data done" | wc -l) load_db1_cnt=$(cat $LOG | grep "restore statistic data done" | grep "${DB}1" | wc -l) load_mark=$((${load_cnt}+10*${load_db1_cnt})) diff --git a/br/tests/br_systables/run.sh b/br/tests/br_systables/run.sh index 3b2485f933d5c..0ec8d182bfa60 100644 --- a/br/tests/br_systables/run.sh +++ b/br/tests/br_systables/run.sh @@ -79,14 +79,14 @@ check2() { modify_systables run_br backup full -s "local://$backup_dir" rollback_modify -run_br restore full -f '*.*' -f '!mysql.bar' -s "local://$backup_dir" +run_br restore full --sys-filter '*.*' --sys-filter '!mysql.bar' -s "local://$backup_dir" check -run_br restore full -f 'mysql.bar' -s "local://$backup_dir" +run_br restore full --sys-filter 'mysql.bar' -s "local://$backup_dir" run_sql "SELECT count(*) from mysql.bar;" | grep 11 rollback_modify -run_br restore full -f "mysql*.*" -f '!mysql.bar' -s "local://$backup_dir" +run_br restore full --sys-filter "mysql*.*" --sys-filter '!mysql.bar' -s "local://$backup_dir" check add_user @@ -94,7 +94,7 @@ add_test_data run_br backup full -s "local://${backup_dir}1" delete_user delete_test_data -run_br restore full -f "mysql*.*" -f "usertest.*" -s "local://${backup_dir}1" +run_br restore full --sys-filter "mysql*.*" -f "usertest.*" -s "local://${backup_dir}1" check2 delete_user diff --git a/br/tests/run_group_br_tests.sh b/br/tests/run_group_br_tests.sh index 0c9518f69fb97..eb5b4fa0bbe6a 100755 --- a/br/tests/run_group_br_tests.sh +++ b/br/tests/run_group_br_tests.sh @@ -26,7 +26,7 @@ groups=( ["G03"]='br_incompatible_tidb_config br_incremental br_incremental_index br_incremental_only_ddl br_incremental_same_table br_insert_after_restore br_key_locked br_log_test br_move_backup 
br_mv_index' ["G04"]='br_range br_replica_read br_restore_TDE_enable br_restore_log_task_enable br_s3 br_shuffle_leader br_shuffle_region br_single_table ' ["G05"]='br_skip_checksum br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter br_partition_add_index' - ["G06"]='br_tikv_outage br_tikv_outage3 br_restore_checkpoint br_encryption' + ["G06"]='br_tikv_outage br_tikv_outage3 br_restore_checkpoint br_encryption br_pitr_table_filter' ["G07"]='br_pitr' ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom br_file_corruption br_tiflash_conflict' ) diff --git a/br/tests/utils.go b/br/tests/utils.go index e8653aaaabb43..2033c712a756c 100644 --- a/br/tests/utils.go +++ b/br/tests/utils.go @@ -75,41 +75,42 @@ func runValidateBackupFiles(cmd *cobra.Command, args []string) { // as full backup will have backup files ready in the storage path after returning from the command // and log backup will not, so we can only use restore point to validate. 
func parseCommand(cmd string) (string, bool) { - // Create a temporary cobra command to parse the input - tempCmd := &cobra.Command{} - tempCmd.Flags().String("s", "", "Storage path (short)") - tempCmd.Flags().String("storage", "", "Storage path (long)") - - // Split the command string into args + // not using cobra since it has to define all the possible flags otherwise will report parsing error args := strings.Fields(cmd) - // Parse the args - if err := tempCmd.Flags().Parse(args); err != nil { - return "", false - } - - // Check for backup or restore point command + // check for backup or restore point command hasBackupOrRestorePoint := false - for i, arg := range args { + storagePath := "" + + for i := 0; i < len(args); i++ { + arg := args[i] if arg == "backup" { hasBackupOrRestorePoint = true - break + continue } if i < len(args)-1 && arg == "restore" && args[i+1] == "point" { hasBackupOrRestorePoint = true - break + continue } - } - // Get the storage path from either -s or -storage flag - storagePath, _ := tempCmd.Flags().GetString("s") - if storagePath == "" { - storagePath, _ = tempCmd.Flags().GetString("storage") + // check for storage path in various formats + if arg == "-s" || arg == "--storage" { + if i+1 < len(args) { + storagePath = args[i+1] + i++ // skip the next arg since we consumed it + } + } else if strings.HasPrefix(arg, "--storage=") { + storagePath = strings.TrimPrefix(arg, "--storage=") + } else if strings.HasPrefix(arg, "-s=") { + storagePath = strings.TrimPrefix(arg, "-s=") + } } - storagePath = strings.TrimPrefix(storagePath, "local://") - if hasBackupOrRestorePoint && storagePath != "" { - return storagePath, true + if strings.HasPrefix(storagePath, "local://") { + storagePath = strings.TrimPrefix(storagePath, "local://") + if hasBackupOrRestorePoint && storagePath != "" { + return storagePath, true + } } return "", false } diff --git a/pkg/executor/brie.go b/pkg/executor/brie.go index 8ad6ec5b9b2f9..c1af5f2426277 100644 --- 
a/pkg/executor/brie.go +++ b/pkg/executor/brie.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/task" "github.com/pingcap/tidb/br/pkg/task/show" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/domain" @@ -47,6 +48,7 @@ import ( "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/sessionctx/stmtctx" "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/dbterror/exeerrors" "github.com/pingcap/tidb/pkg/util/dbterror/plannererrors" @@ -371,19 +373,37 @@ func (b *executorBuilder) buildBRIE(s *ast.BRIEStmt, schema *expression.Schema) switch { case len(s.Tables) != 0: tables := make([]filter.Table, 0, len(s.Tables)) + sysTables := make([]filter.Table, 0, len(s.Tables)) for _, tbl := range s.Tables { - tables = append(tables, filter.Table{Name: tbl.Name.O, Schema: tbl.Schema.O}) + if util.IsSysDB(tbl.Schema.L) { + sysTables = append(sysTables, filter.Table{Name: tbl.Name.O, Schema: tbl.Schema.O}) + } else { + tables = append(tables, filter.Table{Name: tbl.Name.O, Schema: tbl.Schema.O}) + } } - cfg.TableFilter = filter.NewTablesFilter(tables...) + userFilter := filter.NewTablesFilter(tables...) + sysFilter := filter.NewTablesFilter(sysTables...) + cfg.TableFilter = utils.NewCombinedFilter(userFilter, sysFilter) case len(s.Schemas) != 0: - cfg.TableFilter = filter.NewSchemasFilter(s.Schemas...) + schemas := make([]string, 0, len(s.Schemas)) + sysSchemas := make([]string, 0, len(s.Schemas)) + for _, schema := range s.Schemas { + if util.IsSysDB(schema) { + sysSchemas = append(sysSchemas, schema) + } else { + schemas = append(schemas, schema) + } + } + userFilter := filter.NewSchemasFilter(schemas...) + sysFilter := filter.NewSchemasFilter(sysSchemas...) 
+ cfg.TableFilter = utils.NewCombinedFilter(userFilter, sysFilter) default: - cfg.TableFilter = filter.All() + cfg.TableFilter = utils.NewCombinedFilterAcceptAll() } // table options are stored in original case, but comparison // is expected to be performed insensitive. - cfg.TableFilter = filter.CaseInsensitive(cfg.TableFilter) + cfg.TableFilter = cfg.TableFilter.ToCaseInsensitive() // We cannot directly use the query string, or the secret may be print. // NOTE: the ownership of `s.Storage` is taken here. diff --git a/tests/_utils/run_services b/tests/_utils/run_services index 617821cfc7577..8b9d3f89f9b08 100644 --- a/tests/_utils/run_services +++ b/tests/_utils/run_services @@ -261,7 +261,7 @@ start_tiflash() { i=0 while ! run_curl "https://$TIFLASH_HTTP/tiflash/store-status" 1>/dev/null 2>&1; do i=$((i+1)) - if [ "$i" -gt 20 ]; then + if [ "$i" -gt 1 ]; then echo "failed to start tiflash" return 1 fi