Skip to content

Commit

Permalink
feat: add stall disk recovery
Browse files Browse the repository at this point in the history
Signed-off-by: Manan Gupta <[email protected]>
  • Loading branch information
GuptaManan100 committed Jan 7, 2025
1 parent 549a8c4 commit baafe29
Show file tree
Hide file tree
Showing 18 changed files with 372 additions and 29 deletions.
3 changes: 3 additions & 0 deletions go/flags/endtoend/vtcombo.txt
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,9 @@ Flags:
--srv_topo_cache_refresh duration how frequently to refresh the topology for cached entries (default 1s)
--srv_topo_cache_ttl duration how long to use cached entries for topology (default 1s)
--srv_topo_timeout duration topo server timeout (default 5s)
--stalled-disk-write-dir string if provided, tablet will attempt to write a file to this directory to check if the disk is stalled
--stalled-disk-write-interval duration how often to write to the disk to check whether it is stalled (default 5s)
--stalled-disk-write-timeout duration if writes exceed this duration, the disk is considered stalled (default 30s)
--start_mysql Should vtcombo also start mysql
--stats_backend string The name of the registered push-based monitoring/stats backend to use
--stats_combine_dimensions string List of dimensions to be combined into a single "all" value in exported stats vars
Expand Down
1 change: 1 addition & 0 deletions go/flags/endtoend/vtorc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Flags:
--config-type string Config file type (omit to infer config type from file extension).
--consul_auth_static_file string JSON File to read the topos/tokens from.
--emit_stats If set, emit stats to push-based monitoring and stats backends
--enable-stalled-disk-primary-recovery Whether VTOrc should be analyzing and recovering stalled disk primary failures
--grpc-dial-concurrency-limit int Maximum concurrency of grpc dial operations. This should be less than the golang max thread limit of 10000. (default 1024)
--grpc_auth_static_client_creds string When using grpc_static_auth in the server, this file provides the credentials to use to authenticate with server.
--grpc_compression string Which protocol to use for compressing gRPC. Default: nothing. Supported: snappy
Expand Down
3 changes: 3 additions & 0 deletions go/flags/endtoend/vttablet.txt
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,9 @@ Flags:
--srv_topo_cache_refresh duration how frequently to refresh the topology for cached entries (default 1s)
--srv_topo_cache_ttl duration how long to use cached entries for topology (default 1s)
--srv_topo_timeout duration topo server timeout (default 5s)
--stalled-disk-write-dir string if provided, tablet will attempt to write a file to this directory to check if the disk is stalled
--stalled-disk-write-interval duration how often to write to the disk to check whether it is stalled (default 5s)
--stalled-disk-write-timeout duration if writes exceed this duration, the disk is considered stalled (default 30s)
--stats_backend string The name of the registered push-based monitoring/stats backend to use
--stats_combine_dimensions string List of dimensions to be combined into a single "all" value in exported stats vars
--stats_common_tags strings Comma-separated list of common tags for the stats backend. It provides both label and values. Example: label1:value1,label2:value2
Expand Down
16 changes: 16 additions & 0 deletions go/vt/vtorc/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@ var (
Dynamic: true,
},
)

// enableStalledDiskPrimaryRecovery gates whether VTOrc analyzes and recovers
// stalled disk primary failures. It is exposed as the
// --enable-stalled-disk-primary-recovery flag and is disabled by default.
// NOTE(review): Dynamic is set, which presumably allows the value to be
// changed without restarting VTOrc — confirm against viperutil.
enableStalledDiskPrimaryRecovery = viperutil.Configure(
"enable-stalled-disk-primary-recovery",
viperutil.Options[bool]{
FlagName: "enable-stalled-disk-primary-recovery",
Default: false,
Dynamic: true,
},
)
)

func init() {
Expand All @@ -197,6 +206,7 @@ func registerFlags(fs *pflag.FlagSet) {
fs.Duration("recovery-poll-duration", recoveryPollDuration.Default(), "Timer duration on which VTOrc polls its database to run a recovery")
fs.Bool("allow-emergency-reparent", ersEnabled.Default(), "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary")
fs.Bool("change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs.Default(), "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED")
fs.Bool("enable-stalled-disk-primary-recovery", enableStalledDiskPrimaryRecovery.Default(), "Whether VTOrc should be analyzing and recovering stalled disk primary failures")

viperutil.BindFlags(fs,
instancePollTime,
Expand All @@ -214,6 +224,7 @@ func registerFlags(fs *pflag.FlagSet) {
recoveryPollDuration,
ersEnabled,
convertTabletsWithErrantGTIDs,
enableStalledDiskPrimaryRecovery,
)
}

Expand Down Expand Up @@ -332,6 +343,11 @@ func SetConvertTabletWithErrantGTIDs(val bool) {
convertTabletsWithErrantGTIDs.Set(val)
}

// GetStalledDiskPrimaryRecovery reports whether VTOrc is allowed to check for and recover from stalled disk problems.
// It returns the current value of the enable-stalled-disk-primary-recovery setting.
func GetStalledDiskPrimaryRecovery() bool {
return enableStalledDiskPrimaryRecovery.Get()
}

// MarkConfigurationLoaded is called once configuration has first been loaded.
// Listeners on ConfigurationLoaded will get a notification
func MarkConfigurationLoaded() {
Expand Down
1 change: 1 addition & 0 deletions go/vt/vtorc/db/generate_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ CREATE TABLE database_instance (
semi_sync_primary_status TINYint NOT NULL DEFAULT 0,
semi_sync_replica_status TINYint NOT NULL DEFAULT 0,
semi_sync_primary_clients int NOT NULL DEFAULT 0,
stalled_disk TINYint NOT NULL DEFAULT 0,
PRIMARY KEY (alias)
)`,
`
Expand Down
2 changes: 2 additions & 0 deletions go/vt/vtorc/inst/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const (
LockedSemiSyncPrimaryHypothesis AnalysisCode = "LockedSemiSyncPrimaryHypothesis"
LockedSemiSyncPrimary AnalysisCode = "LockedSemiSyncPrimary"
ErrantGTIDDetected AnalysisCode = "ErrantGTIDDetected"
StalledDiskPrimary AnalysisCode = "StalledDiskPrimary"
)

type StructureAnalysisCode string
Expand Down Expand Up @@ -129,6 +130,7 @@ type ReplicationAnalysis struct {
MaxReplicaGTIDMode string
MaxReplicaGTIDErrant string
IsReadOnly bool
IsStalledDisk bool
}

func (replicationAnalysis *ReplicationAnalysis) MarshalJSON() ([]byte, error) {
Expand Down
11 changes: 9 additions & 2 deletions go/vt/vtorc/inst/analysis_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
vitess_keyspace.durability_policy AS durability_policy,
vitess_shard.primary_timestamp AS shard_primary_term_timestamp,
primary_instance.read_only AS read_only,
MIN(primary_instance.gtid_errant) AS gtid_errant,
MIN(primary_instance.gtid_errant) AS gtid_errant,
MIN(primary_instance.alias) IS NULL AS is_invalid,
MIN(primary_instance.binary_log_file) AS binary_log_file,
MIN(primary_instance.binary_log_pos) AS binary_log_pos,
Expand Down Expand Up @@ -233,7 +233,8 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
COUNT(
DISTINCT case when replica_instance.log_bin
AND replica_instance.log_replica_updates then replica_instance.major_version else NULL end
) AS count_distinct_logging_major_versions
) AS count_distinct_logging_major_versions,
primary_instance.stalled_disk != 0 AS is_stalled_disk
FROM
vitess_tablet
JOIN vitess_keyspace ON (
Expand Down Expand Up @@ -354,6 +355,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
a.HeartbeatInterval = m.GetFloat64("heartbeat_interval")

a.IsReadOnly = m.GetUint("read_only") == 1
a.IsStalledDisk = m.GetBool("is_stalled_disk")

if !a.LastCheckValid {
analysisMessage := fmt.Sprintf("analysis: Alias: %+v, Keyspace: %+v, Shard: %+v, IsPrimary: %+v, LastCheckValid: %+v, LastCheckPartialSuccess: %+v, CountReplicas: %+v, CountValidReplicas: %+v, CountValidReplicatingReplicas: %+v, CountLaggingReplicas: %+v, CountDelayedReplicas: %+v",
Expand Down Expand Up @@ -401,6 +403,11 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
} else if isInvalid {
a.Analysis = InvalidReplica
a.Description = "VTOrc hasn't been able to reach the replica even once since restart/shutdown"
} else if a.IsClusterPrimary && !a.LastCheckValid && a.IsStalledDisk {
a.Analysis = StalledDiskPrimary
a.Description = "Primary has a stalled disk"
ca.hasClusterwideAction = true
//
} else if a.IsClusterPrimary && !a.LastCheckValid && a.CountReplicas == 0 {
a.Analysis = DeadPrimaryWithoutReplicas
a.Description = "Primary cannot be reached by vtorc and has no replica"
Expand Down
31 changes: 27 additions & 4 deletions go/vt/vtorc/inst/analysis_dao_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ var (
// The initialSQL is a set of insert commands copied from a dump of an actual running VTOrc instance. The relevant insert commands are here.
// This is a dump taken from a test running 4 tablets, zone1-101 is the primary, zone1-100 is a replica, zone1-112 is a rdonly and zone2-200 is a cross-cell replica.
initialSQL = []string{
`INSERT INTO database_instance VALUES('zone1-0000000112','localhost',6747,'2022-12-28 07:26:04','2022-12-28 07:26:04',213696377,'8.0.31','ROW',1,1,'vt-0000000112-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000112-relay-bin.000002',15815,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-9240-92a06c3be3c2','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10816929,0,0,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-9240-92a06c3be3c2',1,1,'',1000000000000000000,1,0,0,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000100','localhost',6711,'2022-12-28 07:26:04','2022-12-28 07:26:04',1094500338,'8.0.31','ROW',1,1,'vt-0000000100-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000100-relay-bin.000002',15815,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-acf8-d6b0ef9f4eaa','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10103920,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-acf8-d6b0ef9f4eaa',1,1,'',1000000000000000000,1,0,1,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000101','localhost',6714,'2022-12-28 07:26:04','2022-12-28 07:26:04',390954723,'8.0.31','ROW',1,1,'vt-0000000101-bin.000001',15583,'',0,0,0,0,0,'',0,'',0,NULL,NULL,0,'','',0,'',0,0,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a4cc4-8680-11ed-a104-47706090afbd','2022-12-28 07:26:04','',0,0,0,'Homebrew','8.0','FULL',11366095,1,1,'ON',1,'','','729a4cc4-8680-11ed-a104-47706090afbd',-1,-1,'',1000000000000000000,1,1,0,2);`,
`INSERT INTO database_instance VALUES('zone2-0000000200','localhost',6756,'2022-12-28 07:26:05','2022-12-28 07:26:05',444286571,'8.0.31','ROW',1,1,'vt-0000000200-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000200-relay-bin.000002',15815,1,0,'zone2','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a497c-8680-11ed-8ad4-3f51d747db75','2022-12-28 07:26:05','',1,0,0,'Homebrew','8.0','FULL',10443112,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a497c-8680-11ed-8ad4-3f51d747db75',1,1,'',1000000000000000000,1,0,1,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000112','localhost',6747,'2022-12-28 07:26:04','2022-12-28 07:26:04',213696377,'8.0.31','ROW',1,1,'vt-0000000112-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000112-relay-bin.000002',15815,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-9240-92a06c3be3c2','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10816929,0,0,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-9240-92a06c3be3c2',1,1,'',1000000000000000000,1,0,0,0,false);`,
`INSERT INTO database_instance VALUES('zone1-0000000100','localhost',6711,'2022-12-28 07:26:04','2022-12-28 07:26:04',1094500338,'8.0.31','ROW',1,1,'vt-0000000100-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000100-relay-bin.000002',15815,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-acf8-d6b0ef9f4eaa','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10103920,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-acf8-d6b0ef9f4eaa',1,1,'',1000000000000000000,1,0,1,0,false);`,
`INSERT INTO database_instance VALUES('zone1-0000000101','localhost',6714,'2022-12-28 07:26:04','2022-12-28 07:26:04',390954723,'8.0.31','ROW',1,1,'vt-0000000101-bin.000001',15583,'',0,0,0,0,0,'',0,'',0,NULL,NULL,0,'','',0,'',0,0,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a4cc4-8680-11ed-a104-47706090afbd','2022-12-28 07:26:04','',0,0,0,'Homebrew','8.0','FULL',11366095,1,1,'ON',1,'','','729a4cc4-8680-11ed-a104-47706090afbd',-1,-1,'',1000000000000000000,1,1,0,2,false);`,
`INSERT INTO database_instance VALUES('zone2-0000000200','localhost',6756,'2022-12-28 07:26:05','2022-12-28 07:26:05',444286571,'8.0.31','ROW',1,1,'vt-0000000200-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000200-relay-bin.000002',15815,1,0,'zone2','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a497c-8680-11ed-8ad4-3f51d747db75','2022-12-28 07:26:05','',1,0,0,'Homebrew','8.0','FULL',10443112,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a497c-8680-11ed-8ad4-3f51d747db75',1,1,'',1000000000000000000,1,0,1,0,false);`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000100','localhost',6711,'ks','0','zone1',2,'0001-01-01 00:00:00+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3130307d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363731307d20706f72745f6d61703a7b6b65793a227674222076616c75653a363730397d206b657973706163653a226b73222073686172643a22302220747970653a5245504c494341206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a363731312064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000101','localhost',6714,'ks','0','zone1',1,'2022-12-28 07:23:25.129898+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3130317d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363731337d20706f72745f6d61703a7b6b65793a227674222076616c75653a363731327d206b657973706163653a226b73222073686172643a22302220747970653a5052494d415259206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a36373134207072696d6172795f7465726d5f73746172745f74696d653a7b7365636f6e64733a31363732323132323035206e616e6f7365636f6e64733a3132393839383030307d2064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000112','localhost',6747,'ks','0','zone1',3,'0001-01-01 00:00:00+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3131327d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363734367d20706f72745f6d61703a7b6b65793a227674222076616c75653a363734357d206b657973706163653a226b73222073686172643a22302220747970653a52444f4e4c59206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a363734372064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
Expand Down Expand Up @@ -96,6 +96,29 @@ func TestGetReplicationAnalysisDecision(t *testing.T) {
keyspaceWanted: "ks",
shardWanted: "0",
codeWanted: PrimaryTabletDeleted,
}, {
name: "StalledDiskPrimary",
info: []*test.InfoForRecoveryAnalysis{{
TabletInfo: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100},
Hostname: "localhost",
Keyspace: "ks",
Shard: "0",
Type: topodatapb.TabletType_PRIMARY,
MysqlHostname: "localhost",
MysqlPort: 6709,
},
DurabilityPolicy: "none",
LastCheckValid: 0,
CountReplicas: 4,
CountValidReplicas: 4,
CountValidReplicatingReplicas: 0,
IsPrimary: 1,
IsStalledDisk: 1,
}},
keyspaceWanted: "ks",
shardWanted: "0",
codeWanted: StalledDiskPrimary,
}, {
name: "DeadPrimary",
info: []*test.InfoForRecoveryAnalysis{{
Expand Down
1 change: 1 addition & 0 deletions go/vt/vtorc/inst/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ type Instance struct {
IsUpToDate bool
IsRecentlyChecked bool
SecondsSinceLastSeen sql.NullInt64
StalledDisk bool

AllowTLS bool

Expand Down
17 changes: 13 additions & 4 deletions go/vt/vtorc/inst/instance_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ func ReadTopologyInstanceBufferable(tabletAlias string, latency *stopwatch.Named
var tablet *topodatapb.Tablet
var fs *replicationdatapb.FullStatus
readingStartTime := time.Now()
stalledDisk := false
instance := NewInstance()
instanceFound := false
partialSuccess := false
Expand Down Expand Up @@ -205,6 +206,9 @@ func ReadTopologyInstanceBufferable(tabletAlias string, latency *stopwatch.Named

fs, err = fullStatus(tabletAlias)
if err != nil {
if config.GetStalledDiskPrimaryRecovery() && strings.Contains(err.Error(), "stalled disk") {
stalledDisk = true
}
goto Cleanup
}
partialSuccess = true // We at least managed to read something from the server.
Expand Down Expand Up @@ -381,9 +385,10 @@ Cleanup:

// Something is wrong, could be network-wise. Record that we
// tried to check the instance. last_attempted_check is also
// updated on success by writeInstance.
// updated on success by writeInstance. If the reason is a
// stalled disk, we can record that as well.
latency.Start("backend")
_ = UpdateInstanceLastChecked(tabletAlias, partialSuccess)
_ = UpdateInstanceLastChecked(tabletAlias, partialSuccess, stalledDisk)
latency.Stop("backend")
return nil, err
}
Expand Down Expand Up @@ -874,6 +879,7 @@ func mkInsertForInstances(instances []*Instance, instanceWasActuallyFound bool,
"semi_sync_primary_clients",
"semi_sync_replica_status",
"last_discovery_latency",
"stalled_disk",
}

values := make([]string, len(columns))
Expand Down Expand Up @@ -953,6 +959,7 @@ func mkInsertForInstances(instances []*Instance, instanceWasActuallyFound bool,
args = append(args, instance.SemiSyncPrimaryClients)
args = append(args, instance.SemiSyncReplicaStatus)
args = append(args, instance.LastDiscoveryLatency.Nanoseconds())
args = append(args, instance.StalledDisk)
}

sql, err := mkInsert("database_instance", columns, values, len(instances), insertIgnore)
Expand Down Expand Up @@ -998,16 +1005,18 @@ func WriteInstance(instance *Instance, instanceWasActuallyFound bool, lastError

// UpdateInstanceLastChecked updates the last_checked timestamp in the VTOrc backend database
// for a given instance, and records the partial-success and stalled-disk flags alongside it
func UpdateInstanceLastChecked(tabletAlias string, partialSuccess bool) error {
func UpdateInstanceLastChecked(tabletAlias string, partialSuccess bool, stalledDisk bool) error {
writeFunc := func() error {
_, err := db.ExecVTOrc(`UPDATE database_instance
SET
last_checked = DATETIME('now'),
last_check_partial_success = ?
last_check_partial_success = ?,
stalled_disk = ?
WHERE
alias = ?
`,
partialSuccess,
stalledDisk,
tabletAlias,
)
if err != nil {
Expand Down
Loading

0 comments on commit baafe29

Please sign in to comment.