From 99cb9fb5f660d2e11ac8ac3f932fbca70a43cdd1 Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Tue, 29 Oct 2024 17:02:24 -0400 Subject: [PATCH] sql: deflake Test*Relocate*Voters These tests were flaky since they would look up an initial replication state, and if the initial attempt failed, would keep retrying based on that state. Now, when a retry occurs, the new replication state is fetched so that the test can run a query that is expected to work if leaseholders or voters have changed. Release note: None --- pkg/sql/multitenant_admin_function_test.go | 165 ++++++++++----------- 1 file changed, 82 insertions(+), 83 deletions(-) diff --git a/pkg/sql/multitenant_admin_function_test.go b/pkg/sql/multitenant_admin_function_test.go index 4bb200374dcd..a7f10bddbe0a 100644 --- a/pkg/sql/multitenant_admin_function_test.go +++ b/pkg/sql/multitenant_admin_function_test.go @@ -363,7 +363,7 @@ func (te tenantExpected) isSet() bool { } func (te tenantExpected) validate( - t *testing.T, runQuery func() (*gosql.Rows, error), message string, + t *testing.T, runQuery func() (_ *gosql.Rows, msg string, _ error), ) { expectedErrorMessage := te.errorMessage expectedResults := te.result @@ -372,7 +372,7 @@ func (te tenantExpected) validate( // query to make the test less flaky. // See: https://github.com/cockroachdb/cockroach/issues/95252 testutils.SucceedsSoon(t, func() error { - rows, err := runQuery() + rows, message, err := runQuery() if expectedErrorMessage == "" { if err != nil { return errors.WithMessagef(err, "msg=%s", message) @@ -408,7 +408,7 @@ func (te tenantExpected) validate( } default: // For deterministic results that should be non-empty. if !strings.Contains(actualColResult, expectedColResult) { - return errors.Newf("expected %q contains %q %s row=%d col=%d", actualColResult, expectedColResult, message, i, j) + return errors.Newf("expected %q to be contained in result %q %s row=%d col=%d", expectedColResult, actualColResult, message, i, j) } } } @@ -628,10 +628,10 @@ func TestMultiTenantAdminFunction(t *testing.T) { } tExp.validate( t, - func() (*gosql.Rows, error) { - return db.QueryContext(ctx, tc.query) + func() (*gosql.Rows, string, error) { + rows, err := db.QueryContext(ctx, tc.query) + return rows, message, err }, - message, ) }, ) @@ -674,7 +674,7 @@ func TestTruncateTable(t *testing.T) { tExp.validate( t, - func() (*gosql.Rows, error) { + func() (*gosql.Rows, string, error) { // validateErr and validateRows come from separate queries for TRUNCATE. _, validateErr := db.ExecContext(ctx, "TRUNCATE TABLE t;") var validateRows *gosql.Rows @@ -682,9 +682,8 @@ func TestTruncateTable(t *testing.T) { validateRows, err = db.QueryContext(ctx, "SELECT start_key, end_key from [SHOW RANGES FROM INDEX t@primary];") require.NoErrorf(t, err, message) } - return validateRows, validateErr + return validateRows, message, validateErr }, - message, ) }, ) @@ -752,31 +751,31 @@ func TestRelocateVoters(t *testing.T) { testCluster.ToggleLeaseQueues(false) testCluster.ToggleReplicateQueues(false) testCluster.ToggleSplitQueues(false) - replicaState := getReplicaState( - t, - ctx, - db, - expectedNumReplicas, - expectedNumVotingReplicas, - expectedNumNonVotingReplicas, - message, - ) - replicas := replicaState.replicas - // Set toReplica to the node that does not have a voting replica for t. - toReplica := getToReplica(testCluster.NodeIDs(), replicas) - // Set fromReplica to the first non-leaseholder voting replica for t. - fromReplica := replicas[0] - if fromReplica == replicaState.leaseholder { - fromReplica = replicas[1] - } - query := fmt.Sprintf(tc.query, fromReplica, toReplica) - message = getReplicaStateMessage(tenant, query, replicaState, fromReplica, toReplica) tExp.validate( t, - func() (*gosql.Rows, error) { - return db.QueryContext(ctx, query) + func() (_ *gosql.Rows, msg string, _ error) { + replicaState := getReplicaState( + t, + ctx, + db, + expectedNumReplicas, + expectedNumVotingReplicas, + expectedNumNonVotingReplicas, + message, + ) + replicas := replicaState.replicas + // Set toReplica to the node that does not have a voting replica for t. + toReplica := getToReplica(testCluster.NodeIDs(), replicas) + // Set fromReplica to the first non-leaseholder voting replica for t. + fromReplica := replicas[0] + if fromReplica == replicaState.leaseholder { + fromReplica = replicas[1] + } + query := fmt.Sprintf(tc.query, fromReplica, toReplica) + message = getReplicaStateMessage(tenant, query, replicaState, fromReplica, toReplica) + rows, err := db.QueryContext(ctx, query) + return rows, message, err }, - message, ) }, ) @@ -833,28 +832,28 @@ func TestExperimentalRelocateVoters(t *testing.T) { testCluster.ToggleLeaseQueues(false) testCluster.ToggleReplicateQueues(false) testCluster.ToggleSplitQueues(false) - replicaState := getReplicaState( - t, - ctx, - db, - expectedNumReplicas, - expectedNumVotingReplicas, - expectedNumNonVotingReplicas, - message, - ) - votingReplicas := replicaState.votingReplicas - newVotingReplicas := make([]roachpb.NodeID, len(votingReplicas)) - newVotingReplicas[0] = votingReplicas[0] - newVotingReplicas[1] = votingReplicas[1] - newVotingReplicas[2] = getToReplica(testCluster.NodeIDs(), votingReplicas) - query := fmt.Sprintf(tc.query, nodeIDsToArrayString(newVotingReplicas)) - message = getReplicaStateMessage(tenant, query, replicaState, votingReplicas[2], newVotingReplicas[2]) tExp.validate( t, - func() (*gosql.Rows, error) { - return db.QueryContext(ctx, query) + func() (*gosql.Rows, string, error) { + replicaState := getReplicaState( + t, + ctx, + db, + expectedNumReplicas, + expectedNumVotingReplicas, + expectedNumNonVotingReplicas, + message, + ) + votingReplicas := replicaState.votingReplicas + newVotingReplicas := make([]roachpb.NodeID, len(votingReplicas)) + newVotingReplicas[0] = votingReplicas[0] + newVotingReplicas[1] = votingReplicas[1] + newVotingReplicas[2] = getToReplica(testCluster.NodeIDs(), votingReplicas) + query := fmt.Sprintf(tc.query, nodeIDsToArrayString(newVotingReplicas)) + message = getReplicaStateMessage(tenant, query, replicaState, votingReplicas[2], newVotingReplicas[2]) + rows, err := db.QueryContext(ctx, query) + return rows, message, err }, - message, ) }, ) @@ -927,27 +926,27 @@ func TestRelocateNonVoters(t *testing.T) { testCluster.ToggleLeaseQueues(false) testCluster.ToggleReplicateQueues(false) testCluster.ToggleSplitQueues(false) - replicaState := getReplicaState( - t, - ctx, - db, - expectedNumReplicas, - expectedNumVotingReplicas, - expectedNumNonVotingReplicas, - message, - ) - // Set toReplica to the node that does not have a voting replica for t. - toReplica := getToReplica(testCluster.NodeIDs(), replicaState.replicas) - // Set fromReplica to the first non-leaseholder voting replica for t. - fromReplica := replicaState.nonVotingReplicas[0] - query := fmt.Sprintf(tc.query, fromReplica, toReplica) - message = getReplicaStateMessage(tenant, query, replicaState, fromReplica, toReplica) tExp.validate( t, - func() (*gosql.Rows, error) { - return db.QueryContext(ctx, query) + func() (*gosql.Rows, string, error) { + replicaState := getReplicaState( + t, + ctx, + db, + expectedNumReplicas, + expectedNumVotingReplicas, + expectedNumNonVotingReplicas, + message, + ) + // Set toReplica to the node that does not have a voting replica for t. + toReplica := getToReplica(testCluster.NodeIDs(), replicaState.replicas) + // Set fromReplica to the first non-leaseholder voting replica for t. + fromReplica := replicaState.nonVotingReplicas[0] + query := fmt.Sprintf(tc.query, fromReplica, toReplica) + message = getReplicaStateMessage(tenant, query, replicaState, fromReplica, toReplica) + rows, err := db.QueryContext(ctx, query) + return rows, message, err }, - message, ) }, ) @@ -1002,24 +1001,24 @@ func TestExperimentalRelocateNonVoters(t *testing.T) { testCluster.ToggleLeaseQueues(false) testCluster.ToggleReplicateQueues(false) testCluster.ToggleSplitQueues(false) - replicaState := getReplicaState( - t, - ctx, - db, - expectedNumReplicas, - expectedNumVotingReplicas, - expectedNumNonVotingReplicas, - message, - ) - newNonVotingReplicas := []roachpb.NodeID{getToReplica(testCluster.NodeIDs(), replicaState.replicas)} - query := fmt.Sprintf(tc.query, nodeIDsToArrayString(newNonVotingReplicas)) - message = getReplicaStateMessage(tenant, query, replicaState, replicaState.nonVotingReplicas[0], newNonVotingReplicas[0]) tExp.validate( t, - func() (*gosql.Rows, error) { - return db.QueryContext(ctx, query) + func() (*gosql.Rows, string, error) { + replicaState := getReplicaState( + t, + ctx, + db, + expectedNumReplicas, + expectedNumVotingReplicas, + expectedNumNonVotingReplicas, + message, + ) + newNonVotingReplicas := []roachpb.NodeID{getToReplica(testCluster.NodeIDs(), replicaState.replicas)} + query := fmt.Sprintf(tc.query, nodeIDsToArrayString(newNonVotingReplicas)) + message = getReplicaStateMessage(tenant, query, replicaState, replicaState.nonVotingReplicas[0], newNonVotingReplicas[0]) + rows, err := db.QueryContext(ctx, query) + return rows, message, err }, - message, ) }, )