Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
133747: sql: deflake Test*Relocate*Voters r=rafiss a=rafiss

These tests were flaky since they would look up an initial replication state, and if the initial attempt failed, would keep retrying based on that state. Now, when a retry occurs, the new replication state is fetched so that the test can run a query that is expected to work if leaseholders or voters have changed.

fixes cockroachdb#132793
fixes cockroachdb#133684
Release note: None

Co-authored-by: Rafi Shamim <[email protected]>
  • Loading branch information
craig[bot] and rafiss committed Oct 30, 2024
2 parents 75cda80 + 99cb9fb commit 8450352
Showing 1 changed file with 82 additions and 83 deletions.
165 changes: 82 additions & 83 deletions pkg/sql/multitenant_admin_function_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ func (te tenantExpected) isSet() bool {
}

func (te tenantExpected) validate(
t *testing.T, runQuery func() (*gosql.Rows, error), message string,
t *testing.T, runQuery func() (_ *gosql.Rows, msg string, _ error),
) {
expectedErrorMessage := te.errorMessage
expectedResults := te.result
Expand All @@ -372,7 +372,7 @@ func (te tenantExpected) validate(
// query to make the test less flaky.
// See: https://github.com/cockroachdb/cockroach/issues/95252
testutils.SucceedsSoon(t, func() error {
rows, err := runQuery()
rows, message, err := runQuery()
if expectedErrorMessage == "" {
if err != nil {
return errors.WithMessagef(err, "msg=%s", message)
Expand Down Expand Up @@ -408,7 +408,7 @@ func (te tenantExpected) validate(
}
default: // For deterministic results that should be non-empty.
if !strings.Contains(actualColResult, expectedColResult) {
return errors.Newf("expected %q contains %q %s row=%d col=%d", actualColResult, expectedColResult, message, i, j)
return errors.Newf("expected %q to be contained in result %q %s row=%d col=%d", expectedColResult, actualColResult, message, i, j)
}
}
}
Expand Down Expand Up @@ -628,10 +628,10 @@ func TestMultiTenantAdminFunction(t *testing.T) {
}
tExp.validate(
t,
func() (*gosql.Rows, error) {
return db.QueryContext(ctx, tc.query)
func() (*gosql.Rows, string, error) {
rows, err := db.QueryContext(ctx, tc.query)
return rows, message, err
},
message,
)
},
)
Expand Down Expand Up @@ -674,17 +674,16 @@ func TestTruncateTable(t *testing.T) {

tExp.validate(
t,
func() (*gosql.Rows, error) {
func() (*gosql.Rows, string, error) {
// validateErr and validateRows come from separate queries for TRUNCATE.
_, validateErr := db.ExecContext(ctx, "TRUNCATE TABLE t;")
var validateRows *gosql.Rows
if err == nil {
validateRows, err = db.QueryContext(ctx, "SELECT start_key, end_key from [SHOW RANGES FROM INDEX t@primary];")
require.NoErrorf(t, err, message)
}
return validateRows, validateErr
return validateRows, message, validateErr
},
message,
)
},
)
Expand Down Expand Up @@ -752,31 +751,31 @@ func TestRelocateVoters(t *testing.T) {
testCluster.ToggleLeaseQueues(false)
testCluster.ToggleReplicateQueues(false)
testCluster.ToggleSplitQueues(false)
replicaState := getReplicaState(
t,
ctx,
db,
expectedNumReplicas,
expectedNumVotingReplicas,
expectedNumNonVotingReplicas,
message,
)
replicas := replicaState.replicas
// Set toReplica to the node that does not have a voting replica for t.
toReplica := getToReplica(testCluster.NodeIDs(), replicas)
// Set fromReplica to the first non-leaseholder voting replica for t.
fromReplica := replicas[0]
if fromReplica == replicaState.leaseholder {
fromReplica = replicas[1]
}
query := fmt.Sprintf(tc.query, fromReplica, toReplica)
message = getReplicaStateMessage(tenant, query, replicaState, fromReplica, toReplica)
tExp.validate(
t,
func() (*gosql.Rows, error) {
return db.QueryContext(ctx, query)
func() (_ *gosql.Rows, msg string, _ error) {
replicaState := getReplicaState(
t,
ctx,
db,
expectedNumReplicas,
expectedNumVotingReplicas,
expectedNumNonVotingReplicas,
message,
)
replicas := replicaState.replicas
// Set toReplica to the node that does not have a voting replica for t.
toReplica := getToReplica(testCluster.NodeIDs(), replicas)
// Set fromReplica to the first non-leaseholder voting replica for t.
fromReplica := replicas[0]
if fromReplica == replicaState.leaseholder {
fromReplica = replicas[1]
}
query := fmt.Sprintf(tc.query, fromReplica, toReplica)
message = getReplicaStateMessage(tenant, query, replicaState, fromReplica, toReplica)
rows, err := db.QueryContext(ctx, query)
return rows, message, err
},
message,
)
},
)
Expand Down Expand Up @@ -833,28 +832,28 @@ func TestExperimentalRelocateVoters(t *testing.T) {
testCluster.ToggleLeaseQueues(false)
testCluster.ToggleReplicateQueues(false)
testCluster.ToggleSplitQueues(false)
replicaState := getReplicaState(
t,
ctx,
db,
expectedNumReplicas,
expectedNumVotingReplicas,
expectedNumNonVotingReplicas,
message,
)
votingReplicas := replicaState.votingReplicas
newVotingReplicas := make([]roachpb.NodeID, len(votingReplicas))
newVotingReplicas[0] = votingReplicas[0]
newVotingReplicas[1] = votingReplicas[1]
newVotingReplicas[2] = getToReplica(testCluster.NodeIDs(), votingReplicas)
query := fmt.Sprintf(tc.query, nodeIDsToArrayString(newVotingReplicas))
message = getReplicaStateMessage(tenant, query, replicaState, votingReplicas[2], newVotingReplicas[2])
tExp.validate(
t,
func() (*gosql.Rows, error) {
return db.QueryContext(ctx, query)
func() (*gosql.Rows, string, error) {
replicaState := getReplicaState(
t,
ctx,
db,
expectedNumReplicas,
expectedNumVotingReplicas,
expectedNumNonVotingReplicas,
message,
)
votingReplicas := replicaState.votingReplicas
newVotingReplicas := make([]roachpb.NodeID, len(votingReplicas))
newVotingReplicas[0] = votingReplicas[0]
newVotingReplicas[1] = votingReplicas[1]
newVotingReplicas[2] = getToReplica(testCluster.NodeIDs(), votingReplicas)
query := fmt.Sprintf(tc.query, nodeIDsToArrayString(newVotingReplicas))
message = getReplicaStateMessage(tenant, query, replicaState, votingReplicas[2], newVotingReplicas[2])
rows, err := db.QueryContext(ctx, query)
return rows, message, err
},
message,
)
},
)
Expand Down Expand Up @@ -927,27 +926,27 @@ func TestRelocateNonVoters(t *testing.T) {
testCluster.ToggleLeaseQueues(false)
testCluster.ToggleReplicateQueues(false)
testCluster.ToggleSplitQueues(false)
replicaState := getReplicaState(
t,
ctx,
db,
expectedNumReplicas,
expectedNumVotingReplicas,
expectedNumNonVotingReplicas,
message,
)
// Set toReplica to the node that does not have a voting replica for t.
toReplica := getToReplica(testCluster.NodeIDs(), replicaState.replicas)
// Set fromReplica to the first non-leaseholder voting replica for t.
fromReplica := replicaState.nonVotingReplicas[0]
query := fmt.Sprintf(tc.query, fromReplica, toReplica)
message = getReplicaStateMessage(tenant, query, replicaState, fromReplica, toReplica)
tExp.validate(
t,
func() (*gosql.Rows, error) {
return db.QueryContext(ctx, query)
func() (*gosql.Rows, string, error) {
replicaState := getReplicaState(
t,
ctx,
db,
expectedNumReplicas,
expectedNumVotingReplicas,
expectedNumNonVotingReplicas,
message,
)
// Set toReplica to the node that does not have a voting replica for t.
toReplica := getToReplica(testCluster.NodeIDs(), replicaState.replicas)
// Set fromReplica to the first non-leaseholder voting replica for t.
fromReplica := replicaState.nonVotingReplicas[0]
query := fmt.Sprintf(tc.query, fromReplica, toReplica)
message = getReplicaStateMessage(tenant, query, replicaState, fromReplica, toReplica)
rows, err := db.QueryContext(ctx, query)
return rows, message, err
},
message,
)
},
)
Expand Down Expand Up @@ -1002,24 +1001,24 @@ func TestExperimentalRelocateNonVoters(t *testing.T) {
testCluster.ToggleLeaseQueues(false)
testCluster.ToggleReplicateQueues(false)
testCluster.ToggleSplitQueues(false)
replicaState := getReplicaState(
t,
ctx,
db,
expectedNumReplicas,
expectedNumVotingReplicas,
expectedNumNonVotingReplicas,
message,
)
newNonVotingReplicas := []roachpb.NodeID{getToReplica(testCluster.NodeIDs(), replicaState.replicas)}
query := fmt.Sprintf(tc.query, nodeIDsToArrayString(newNonVotingReplicas))
message = getReplicaStateMessage(tenant, query, replicaState, replicaState.nonVotingReplicas[0], newNonVotingReplicas[0])
tExp.validate(
t,
func() (*gosql.Rows, error) {
return db.QueryContext(ctx, query)
func() (*gosql.Rows, string, error) {
replicaState := getReplicaState(
t,
ctx,
db,
expectedNumReplicas,
expectedNumVotingReplicas,
expectedNumNonVotingReplicas,
message,
)
newNonVotingReplicas := []roachpb.NodeID{getToReplica(testCluster.NodeIDs(), replicaState.replicas)}
query := fmt.Sprintf(tc.query, nodeIDsToArrayString(newNonVotingReplicas))
message = getReplicaStateMessage(tenant, query, replicaState, replicaState.nonVotingReplicas[0], newNonVotingReplicas[0])
rows, err := db.QueryContext(ctx, query)
return rows, message, err
},
message,
)
},
)
Expand Down

0 comments on commit 8450352

Please sign in to comment.