From e02e818ea5f5863b1dc5b864a0691b32da30b2d3 Mon Sep 17 00:00:00 2001 From: Markus Walker Date: Wed, 27 Mar 2024 09:54:01 -0700 Subject: [PATCH] Fix flaky nodescaling test cases --- extensions/machinepools/machinepools.go | 10 +++++++-- extensions/provisioning/creates.go | 28 ++++++++++++++++--------- extensions/rke1/nodepools/nodepools.go | 4 ++-- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/extensions/machinepools/machinepools.go b/extensions/machinepools/machinepools.go index 0cfd6ea2..ad077fb5 100644 --- a/extensions/machinepools/machinepools.go +++ b/extensions/machinepools/machinepools.go @@ -1,6 +1,7 @@ package machinepools import ( + "context" "fmt" "strconv" "strings" @@ -72,14 +73,19 @@ func updateMachinePoolQuantity(client *rancher.Client, cluster *v1.SteveAPIObjec return nil, err } - err = kwait.Poll(500*time.Millisecond, defaults.TenMinuteTimeout, func() (done bool, err error) { + err = kwait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, defaults.ThirtyMinuteTimeout, true, func(ctx context.Context) (done bool, err error) { + client, err = client.ReLogin() + if err != nil { + return false, err + } + clusterResp, err := client.Steve.SteveType("provisioning.cattle.io.cluster").ByID(cluster.ID) if err != nil { return false, err } if clusterResp.ObjectMeta.State.Name == active && - nodestat.AllManagementNodeReady(client, cluster.ID, defaults.ThirtyMinuteTimeout) == nil { + nodestat.AllMachineReady(client, cluster.ID, defaults.ThirtyMinuteTimeout) == nil { return true, nil } diff --git a/extensions/provisioning/creates.go b/extensions/provisioning/creates.go index f82313c5..694590c5 100644 --- a/extensions/provisioning/creates.go +++ b/extensions/provisioning/creates.go @@ -44,9 +44,10 @@ import ( ) const ( - active = "active" - internalIP = "rke2.io/internal-ip" - namespace = "fleet-default" + active = "active" + internalIP = "alpha.kubernetes.io/provided-node-ip" + rke1ExternalIP = "rke.cattle.io/external-ip" + namespace = "fleet-default" rke2k3sAirgapCustomCluster = "rke2k3sairgapcustomcluster" rke2k3sNodeCorralName = "rke2k3sregisterNode" @@ -805,13 +806,14 @@ func AddRKE2K3SCustomClusterNodes(client *rancher.Client, cluster *v1.SteveAPIOb logrus.Infof(output) } - err = kwait.Poll(500*time.Millisecond, defaults.TenMinuteTimeout, func() (done bool, err error) { + err = kwait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, defaults.ThirtyMinuteTimeout, true, func(ctx context.Context) (done bool, err error) { clusterResp, err := client.Steve.SteveType(clusters.ProvisioningSteveResourceType).ByID(cluster.ID) if err != nil { return false, err } - if clusterResp.ObjectMeta.State.Name == active && nodestat.AllManagementNodeReady(client, cluster.ID, defaults.ThirtyMinuteTimeout) == nil { + if clusterResp.ObjectMeta.State.Name == active && + nodestat.AllMachineReady(client, cluster.ID, defaults.ThirtyMinuteTimeout) == nil { return true, nil } return false, nil @@ -849,7 +851,7 @@ func DeleteRKE2K3SCustomClusterNodes(client *rancher.Client, clusterID string, c return err } - err = kwait.Poll(500*time.Millisecond, defaults.TenMinuteTimeout, func() (done bool, err error) { + err = kwait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, defaults.ThirtyMinuteTimeout, true, func(ctx context.Context) (done bool, err error) { _, err = client.Steve.SteveType(machineSteveResourceType).ByID(machine.ID) if err != nil { logrus.Infof("Node has successfully been deleted!") @@ -887,13 +889,19 @@ func AddRKE1CustomClusterNodes(client *rancher.Client, cluster *management.Clust logrus.Infof(output) } - err = kwait.Poll(500*time.Millisecond, defaults.TenMinuteTimeout, func() (done bool, err error) { + err = kwait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, defaults.ThirtyMinuteTimeout, true, func(ctx context.Context) (done bool, err error) { + client, err = client.ReLogin() + if err != nil { + return false, err + } + clusterResp, err := client.Management.Cluster.ByID(cluster.ID) if err != nil { return false, err } - if clusterResp.State == active && nodestat.AllManagementNodeReady(client, cluster.ID, defaults.ThirtyMinuteTimeout) == nil { + if clusterResp.State == active && + nodestat.AllManagementNodeReady(client, cluster.ID, defaults.ThirtyMinuteTimeout) == nil { return true, nil } return false, nil @@ -916,7 +924,7 @@ func DeleteRKE1CustomClusterNodes(client *rancher.Client, cluster *management.Cl for _, nodeToDelete := range nodesToDelete { for _, node := range nodes.Data { - if node.ExternalIPAddress == nodeToDelete.PublicIPAddress { + if node.Annotations[rke1ExternalIP] == nodeToDelete.PublicIPAddress { machine, err := client.Management.Node.ByID(node.ID) if err != nil { return err @@ -928,7 +936,7 @@ func DeleteRKE1CustomClusterNodes(client *rancher.Client, cluster *management.Cl return err } - err = kwait.Poll(500*time.Millisecond, defaults.TenMinuteTimeout, func() (done bool, err error) { + err = kwait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, defaults.ThirtyMinuteTimeout, true, func(ctx context.Context) (done bool, err error) { _, err = client.Management.Node.ByID(machine.ID) if err != nil { logrus.Infof("Node has successfully been deleted!") diff --git a/extensions/rke1/nodepools/nodepools.go b/extensions/rke1/nodepools/nodepools.go index d9d2639b..94f2c2a6 100644 --- a/extensions/rke1/nodepools/nodepools.go +++ b/extensions/rke1/nodepools/nodepools.go @@ -1,6 +1,7 @@ package rke1 import ( + "context" "strconv" "time" @@ -110,14 +111,13 @@ func updateNodePoolQuantity(client *rancher.Client, cluster *management.Cluster, return nil, err } - err = kwait.Poll(500*time.Millisecond, defaults.TenMinuteTimeout, func() (done bool, err error) { + err = kwait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, defaults.ThirtyMinuteTimeout, true, func(ctx context.Context) (done bool, err error) { clusterResp, err := client.Management.Cluster.ByID(cluster.ID) if err != nil { return false, err } if clusterResp.State == active && nodestat.AllManagementNodeReady(client, clusterResp.ID, defaults.ThirtyMinuteTimeout) == nil { - logrus.Infof("Node pool is scaled!") return true, nil } return false, nil