From 8ef8e2097dd655f094596d7b4fe676deae558e7d Mon Sep 17 00:00:00 2001 From: Danil Grigorev Date: Fri, 12 Apr 2024 14:25:53 +0200 Subject: [PATCH] Fix machine discovery in test suite - Disc pressure fix for kube-vip Signed-off-by: Danil Grigorev --- .../cluster-template-docker-legacy.yaml | 4 -- .../cluster-template-docker.yaml | 4 -- test/e2e/helpers.go | 69 ++++++++++++++++++- 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/test/e2e/data/infrastructure/cluster-template-docker-legacy.yaml b/test/e2e/data/infrastructure/cluster-template-docker-legacy.yaml index efc0f23f..ec677a3b 100644 --- a/test/e2e/data/infrastructure/cluster-template-docker-legacy.yaml +++ b/test/e2e/data/infrastructure/cluster-template-docker-legacy.yaml @@ -117,10 +117,6 @@ spec: nodeDrainTimeout: 30s registrationMethod: "address" registrationAddress: "${REGISTRATION_VIP}" - rolloutStrategy: - type: "RollingUpdate" - rollingUpdate: - maxSurge: 3 preRKE2Commands: - mkdir -p /var/lib/rancher/rke2/server/manifests/ && ctr images pull ghcr.io/kube-vip/kube-vip:v0.6.0 && ctr run --rm --net-host ghcr.io/kube-vip/kube-vip:v0.6.0 vip /kube-vip manifest daemonset --arp --interface $(ip -4 -j route list default | jq -r .[0].dev) --address ${REGISTRATION_VIP} --controlplane --leaderElection --taint --services --inCluster | tee /var/lib/rancher/rke2/server/manifests/kube-vip.yaml files: diff --git a/test/e2e/data/infrastructure/cluster-template-docker.yaml b/test/e2e/data/infrastructure/cluster-template-docker.yaml index b202ec19..ab6b8151 100644 --- a/test/e2e/data/infrastructure/cluster-template-docker.yaml +++ b/test/e2e/data/infrastructure/cluster-template-docker.yaml @@ -117,10 +117,6 @@ spec: nodeDrainTimeout: 30s registrationMethod: "address" registrationAddress: "${REGISTRATION_VIP}" - rolloutStrategy: - type: "RollingUpdate" - rollingUpdate: - maxSurge: 3 preRKE2Commands: - mkdir -p /var/lib/rancher/rke2/server/manifests/ && ctr images pull ghcr.io/kube-vip/kube-vip:v0.6.0 && ctr 
run --rm --net-host ghcr.io/kube-vip/kube-vip:v0.6.0 vip /kube-vip manifest daemonset --arp --interface $(ip -4 -j route list default | jq -r .[0].dev) --address ${REGISTRATION_VIP} --controlplane --leaderElection --taint --services --inCluster | tee /var/lib/rancher/rke2/server/manifests/kube-vip.yaml files: diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 8dcbb219..61afdcc8 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -26,6 +26,7 @@ import ( "math/rand" "net" "os/exec" + "sort" "strings" "time" @@ -33,6 +34,7 @@ import ( . "github.com/onsi/gomega" "github.com/pkg/errors" v1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" @@ -140,7 +142,7 @@ func ApplyClusterTemplateAndWait(ctx context.Context, input ApplyClusterTemplate input.WaitForControlPlaneInitialized(ctx, input, result) Byf("Waiting for the machine deployments to be provisioned") - result.MachineDeployments = framework.DiscoveryAndWaitForMachineDeployments(ctx, framework.DiscoveryAndWaitForMachineDeploymentsInput{ + result.MachineDeployments = DiscoveryAndWaitForMachineDeployments(ctx, framework.DiscoveryAndWaitForMachineDeploymentsInput{ Lister: input.ClusterProxy.GetClient(), Cluster: result.Cluster, }, input.WaitForMachineDeployments...) @@ -151,6 +153,71 @@ func ApplyClusterTemplateAndWait(ctx context.Context, input ApplyClusterTemplate } } +// DiscoveryAndWaitForMachineDeployments discovers the MachineDeployments existing in a cluster and waits for them to be ready (all the machine provisioned). +func DiscoveryAndWaitForMachineDeployments(ctx context.Context, input framework.DiscoveryAndWaitForMachineDeploymentsInput, intervals ...interface{}) []*clusterv1.MachineDeployment { + Expect(ctx).NotTo(BeNil(), "ctx is required for DiscoveryAndWaitForMachineDeployments") + Expect(input.Lister).ToNot(BeNil(), "Invalid argument. 
input.Lister can't be nil when calling DiscoveryAndWaitForMachineDeployments")
+	Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling DiscoveryAndWaitForMachineDeployments")
+
+	machineDeployments := framework.GetMachineDeploymentsByCluster(ctx, framework.GetMachineDeploymentsByClusterInput{
+		Lister:      input.Lister,
+		ClusterName: input.Cluster.Name,
+		Namespace:   input.Cluster.Namespace,
+	})
+	for _, deployment := range machineDeployments {
+		WaitForMachineDeploymentNodesToExist(ctx, framework.WaitForMachineDeploymentNodesToExistInput{
+			Lister:            input.Lister,
+			Cluster:           input.Cluster,
+			MachineDeployment: deployment,
+		}, intervals...)
+
+		framework.AssertMachineDeploymentFailureDomains(ctx, framework.AssertMachineDeploymentFailureDomainsInput{
+			Lister:            input.Lister,
+			Cluster:           input.Cluster,
+			MachineDeployment: deployment,
+		})
+	}
+	return machineDeployments
+}
+
+// WaitForMachineDeploymentNodesToExist waits until the newest MachineSet whose replica count matches the MachineDeployment has a Node for every replica.
+func WaitForMachineDeploymentNodesToExist(ctx context.Context, input framework.WaitForMachineDeploymentNodesToExistInput, intervals ...interface{}) {
+	Expect(ctx).NotTo(BeNil(), "ctx is required for WaitForMachineDeploymentNodesToExist")
+	Expect(input.Lister).ToNot(BeNil(), "Invalid argument. input.Lister can't be nil when calling WaitForMachineDeploymentNodesToExist")
+	Expect(input.MachineDeployment).ToNot(BeNil(), "Invalid argument. input.MachineDeployment can't be nil when calling WaitForMachineDeploymentNodesToExist")
+
+	By("Waiting for the workload nodes to exist")
+	Eventually(func(g Gomega) {
+		selectorMap, err := metav1.LabelSelectorAsMap(&input.MachineDeployment.Spec.Selector)
+		g.Expect(err).ToNot(HaveOccurred())
+		ms := &clusterv1.MachineSetList{}
+		err = input.Lister.List(ctx, ms, client.InNamespace(input.Cluster.Namespace), client.MatchingLabels(selectorMap))
+		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(ms.Items).NotTo(BeEmpty())
+		sort.Slice(ms.Items, func(i, j int) bool {
+			return ms.Items[j].CreationTimestamp.After(ms.Items[i].CreationTimestamp.Time)
+		})
+		machineSet := ms.Items[0]
+		for _, candidate := range ms.Items {
+			if *candidate.Spec.Replicas == *input.MachineDeployment.Spec.Replicas {
+				machineSet = candidate
+			}
+		}
+		selectorMap, err = metav1.LabelSelectorAsMap(&machineSet.Spec.Selector)
+		g.Expect(err).ToNot(HaveOccurred())
+		machines := &clusterv1.MachineList{}
+		err = input.Lister.List(ctx, machines, client.InNamespace(machineSet.Namespace), client.MatchingLabels(selectorMap))
+		g.Expect(err).ToNot(HaveOccurred())
+		count := 0
+		for _, machine := range machines.Items {
+			if machine.Status.NodeRef != nil {
+				count++
+			}
+		}
+		g.Expect(count).To(Equal(int(*input.MachineDeployment.Spec.Replicas)))
+	}, intervals...).Should(Succeed(), "Timed out waiting for %d nodes to be created for MachineDeployment %s", int(*input.MachineDeployment.Spec.Replicas), klog.KObj(input.MachineDeployment))
+}
+
 func SetControllerVersionAndWait(ctx context.Context, proxy framework.ClusterProxy, version string) {
 	cp := &v1.Deployment{}
 	Expect(proxy.GetClient().Get(ctx, types.NamespacedName{