From 2decea7715ed3ae97b57b4c5492157ea7d9d44fe Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Thu, 31 Oct 2024 17:25:31 +0100 Subject: [PATCH] Extend Node drain e2e test for MachineDrainRules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com --- .../{node_drain_timeout.go => node_drain.go} | 432 +++++++++++++----- ...ain_timeout_test.go => node_drain_test.go} | 2 +- test/framework/deployment_helpers.go | 4 + 3 files changed, 321 insertions(+), 117 deletions(-) rename test/e2e/{node_drain_timeout.go => node_drain.go} (51%) rename test/e2e/{node_drain_timeout_test.go => node_drain_test.go} (94%) diff --git a/test/e2e/node_drain_timeout.go b/test/e2e/node_drain.go similarity index 51% rename from test/e2e/node_drain_timeout.go rename to test/e2e/node_drain.go index fa4a8f353011..8c2f3e757225 100644 --- a/test/e2e/node_drain_timeout.go +++ b/test/e2e/node_drain.go @@ -68,22 +68,16 @@ type NodeDrainTimeoutSpecInput struct { // NodeDrainTimeoutSpec goes through the following steps: // * Create cluster with 3 CP & 1 worker Machine // * Ensure Node label is set & NodeDrainTimeout is set to 0 (wait forever) +// * Deploy MachineDrainRules // * Deploy Deployment with unevictable Pods on CP & MD Nodes // * Deploy Deployment with evictable Pods with finalizer on CP & MD Nodes -// * Trigger Scale down to 1 CP and 0 MD Machines -// * Verify Node drains for control plane and MachineDeployment Machines are blocked (PDBs & Pods with finalizer) -// - DrainingSucceeded conditions should: -// - show 1 evicted Pod with deletionTimestamp (still exists because of finalizer) -// - show 1 Pod which could not be evicted because of PDB -// - Verify the evicted Pod has terminated (i.e. succeeded) and it was evicted -// -// * Unblock deletion of evicted Pods by removing the finalizer -// * Verify Node drains for control plane and MachineDeployment Machines are blocked (only PDBs) -// - DrainingSucceeded conditions should: -// - not contain any Pods with deletionTimestamp -// - show 1 Pod which could not be evicted because of PDB -// -// * Set NodeDrainTimeout to 1s to unblock drain +// * Trigger Node drain by scaling down the control plane to 1 and MachineDeployments to 0 +// * Get draining control plane and MachineDeployment Machines +// * Verify drain of Deployments with order 1 +// * Verify drain of Deployments with order 5 +// * Verify skipped Pods are still there and don't have a deletionTimestamp +// * Verify Node drains for control plane and MachineDeployment Machines are blocked (only by PDBs) +// * Set NodeDrainTimeout to 1s to unblock Node drain // * Verify scale down succeeded because Node drains were unblocked. 
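+//
+// Note: The MachineDrainRules deployed by this test select Pods via the "mdr" label: Pods labeled
+// "mdr": "drain-order-1" or "mdr": "drain-order-5" are drained with order 1 and 5 respectively, while
+// Pods labeled "cluster.x-k8s.io/drain": MachineDrainRuleDrainBehaviorSkip are skipped entirely. The
+// unevictable Pods (protected by PDBs) are labeled "mdr": "drain-order-10" so they are drained last.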
func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeoutSpecInput) { var ( @@ -110,7 +104,7 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) }) - It("A node should be forcefully removed if it cannot be drained in time", func() { + It("A node should be drained correctly", func() { By("Creating a workload cluster") infrastructureProvider := clusterctl.DefaultInfrastructureProvider if input.InfrastructureProvider != nil { @@ -118,6 +112,7 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo } controlPlaneReplicas := 3 + clusterName := fmt.Sprintf("%s-%s", specName, util.RandomString(6)) clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ ClusterProxy: input.BootstrapClusterProxy, ConfigCluster: clusterctl.ConfigClusterInput{ @@ -127,7 +122,7 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo InfrastructureProvider: infrastructureProvider, Flavor: ptr.Deref(input.Flavor, "node-drain"), Namespace: namespace.Name, - ClusterName: fmt.Sprintf("%s-%s", specName, util.RandomString(6)), + ClusterName: clusterName, KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion), ControlPlaneMachineCount: ptr.To[int64](int64(controlPlaneReplicas)), WorkerMachineCount: ptr.To[int64](1), @@ -177,61 +172,82 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name) - By("Deploy Deployment with unevictable Pods on control plane Nodes.") - cpDeploymentAndPDBName := fmt.Sprintf("%s-%s", "unevictable-pod-cp", util.RandomString(3)) - framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{ - WorkloadClusterProxy: workloadClusterProxy, - ControlPlane: controlplane, - DeploymentName: cpDeploymentAndPDBName, - Namespace: "unevictable-workload", - NodeSelector: map[string]string{nodeOwnerLabelKey: "KubeadmControlPlane-" + controlplane.Name}, - WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"), - }) - By("Deploy Deployment with unevictable Pods on MachineDeployment Nodes.") - mdDeploymentAndPDBNames := map[string]string{} - for _, md := range machineDeployments { - mdDeploymentAndPDBNames[md.Name] = fmt.Sprintf("%s-%s", "unevictable-pod-md", util.RandomString(3)) - framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{ - WorkloadClusterProxy: workloadClusterProxy, - MachineDeployment: md, - DeploymentName: mdDeploymentAndPDBNames[md.Name], - Namespace: "unevictable-workload", - NodeSelector: map[string]string{nodeOwnerLabelKey: "MachineDeployment-" + md.Name}, - WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"), - }) - } + By("Deploy MachineDrainRules.") + Expect(input.BootstrapClusterProxy.GetClient().Create(ctx, + generateMachineDrainRule(namespace.Name, clusterName, "drain-order-1", 1))).To(Succeed()) + Expect(input.BootstrapClusterProxy.GetClient().Create(ctx, + generateMachineDrainRule(namespace.Name, clusterName, "drain-order-5", 5))).To(Succeed()) + Expect(input.BootstrapClusterProxy.GetClient().Create(ctx, + generateMachineDrainRule(namespace.Name, clusterName, "drain-order-10", 10))).To(Succeed()) - By("Deploy Deployment with evictable Pods with finalizer on control plane Nodes.") - 
cpDeploymentWithFinalizerName := fmt.Sprintf("%s-%s", "evictable-pod-cp", util.RandomString(3)) - framework.DeployEvictablePod(ctx, framework.DeployEvictablePodInput{ + By("Deploy Deployments with unevictable Pods on control plane and MachineDeployment Nodes.") + framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{ WorkloadClusterProxy: workloadClusterProxy, ControlPlane: controlplane, - DeploymentName: cpDeploymentWithFinalizerName, - Namespace: "evictable-workload", + DeploymentName: cpDeploymentWithPDBName(), + Namespace: "unevictable-workload", NodeSelector: map[string]string{nodeOwnerLabelKey: "KubeadmControlPlane-" + controlplane.Name}, ModifyDeployment: func(deployment *appsv1.Deployment) { - deployment.Spec.Template.ObjectMeta.Finalizers = []string{"test.cluster.x-k8s.io/block"} + // Ensure we try to drain unevictable Pods last; otherwise they would block the drain of the evictable Pods. + deployment.Spec.Template.Labels["mdr"] = "drain-order-10" }, WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"), }) - By("Deploy Deployment with evictable Pods with finalizer on MachineDeployment Nodes.") - mdDeploymentWithFinalizerName := map[string]string{} for _, md := range machineDeployments { - mdDeploymentWithFinalizerName[md.Name] = fmt.Sprintf("%s-%s", "evictable-pod-md", util.RandomString(3)) - framework.DeployEvictablePod(ctx, framework.DeployEvictablePodInput{ + framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{ WorkloadClusterProxy: workloadClusterProxy, MachineDeployment: md, - DeploymentName: mdDeploymentWithFinalizerName[md.Name], - Namespace: "evictable-workload", + DeploymentName: mdDeploymentWithPDBName(md.Name), + Namespace: "unevictable-workload", NodeSelector: map[string]string{nodeOwnerLabelKey: "MachineDeployment-" + md.Name}, + ModifyDeployment: func(deployment *appsv1.Deployment) { + // Ensure we try to drain unevictable Pods last; otherwise they would block the drain of the evictable Pods.
+ deployment.Spec.Template.Labels["mdr"] = "drain-order-10" + }, + WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"), + }) + } + + By("Deploy Deployments with evictable Pods with finalizer on control plane and MachineDeployment Nodes.") + evictablePodDeployments := map[string]map[string]string{ + "drain-order-1": {"mdr": "drain-order-1"}, + "drain-order-5": {"mdr": "drain-order-5"}, + "skip": {"cluster.x-k8s.io/drain": string(clusterv1.MachineDrainRuleDrainBehaviorSkip)}, + } + for deploymentNamePrefix, deploymentLabels := range evictablePodDeployments { + framework.DeployEvictablePod(ctx, framework.DeployEvictablePodInput{ + WorkloadClusterProxy: workloadClusterProxy, + ControlPlane: controlplane, + DeploymentName: cpDeploymentName(deploymentNamePrefix), + Namespace: "evictable-workload", + NodeSelector: map[string]string{nodeOwnerLabelKey: "KubeadmControlPlane-" + controlplane.Name}, ModifyDeployment: func(deployment *appsv1.Deployment) { deployment.Spec.Template.ObjectMeta.Finalizers = []string{"test.cluster.x-k8s.io/block"} + for k, v := range deploymentLabels { + deployment.Spec.Template.Labels[k] = v + } }, WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"), }) + for _, md := range machineDeployments { + framework.DeployEvictablePod(ctx, framework.DeployEvictablePodInput{ + WorkloadClusterProxy: workloadClusterProxy, + MachineDeployment: md, + DeploymentName: mdDeploymentName(deploymentNamePrefix, md.Name), + Namespace: "evictable-workload", + NodeSelector: map[string]string{nodeOwnerLabelKey: "MachineDeployment-" + md.Name}, + ModifyDeployment: func(deployment *appsv1.Deployment) { + deployment.Spec.Template.ObjectMeta.Finalizers = []string{"test.cluster.x-k8s.io/block"} + for k, v := range deploymentLabels { + deployment.Spec.Template.Labels[k] = v + } + }, + WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"), + }) + } } - By("Trigger scale down the control plane to 1 and MachineDeployments to 0.") + By("Trigger Node drain by scaling down the control plane to 1 and MachineDeployments to 0.") modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{ ClusterProxy: input.BootstrapClusterProxy, Cluster: cluster, @@ -249,9 +265,11 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), }) - By("Verify Node drains for control plane and MachineDeployment Machines are blocked (PDBs & Pods with finalizer") - var drainedCPMachine *clusterv1.Machine - var evictedCPPod *corev1.Pod + By("Get draining control plane and MachineDeployment Machines.") + var drainingCPMachineKey client.ObjectKey + var drainingCPNodeName string + drainingMDMachineKeys := map[string]client.ObjectKey{} + drainingMDNodeNames := map[string]string{} Eventually(func(g Gomega) { controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{ Lister: input.BootstrapClusterProxy.GetClient(), @@ -263,30 +281,13 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo condition = conditions.Get(&machine, clusterv1.DrainingSucceededCondition) if condition != nil { // We only expect to find the condition on one Machine (as KCP will only try to drain one Machine at a time) - drainedCPMachine = &machine - break + drainingCPMachineKey = 
client.ObjectKeyFromObject(&machine) + drainingCPNodeName = machine.Status.NodeRef.Name + return } } - g.Expect(condition).ToNot(BeNil()) - g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) - // The evictable Pod should be evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. - g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", cpDeploymentWithFinalizerName))) - // The unevictable Pod should not be evicted because of the PDB. - g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", cpDeploymentAndPDBName))) - - // Verify evictable Pod was evicted and terminated (i.e. phase is succeeded) - evictedPods := &corev1.PodList{} - g.Expect(workloadClusterProxy.GetClient().List(ctx, evictedPods, - client.InNamespace("evictable-workload"), - client.MatchingLabels{"deployment": cpDeploymentWithFinalizerName}, - client.MatchingFieldsSelector{Selector: fields.OneTermEqualSelector("spec.nodeName", drainedCPMachine.Status.NodeRef.Name)}, - )).To(Succeed()) - g.Expect(evictedPods.Items).To(HaveLen(1)) - evictedCPPod = &evictedPods.Items[0] - verifyPodEvictedAndSucceeded(g, evictedCPPod) + g.Expect(drainingCPNodeName).ToNot(BeEmpty()) }, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed()) - drainedMDMachines := map[string]*clusterv1.Machine{} - evictedMDPods := map[string]*corev1.Pod{} for _, md := range machineDeployments { Eventually(func(g Gomega) { machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{ @@ -296,49 +297,95 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo MachineDeployment: *md, }) g.Expect(machines).To(HaveLen(1)) - drainedMDMachines[md.Name] = &machines[0] - - condition := conditions.Get(&machines[0], clusterv1.DrainingSucceededCondition) - g.Expect(condition).ToNot(BeNil()) - g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) - // The evictable Pod should be evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. - g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", mdDeploymentWithFinalizerName[md.Name]))) - // The unevictable Pod should not be evicted because of the PDB. - g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", mdDeploymentAndPDBNames[md.Name]))) - - // Verify evictable Pod was evicted and terminated (i.e. 
phase is succeeded) - evictedPods := &corev1.PodList{} - g.Expect(workloadClusterProxy.GetClient().List(ctx, evictedPods, - client.InNamespace("evictable-workload"), - client.MatchingLabels{"deployment": mdDeploymentWithFinalizerName[md.Name]}, - client.MatchingFieldsSelector{Selector: fields.OneTermEqualSelector("spec.nodeName", machines[0].Status.NodeRef.Name)}, - )).To(Succeed()) - g.Expect(evictedPods.Items).To(HaveLen(1)) - evictedMDPods[md.Name] = &evictedPods.Items[0] - verifyPodEvictedAndSucceeded(g, evictedMDPods[md.Name]) + drainingMDMachineKeys[md.Name] = client.ObjectKeyFromObject(&machines[0]) + drainingMDNodeNames[md.Name] = machines[0].Status.NodeRef.Name }, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed()) } - By("Unblock deletion of evicted Pods by removing the finalizer") - Eventually(func(g Gomega) { - g.Expect(workloadClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(evictedCPPod), evictedCPPod)).To(Succeed()) - originalPod := evictedCPPod.DeepCopy() - evictedCPPod.Finalizers = []string{} - g.Expect(workloadClusterProxy.GetClient().Patch(ctx, evictedCPPod, client.MergeFrom(originalPod))).To(Succeed()) - }, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed()) + By("Verify drain of Deployments with order 1.") + verifyNodeDrainsBlockedAndUnblock(ctx, verifyNodeDrainsBlockedAndUnblockInput{ + BootstrapClusterProxy: input.BootstrapClusterProxy, + WorkloadClusterProxy: workloadClusterProxy, + Cluster: cluster, + MachineDeployments: machineDeployments, + DrainedCPMachineKey: drainingCPMachineKey, + DrainedMDMachineKeys: drainingMDMachineKeys, + DeploymentNamePrefix: "drain-order-1", + CPConditionMessageSubstrings: []string{ + // The evictable Pod with order 1 was evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. + fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", cpDeploymentName("drain-order-1")), + // After the Pod with order 1 is gone, the drain continues with the Pod with order 5. + fmt.Sprintf("After above Pods have been removed from the Node, the following Pods will be evicted: evictable-workload/%s", cpDeploymentName("drain-order-5")), + }, + MDConditionMessageSubstrings: func() map[string][]string { + messageSubStrings := map[string][]string{} + for _, md := range machineDeployments { + messageSubStrings[md.Name] = []string{ + // The evictable Pod with order 1 was evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. + fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", mdDeploymentName("drain-order-1", md.Name)), + // After the Pod with order 1 is gone, the drain continues with the Pod with order 5. 
+ fmt.Sprintf("After above Pods have been removed from the Node, the following Pods will be evicted: evictable-workload/%s", mdDeploymentName("drain-order-5", md.Name)), + } + } + return messageSubStrings + }(), + WaitForMachineDelete: input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), + }) + + By("Verify drain of Deployments with order 5.") + verifyNodeDrainsBlockedAndUnblock(ctx, verifyNodeDrainsBlockedAndUnblockInput{ + BootstrapClusterProxy: input.BootstrapClusterProxy, + WorkloadClusterProxy: workloadClusterProxy, + Cluster: cluster, + MachineDeployments: machineDeployments, + DrainedCPMachineKey: drainingCPMachineKey, + DrainedMDMachineKeys: drainingMDMachineKeys, + DeploymentNamePrefix: "drain-order-5", + CPConditionMessageSubstrings: []string{ + // The evictable Pod with order 5 was evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. + fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", cpDeploymentName("drain-order-5")), + // After the Pod with order 5 is gone, the drain continues with the unevictable Pod. + fmt.Sprintf("After above Pods have been removed from the Node, the following Pods will be evicted: unevictable-workload/%s", cpDeploymentWithPDBName()), + }, + MDConditionMessageSubstrings: func() map[string][]string { + messageSubStrings := map[string][]string{} + for _, md := range machineDeployments { + messageSubStrings[md.Name] = []string{ + // The evictable Pod with order 5 was evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. + fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", mdDeploymentName("drain-order-5", md.Name)), + // After the Pod with order 5 is gone, the drain continues with the unevictable Pod. 
+ fmt.Sprintf("After above Pods have been removed from the Node, the following Pods will be evicted: unevictable-workload/%s", mdDeploymentWithPDBName(md.Name)), + } + } + return messageSubStrings + }(), + WaitForMachineDelete: input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), + }) + + By("Verify skipped Pods are still there and don't have a deletionTimestamp") + skippedCPPods := &corev1.PodList{} + Expect(workloadClusterProxy.GetClient().List(ctx, skippedCPPods, + client.InNamespace("evictable-workload"), + client.MatchingLabels{"deployment": cpDeploymentName("skip")}, + client.MatchingFieldsSelector{Selector: fields.OneTermEqualSelector("spec.nodeName", drainingCPNodeName)}, + )).To(Succeed()) + Expect(skippedCPPods.Items).To(HaveLen(1)) + Expect(skippedCPPods.Items[0].DeletionTimestamp.IsZero()).To(BeTrue()) for _, md := range machineDeployments { - Eventually(func(g Gomega) { - evictedMDPod := evictedMDPods[md.Name] - g.Expect(workloadClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(evictedMDPod), evictedMDPod)).To(Succeed()) - originalPod := evictedMDPod.DeepCopy() - evictedMDPod.Finalizers = []string{} - g.Expect(workloadClusterProxy.GetClient().Patch(ctx, evictedMDPod, client.MergeFrom(originalPod))).To(Succeed()) - }, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed()) + skippedMDPods := &corev1.PodList{} + Expect(workloadClusterProxy.GetClient().List(ctx, skippedMDPods, + client.InNamespace("evictable-workload"), + client.MatchingLabels{"deployment": mdDeploymentName("skip", md.Name)}, + client.MatchingFieldsSelector{Selector: fields.OneTermEqualSelector("spec.nodeName", drainingMDNodeNames[md.Name])}, + )).To(Succeed()) + Expect(skippedMDPods.Items).To(HaveLen(1)) + Expect(skippedMDPods.Items[0].DeletionTimestamp.IsZero()).To(BeTrue()) } - By("Verify Node drains for control plane and MachineDeployment Machines are blocked (only PDBs") + By("Verify Node drains for control plane and MachineDeployment Machines are blocked (only by PDBs)") Eventually(func(g Gomega) { - g.Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(drainedCPMachine), drainedCPMachine)).To(Succeed()) + drainedCPMachine := &clusterv1.Machine{} + g.Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, drainingCPMachineKey, drainedCPMachine)).To(Succeed()) condition := conditions.Get(drainedCPMachine, clusterv1.DrainingSucceededCondition) g.Expect(condition).ToNot(BeNil()) @@ -346,19 +393,20 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo // The evictable Pod should be gone now. g.Expect(condition.Message).ToNot(ContainSubstring("Pods with deletionTimestamp that still exist")) // The unevictable Pod should still not be evicted because of the PDB. - g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", cpDeploymentAndPDBName))) + g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. 
The disruption budget %s needs", cpDeploymentWithPDBName()))) }, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed()) for _, md := range machineDeployments { Eventually(func(g Gomega) { - g.Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(drainedMDMachines[md.Name]), drainedMDMachines[md.Name])).To(Succeed()) + drainedMDMachine := &clusterv1.Machine{} + g.Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, drainingMDMachineKeys[md.Name], drainedMDMachine)).To(Succeed()) - condition := conditions.Get(drainedMDMachines[md.Name], clusterv1.DrainingSucceededCondition) + condition := conditions.Get(drainedMDMachine, clusterv1.DrainingSucceededCondition) g.Expect(condition).ToNot(BeNil()) g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) // The evictable Pod should be gone now. g.Expect(condition.Message).ToNot(ContainSubstring("Pods with deletionTimestamp that still exist")) // The unevictable Pod should still not be evicted because of the PDB. - g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", mdDeploymentAndPDBNames[md.Name]))) + g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", mdDeploymentWithPDBName(md.Name)))) }, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed()) } @@ -416,6 +464,158 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo }) } +func generateMachineDrainRule(clusterNamespace, clusterName, mdrName string, order int32) *clusterv1.MachineDrainRule { + return &clusterv1.MachineDrainRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: mdrName, + Namespace: clusterNamespace, + }, + Spec: clusterv1.MachineDrainRuleSpec{ + Drain: clusterv1.MachineDrainRuleDrainConfig{ + Behavior: clusterv1.MachineDrainRuleDrainBehaviorDrain, + Order: ptr.To[int32](order), + }, + Machines: []clusterv1.MachineDrainRuleMachineSelector{ + // Select all Machines with the ClusterNameLabel belonging to Clusters with the ClusterNameLabel. + { + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + }, + }, + ClusterSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + }, + }, + }, + }, + Pods: []clusterv1.MachineDrainRulePodSelector{ + // Select all Pods with label "mdr": mdrName in all Namespaces except "kube-system". 
+ { + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "mdr": mdrName, + }, + }, + NamespaceSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{ + metav1.NamespaceSystem, + }, + }, + }, + }, + }, + }, + }, + } +} + +func cpDeploymentName(prefix string) string { + return fmt.Sprintf("%s-%s", prefix, "cp") +} + +func cpDeploymentWithPDBName() string { + return "unevictable-cp" +} + +func mdDeploymentName(prefix, name string) string { + return fmt.Sprintf("%s-%s", prefix, name) +} + +func mdDeploymentWithPDBName(name string) string { + return fmt.Sprintf("unevictable-%s", name) +} + +type verifyNodeDrainsBlockedAndUnblockInput struct { + BootstrapClusterProxy framework.ClusterProxy + WorkloadClusterProxy framework.ClusterProxy + Cluster *clusterv1.Cluster + MachineDeployments []*clusterv1.MachineDeployment + DrainedCPMachineKey client.ObjectKey + DrainedMDMachineKeys map[string]client.ObjectKey + DeploymentNamePrefix string + CPConditionMessageSubstrings []string + MDConditionMessageSubstrings map[string][]string + WaitForMachineDelete []interface{} +} + +func verifyNodeDrainsBlockedAndUnblock(ctx context.Context, input verifyNodeDrainsBlockedAndUnblockInput) { + By(fmt.Sprintf("Verify Node drains for control plane and MachineDeployment Machines are blocked (%s)", input.DeploymentNamePrefix)) + var evictedCPPod *corev1.Pod + Eventually(func(g Gomega) { + drainedCPMachine := &clusterv1.Machine{} + g.Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, input.DrainedCPMachineKey, drainedCPMachine)).To(Succeed()) + + // Verify condition on drained CP Machine. + condition := conditions.Get(drainedCPMachine, clusterv1.DrainingSucceededCondition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) + for _, messageSubstring := range input.CPConditionMessageSubstrings { + g.Expect(condition.Message).To(ContainSubstring(messageSubstring)) + } + + // Verify evictable Pod was evicted and terminated (i.e. phase is succeeded) + evictedPods := &corev1.PodList{} + g.Expect(input.WorkloadClusterProxy.GetClient().List(ctx, evictedPods, + client.InNamespace("evictable-workload"), + client.MatchingLabels{"deployment": cpDeploymentName(input.DeploymentNamePrefix)}, + client.MatchingFieldsSelector{Selector: fields.OneTermEqualSelector("spec.nodeName", drainedCPMachine.Status.NodeRef.Name)}, + )).To(Succeed()) + g.Expect(evictedPods.Items).To(HaveLen(1)) + evictedCPPod = &evictedPods.Items[0] + verifyPodEvictedAndSucceeded(g, evictedCPPod) + }, input.WaitForMachineDelete...).Should(Succeed()) + + evictedMDPods := map[string]*corev1.Pod{} + for _, md := range input.MachineDeployments { + Eventually(func(g Gomega) { + drainedMDMachine := &clusterv1.Machine{} + g.Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, input.DrainedMDMachineKeys[md.Name], drainedMDMachine)).To(Succeed()) + + // Verify condition on drained MD Machine. + condition := conditions.Get(drainedMDMachine, clusterv1.DrainingSucceededCondition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) + for _, messageSubstring := range input.MDConditionMessageSubstrings[md.Name] { + g.Expect(condition.Message).To(ContainSubstring(messageSubstring)) + } + + // Verify evictable Pod was evicted and terminated (i.e. 
phase is succeeded) + evictedPods := &corev1.PodList{} + g.Expect(input.WorkloadClusterProxy.GetClient().List(ctx, evictedPods, + client.InNamespace("evictable-workload"), + client.MatchingLabels{"deployment": mdDeploymentName(input.DeploymentNamePrefix, md.Name)}, + client.MatchingFieldsSelector{Selector: fields.OneTermEqualSelector("spec.nodeName", drainedMDMachine.Status.NodeRef.Name)}, + )).To(Succeed()) + g.Expect(evictedPods.Items).To(HaveLen(1)) + evictedMDPods[md.Name] = &evictedPods.Items[0] + verifyPodEvictedAndSucceeded(g, evictedMDPods[md.Name]) + }, input.WaitForMachineDelete...).Should(Succeed()) + } + + By(fmt.Sprintf("Unblock deletion of evicted Pods by removing the finalizer (%s)", input.DeploymentNamePrefix)) + Eventually(func(g Gomega) { + g.Expect(input.WorkloadClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(evictedCPPod), evictedCPPod)).To(Succeed()) + originalPod := evictedCPPod.DeepCopy() + evictedCPPod.Finalizers = []string{} + g.Expect(input.WorkloadClusterProxy.GetClient().Patch(ctx, evictedCPPod, client.MergeFrom(originalPod))).To(Succeed()) + }, input.WaitForMachineDelete...).Should(Succeed()) + for _, md := range input.MachineDeployments { + Eventually(func(g Gomega) { + evictedMDPod := evictedMDPods[md.Name] + g.Expect(input.WorkloadClusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(evictedMDPod), evictedMDPod)).To(Succeed()) + originalPod := evictedMDPod.DeepCopy() + evictedMDPod.Finalizers = []string{} + g.Expect(input.WorkloadClusterProxy.GetClient().Patch(ctx, evictedMDPod, client.MergeFrom(originalPod))).To(Succeed()) + }, input.WaitForMachineDelete...).Should(Succeed()) + } +} + func verifyPodEvictedAndSucceeded(g Gomega, pod *corev1.Pod) { g.Expect(pod.Status.Phase).To(Equal(corev1.PodSucceeded)) podEvicted := false diff --git a/test/e2e/node_drain_timeout_test.go b/test/e2e/node_drain_test.go similarity index 94% rename from test/e2e/node_drain_timeout_test.go rename to test/e2e/node_drain_test.go index 882c9621ff5c..bdb899972f7b 100644 --- a/test/e2e/node_drain_timeout_test.go +++ b/test/e2e/node_drain_test.go @@ -24,7 +24,7 @@ import ( "k8s.io/utils/ptr" ) -var _ = Describe("When testing node drain timeout", func() { +var _ = Describe("When testing Node drain", func() { NodeDrainTimeoutSpec(ctx, func() NodeDrainTimeoutSpecInput { return NodeDrainTimeoutSpecInput{ E2EConfig: e2eConfig, diff --git a/test/framework/deployment_helpers.go b/test/framework/deployment_helpers.go index f31a03eed7a5..a3979031f253 100644 --- a/test/framework/deployment_helpers.go +++ b/test/framework/deployment_helpers.go @@ -496,6 +496,8 @@ type DeployUnevictablePodInput struct { Namespace string NodeSelector map[string]string + ModifyDeployment func(deployment *appsv1.Deployment) + WaitForDeploymentAvailableInterval []interface{} } @@ -518,6 +520,8 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput) NodeSelector: input.NodeSelector, }) + input.ModifyDeployment(workloadDeployment) + workloadClient := input.WorkloadClusterProxy.GetClientSet() AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{