Skip to content

Commit

Permalink
Merge pull request #440 from Danil-Grigorev/stabilize-e2e
Browse files Browse the repository at this point in the history
🐛 E2E failures in CI
  • Loading branch information
Danil-Grigorev authored Sep 16, 2024
2 parents 967b2da + 4578c23 commit 465f030
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 52 deletions.
113 changes: 105 additions & 8 deletions test/e2e/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ import (
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/cmd/clusterctl/client/config"
"sigs.k8s.io/cluster-api/test/framework"
"sigs.k8s.io/cluster-api/test/framework/clusterctl"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/yaml"
)

Expand All @@ -55,16 +57,11 @@ func Byf(format string, a ...interface{}) {
By(fmt.Sprintf(format, a...))
}

func setupSpecNamespace(ctx context.Context, specName string, clusterProxy framework.ClusterProxy, artifactFolder string) (*corev1.Namespace, context.CancelFunc) {
func setupSpecNamespace(ctx context.Context, specName string, clusterProxy framework.ClusterProxy, _ string) (*corev1.Namespace, context.CancelFunc) {
Byf("Creating a namespace for hosting the %q test spec", specName)
namespace, cancelWatches := framework.CreateNamespaceAndWatchEvents(ctx, framework.CreateNamespaceAndWatchEventsInput{
Creator: clusterProxy.GetClient(),
ClientSet: clusterProxy.GetClientSet(),
Name: fmt.Sprintf("%s-%s", specName, util.RandomString(6)),
LogFolder: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName()),
})

return namespace, cancelWatches
_, cancelWatches := context.WithCancel(ctx)
return framework.CreateNamespace(ctx, framework.CreateNamespaceInput{Creator: clusterProxy.GetClient(), Name: fmt.Sprintf("%s-%s", specName, util.RandomString(6))}, "40s", "10s"), cancelWatches
}

func cleanupInstallation(ctx context.Context, clusterctlLogFolder, clusterctlConfigPath string, proxy framework.ClusterProxy) func() {
Expand Down Expand Up @@ -191,3 +188,103 @@ func localLoadE2EConfig(configPath string) *clusterctl.E2EConfig {

return config
}

// UpgradeManagementCluster upgrades the providers of a management cluster using clusterctl.
//
// The input must set either input.Contract or at least one provider
// (a custom upgrade), but not both. The function returns as soon as
// clusterctl.Upgrade does: it neither waits for the provider controllers nor
// collects metrics, so callers that need a ready cluster must wait themselves.
func UpgradeManagementCluster(ctx context.Context, input clusterctl.UpgradeManagementClusterAndWaitInput) {
	Expect(ctx).NotTo(BeNil(), "ctx is required for UpgradeManagementCluster")
	Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling UpgradeManagementCluster")
	Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling UpgradeManagementCluster")

	// Check whether the user wants a custom upgrade, i.e. names at least one provider explicitly.
	isCustomUpgrade := input.CoreProvider != "" ||
		len(input.BootstrapProviders) > 0 ||
		len(input.ControlPlaneProviders) > 0 ||
		len(input.InfrastructureProviders) > 0 ||
		len(input.IPAMProviders) > 0 ||
		len(input.RuntimeExtensionProviders) > 0 ||
		len(input.AddonProviders) > 0

	// Exactly one of the two upgrade modes must be selected: contract-based or custom.
	hasContract := input.Contract != ""
	Expect(hasContract != isCustomUpgrade).To(BeTrue(), `Invalid argument. Either the input.Contract parameter or at least one of the following providers has to be set:
input.CoreProvider, input.BootstrapProviders, input.ControlPlaneProviders, input.InfrastructureProviders, input.IPAMProviders, input.RuntimeExtensionProviders, input.AddonProviders`)

	Expect(os.MkdirAll(input.LogFolder, 0750)).To(Succeed(), "Invalid argument. input.LogFolder can't be created for UpgradeManagementCluster")

	upgradeInput := clusterctl.UpgradeInput{
		ClusterctlConfigPath:      input.ClusterctlConfigPath,
		ClusterctlVariables:       input.ClusterctlVariables,
		ClusterName:               input.ClusterProxy.GetName(),
		KubeconfigPath:            input.ClusterProxy.GetKubeconfigPath(),
		Contract:                  input.Contract,
		CoreProvider:              input.CoreProvider,
		BootstrapProviders:        input.BootstrapProviders,
		ControlPlaneProviders:     input.ControlPlaneProviders,
		InfrastructureProviders:   input.InfrastructureProviders,
		IPAMProviders:             input.IPAMProviders,
		RuntimeExtensionProviders: input.RuntimeExtensionProviders,
		AddonProviders:            input.AddonProviders,
		LogFolder:                 input.LogFolder,
	}

	clusterctl.Upgrade(ctx, upgradeInput)

	// Metrics collection is skipped on purpose, as it causes failures in CI.
}

// InitManagementCluster initializes a management cluster using clusterctl and
// then waits for each controller deployment to become available.
//
// An empty input.CoreProvider, input.BootstrapProviders or
// input.ControlPlaneProviders is replaced by config.ClusterAPIProviderName,
// config.KubeadmBootstrapProviderName and config.KubeadmControlPlaneProviderName
// respectively. clusterctl.Init is only invoked when no controller deployments
// are found in the cluster; otherwise the function goes straight to waiting.
// intervals are forwarded to framework.WaitForDeploymentsAvailable.
func InitManagementCluster(ctx context.Context, input clusterctl.InitManagementClusterAndWatchControllerLogsInput, intervals ...interface{}) {
	Expect(ctx).NotTo(BeNil(), "ctx is required for InitManagementCluster")
	Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling InitManagementCluster")
	Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling InitManagementCluster")
	Expect(input.InfrastructureProviders).ToNot(BeEmpty(), "Invalid argument. input.InfrastructureProviders can't be empty when calling InitManagementCluster")
	Expect(os.MkdirAll(input.LogFolder, 0750)).To(Succeed(), "Invalid argument. input.LogFolder can't be created for InitManagementCluster")

	// Fill in the default providers for anything the caller left unset.
	if input.CoreProvider == "" {
		input.CoreProvider = config.ClusterAPIProviderName
	}
	if len(input.BootstrapProviders) == 0 {
		input.BootstrapProviders = []string{config.KubeadmBootstrapProviderName}
	}
	if len(input.ControlPlaneProviders) == 0 {
		input.ControlPlaneProviders = []string{config.KubeadmControlPlaneProviderName}
	}

	mgmtClient := input.ClusterProxy.GetClient()
	listInput := framework.GetControllerDeploymentsInput{Lister: mgmtClient}

	// Only run clusterctl init when no controller deployments are present yet.
	if deployments := framework.GetControllerDeployments(ctx, listInput); len(deployments) == 0 {
		clusterctl.Init(ctx, clusterctl.InitInput{
			// reference to the management cluster hosting this test
			KubeconfigPath: input.ClusterProxy.GetKubeconfigPath(),
			// clusterctl config file pointing to the local provider repository created for this test
			ClusterctlConfigPath: input.ClusterctlConfigPath,
			// desired list of providers for a single-tenant management cluster
			CoreProvider:              input.CoreProvider,
			BootstrapProviders:        input.BootstrapProviders,
			ControlPlaneProviders:     input.ControlPlaneProviders,
			InfrastructureProviders:   input.InfrastructureProviders,
			IPAMProviders:             input.IPAMProviders,
			RuntimeExtensionProviders: input.RuntimeExtensionProviders,
			AddonProviders:            input.AddonProviders,
			// clusterctl logs folder
			LogFolder: input.LogFolder,
		})
	}

	log.FromContext(ctx).Info("Waiting for provider controllers to be running")

	// List again so deployments created by the init above are included.
	deployments := framework.GetControllerDeployments(ctx, listInput)
	Expect(deployments).ToNot(BeEmpty(), "The list of controller deployments should not be empty")
	for i := range deployments {
		waitInput := framework.WaitForDeploymentsAvailableInput{
			Getter:     mgmtClient,
			Deployment: deployments[i],
		}
		framework.WaitForDeploymentsAvailable(ctx, waitInput, intervals...)
	}
}
4 changes: 3 additions & 1 deletion test/e2e/e2e_clusterclass_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ var _ = Describe("Workload cluster creation", func() {
}
})
Expect(err).ToNot(HaveOccurred())
Expect(bootstrapClusterProxy.Apply(ctx, []byte(clusterClassConfig))).To(Succeed(), "Failed to apply ClusterClass definition")
Eventually(func() error {
return bootstrapClusterProxy.Apply(ctx, []byte(clusterClassConfig))
}, e2eConfig.GetIntervals(specName, "wait-cluster")...).Should(Succeed(), "Failed to apply ClusterClass definition")

By("Create a Docker Cluster from topology")

Expand Down
6 changes: 4 additions & 2 deletions test/e2e/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ func setupBootstrapCluster(config *clusterctl.E2EConfig, scheme *runtime.Scheme,

// initBootstrapCluster initializes a bootstrap cluster with the latest minor version.
func initBootstrapCluster(bootstrapClusterProxy framework.ClusterProxy, config *clusterctl.E2EConfig, clusterctlConfig, artifactFolder string) {
clusterctl.InitManagementClusterAndWatchControllerLogs(context.TODO(), clusterctl.InitManagementClusterAndWatchControllerLogsInput{
InitManagementCluster(context.TODO(), clusterctl.InitManagementClusterAndWatchControllerLogsInput{
ClusterProxy: bootstrapClusterProxy,
ClusterctlConfigPath: clusterctlConfig,
InfrastructureProviders: config.InfrastructureProviders(),
Expand All @@ -245,13 +245,14 @@ func initBootstrapCluster(bootstrapClusterProxy framework.ClusterProxy, config *
BootstrapProviders: []string{"rke2-bootstrap"},
ControlPlaneProviders: []string{"rke2-control-plane"},
LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()),
DisableMetricsCollection: true,
}, config.GetIntervals(bootstrapClusterProxy.GetName(), "wait-controllers")...)
}

// initUpgradableBootstrapCluster initializes a bootstrap cluster with the latest minor version N-1 and used to perform an upgrade to the latest version.
// Make sure to update the version in the providers list to the latest minor version N-1.
func initUpgradableBootstrapCluster(bootstrapClusterProxy framework.ClusterProxy, config *clusterctl.E2EConfig, clusterctlConfig, artifactFolder string) {
clusterctl.InitManagementClusterAndWatchControllerLogs(context.TODO(), clusterctl.InitManagementClusterAndWatchControllerLogsInput{
InitManagementCluster(context.TODO(), clusterctl.InitManagementClusterAndWatchControllerLogsInput{
ClusterProxy: bootstrapClusterProxy,
ClusterctlConfigPath: clusterctlConfig,
InfrastructureProviders: config.InfrastructureProviders(),
Expand All @@ -260,6 +261,7 @@ func initUpgradableBootstrapCluster(bootstrapClusterProxy framework.ClusterProxy
BootstrapProviders: []string{"rke2-bootstrap:v0.6.0"},
ControlPlaneProviders: []string{"rke2-control-plane:v0.6.0"},
LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()),
DisableMetricsCollection: true,
}, config.GetIntervals(bootstrapClusterProxy.GetName(), "wait-controllers")...)
}

Expand Down
2 changes: 1 addition & 1 deletion test/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ var _ = Describe("Workload cluster creation", func() {
}, result)

WaitForClusterToUpgrade(ctx, WaitForClusterToUpgradeInput{
Lister: bootstrapClusterProxy.GetClient(),
Reader: bootstrapClusterProxy.GetClient(),
ControlPlane: result.ControlPlane,
MachineDeployments: result.MachineDeployments,
VersionAfterUpgrade: e2eConfig.GetVariable(KubernetesVersionUpgradeTo),
Expand Down
6 changes: 3 additions & 3 deletions test/e2e/e2e_upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,13 @@ var _ = Describe("Workload cluster creation", func() {
}, e2eConfig.GetIntervals(specName, "wait-control-plane")...)

By("Upgrading to latest boostrap/controlplane provider version")
clusterctl.UpgradeManagementClusterAndWait(ctx, clusterctl.UpgradeManagementClusterAndWaitInput{
UpgradeManagementCluster(ctx, clusterctl.UpgradeManagementClusterAndWaitInput{
ClusterProxy: bootstrapClusterProxy,
ClusterctlConfigPath: clusterctlConfigPath,
BootstrapProviders: []string{"rke2-bootstrap:v0.7.99"},
ControlPlaneProviders: []string{"rke2-control-plane:v0.7.99"},
LogFolder: clusterctlLogFolder,
}, e2eConfig.GetIntervals(specName, "wait-controllers")...)
})

WaitForControlPlaneToBeReady(ctx, WaitForControlPlaneToBeReadyInput{
Getter: bootstrapClusterProxy.GetClient(),
Expand Down Expand Up @@ -174,7 +174,7 @@ var _ = Describe("Workload cluster creation", func() {
}, result)

WaitForClusterToUpgrade(ctx, WaitForClusterToUpgradeInput{
Lister: bootstrapClusterProxy.GetClient(),
Reader: bootstrapClusterProxy.GetClient(),
ControlPlane: result.ControlPlane,
MachineDeployments: result.MachineDeployments,
VersionAfterUpgrade: e2eConfig.GetVariable(KubernetesVersionUpgradeTo),
Expand Down
76 changes: 39 additions & 37 deletions test/e2e/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"

Expand Down Expand Up @@ -138,14 +137,6 @@ func ApplyClusterTemplateAndWait(ctx context.Context, input ApplyClusterTemplate
})
Expect(workloadClusterTemplate).ToNot(BeNil(), "Failed to get the cluster template")

// Ensure we have a Cluster for dump and cleanup steps in AfterEach even if ApplyClusterTemplateAndWait fails.
result.Cluster = &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: input.ConfigCluster.ClusterName,
Namespace: input.ConfigCluster.Namespace,
},
}

ApplyCustomClusterTemplateAndWait(ctx, ApplyCustomClusterTemplateAndWaitInput{
ClusterProxy: input.ClusterProxy,
CustomTemplateYAML: workloadClusterTemplate,
Expand Down Expand Up @@ -174,19 +165,10 @@ func ApplyCustomClusterTemplateAndWait(ctx context.Context, input ApplyCustomClu

Byf("Creating the workload cluster with name %q from the provided yaml", input.ClusterName)

// Ensure we have a Cluster for dump and cleanup steps in AfterEach even if ApplyClusterTemplateAndWait fails.
result.Cluster = &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: input.ClusterName,
Namespace: input.Namespace,
},
}

Byf("Applying the cluster template yaml of cluster %s", klog.KRef(input.Namespace, input.ClusterName))
Eventually(func() error {
return input.ClusterProxy.Apply(ctx, input.CustomTemplateYAML, input.Args...)
// return input.ClusterProxy.CreateOrUpdate(ctx, input.CustomTemplateYAML, input.CreateOrUpdateOpts...)
}, 1*time.Minute).Should(Succeed(), "Failed to apply the cluster template")
}, input.WaitForClusterIntervals...).Should(Succeed(), "Failed to apply the cluster template")

// Once we applied the cluster template we can run PreWaitForCluster.
// Note: This can e.g. be used to verify the BeforeClusterCreate lifecycle hook is executed
Expand Down Expand Up @@ -218,7 +200,7 @@ func ApplyCustomClusterTemplateAndWait(ctx context.Context, input ApplyCustomClu
input.WaitForControlPlaneMachinesReady(ctx, input, result)

Byf("Waiting for the machine deployments of cluster %s to be provisioned", klog.KRef(input.Namespace, input.ClusterName))
result.MachineDeployments = framework.DiscoveryAndWaitForMachineDeployments(ctx, framework.DiscoveryAndWaitForMachineDeploymentsInput{
result.MachineDeployments = DiscoveryAndWaitForMachineDeployments(ctx, framework.DiscoveryAndWaitForMachineDeploymentsInput{
Lister: input.ClusterProxy.GetClient(),
Cluster: result.Cluster,
}, input.WaitForMachineDeployments...)
Expand Down Expand Up @@ -285,7 +267,7 @@ func DiscoveryAndWaitForRKE2ControlPlaneInitialized(ctx context.Context, input D
Namespace: input.Cluster.Namespace,
})
g.Expect(controlPlane).ToNot(BeNil())
}, "10s", "1s").Should(Succeed(), "Couldn't get the control plane for the cluster %s", klog.KObj(input.Cluster))
}, "2m", "1s").Should(Succeed(), "Couldn't get the control plane for the cluster %s", klog.KObj(input.Cluster))

return controlPlane
}
Expand Down Expand Up @@ -445,7 +427,7 @@ func WaitForMachineConditions(ctx context.Context, input WaitForMachineCondition

// WaitForClusterToUpgradeInput is the input for WaitForClusterToUpgrade.
type WaitForClusterToUpgradeInput struct {
Lister framework.Lister
Reader framework.GetLister
ControlPlane *controlplanev1.RKE2ControlPlane
MachineDeployments []*clusterv1.MachineDeployment
VersionAfterUpgrade string
Expand All @@ -455,32 +437,52 @@ type WaitForClusterToUpgradeInput struct {
func WaitForClusterToUpgrade(ctx context.Context, input WaitForClusterToUpgradeInput, intervals ...interface{}) {
By("Waiting for machines to update")

var totalMachineCount int32
totalMachineCount = *input.ControlPlane.Spec.Replicas
Eventually(func() error {
cp := input.ControlPlane.DeepCopy()
if err := input.Reader.Get(ctx, client.ObjectKeyFromObject(input.ControlPlane), cp); err != nil {
return fmt.Errorf("failed to get control plane: %w", err)
}

for _, md := range input.MachineDeployments {
totalMachineCount += *md.Spec.Replicas
}
updatedDeployments := []*clusterv1.MachineDeployment{}
for _, md := range input.MachineDeployments {
copy := &clusterv1.MachineDeployment{}
if err := input.Reader.Get(ctx, client.ObjectKeyFromObject(md), copy); client.IgnoreNotFound(err) != nil {
return fmt.Errorf("failed to get updated machine deployment: %w", err)
}

Eventually(func() (bool, error) {
machineList := &clusterv1.MachineList{}
if err := input.Lister.List(ctx, machineList); err != nil {
return false, fmt.Errorf("failed to list machines: %w", err)
updatedDeployments = append(updatedDeployments, copy)
}

if len(machineList.Items) != int(totalMachineCount) { // not all replicas are created
return false, nil
machineList := &clusterv1.MachineList{}
if err := input.Reader.List(ctx, machineList); err != nil {
return fmt.Errorf("failed to list machines: %w", err)
}

for _, machine := range machineList.Items {
expectedVersion := input.VersionAfterUpgrade + "+rke2r1"
if machine.Spec.Version != nil && *machine.Spec.Version != expectedVersion {
return false, nil
if machine.Spec.Version == nil || *machine.Spec.Version != expectedVersion {
return fmt.Errorf("Expected machine version to match %s, got %v", expectedVersion, machine.Spec.Version)
}
}

return true, nil
}, intervals...).Should(BeTrue(), framework.PrettyPrint(input.ControlPlane))
ready := cp.Status.ReadyReplicas == cp.Status.Replicas
if !ready {
return fmt.Errorf("Control plane is not ready: %d ready from %d", cp.Status.ReadyReplicas, cp.Status.Replicas)
}

expected := cp.Spec.Replicas != nil && *cp.Spec.Replicas == cp.Status.Replicas
if !expected {
return fmt.Errorf("Control plane is not scaled: %d replicas from %d", cp.Spec.Replicas, cp.Status.Replicas)
}

for _, md := range updatedDeployments {
if md.Spec.Replicas == nil || *md.Spec.Replicas != md.Status.ReadyReplicas {
return fmt.Errorf("Not all machine deployments are updated yet expected %v!=%d", md.Spec.Replicas, md.Status.ReadyReplicas)
}
}

return nil
}, intervals...).Should(Succeed())
}

// setDefaults sets the default values for ApplyCustomClusterTemplateAndWaitInput if not set.
Expand Down

0 comments on commit 465f030

Please sign in to comment.