Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 E2E failures in CI #440

Merged
merged 1 commit into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 105 additions & 8 deletions test/e2e/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ import (
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/cmd/clusterctl/client/config"
"sigs.k8s.io/cluster-api/test/framework"
"sigs.k8s.io/cluster-api/test/framework/clusterctl"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/yaml"
)

Expand All @@ -55,16 +57,11 @@ func Byf(format string, a ...interface{}) {
By(fmt.Sprintf(format, a...))
}

// setupSpecNamespace creates a namespace for a test spec and returns it together with
// a cancel function. The namespace name is "<specName>-<suffix>", where the suffix
// comes from util.RandomString(6).
//
// NOTE(review): this span is a diff hunk rendered without +/- markers, so two
// signatures and two bodies appear back to back. Presumably the variant calling
// framework.CreateNamespaceAndWatchEvents (using artifactFolder for LogFolder) is the
// removed one and the variant calling framework.CreateNamespace (ignoring the last
// parameter) is the added one — confirm against the repository.
func setupSpecNamespace(ctx context.Context, specName string, clusterProxy framework.ClusterProxy, artifactFolder string) (*corev1.Namespace, context.CancelFunc) {
func setupSpecNamespace(ctx context.Context, specName string, clusterProxy framework.ClusterProxy, _ string) (*corev1.Namespace, context.CancelFunc) {
Byf("Creating a namespace for hosting the %q test spec", specName)
// Variant 1: create the namespace and write watched events under
// <artifactFolder>/clusters/<cluster proxy name>.
namespace, cancelWatches := framework.CreateNamespaceAndWatchEvents(ctx, framework.CreateNamespaceAndWatchEventsInput{
Creator: clusterProxy.GetClient(),
ClientSet: clusterProxy.GetClientSet(),
Name: fmt.Sprintf("%s-%s", specName, util.RandomString(6)),
LogFolder: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName()),
})

return namespace, cancelWatches
// Variant 2: the derived context is discarded, so the returned cancel function only
// exists to satisfy the signature; the namespace is created with the "40s"/"10s"
// intervals passed to framework.CreateNamespace.
_, cancelWatches := context.WithCancel(ctx)
return framework.CreateNamespace(ctx, framework.CreateNamespaceInput{Creator: clusterProxy.GetClient(), Name: fmt.Sprintf("%s-%s", specName, util.RandomString(6))}, "40s", "10s"), cancelWatches
}

func cleanupInstallation(ctx context.Context, clusterctlLogFolder, clusterctlConfigPath string, proxy framework.ClusterProxy) func() {
Expand Down Expand Up @@ -191,3 +188,103 @@ func localLoadE2EConfig(configPath string) *clusterctl.E2EConfig {

return config
}

// UpgradeManagementCluster upgrades the providers of a management cluster using clusterctl.
//
// The input must select exactly one upgrade mode: either input.Contract is set, or at
// least one provider list (core, bootstrap, control plane, infrastructure, IPAM,
// runtime extension, add-on) is set for a custom upgrade. input.LogFolder is created
// if it does not exist.
//
// The function returns as soon as clusterctl.Upgrade returns. It does not wait for the
// provider controllers afterwards and does not collect metrics, because metrics
// collection causes failures in CI.
func UpgradeManagementCluster(ctx context.Context, input clusterctl.UpgradeManagementClusterAndWaitInput) {
	Expect(ctx).NotTo(BeNil(), "ctx is required for UpgradeManagementCluster")
	Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling UpgradeManagementCluster")
	Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling UpgradeManagementCluster")

	// Check if the user wants a custom upgrade.
	isCustomUpgrade := input.CoreProvider != "" ||
		len(input.BootstrapProviders) > 0 ||
		len(input.ControlPlaneProviders) > 0 ||
		len(input.InfrastructureProviders) > 0 ||
		len(input.IPAMProviders) > 0 ||
		len(input.RuntimeExtensionProviders) > 0 ||
		len(input.AddonProviders) > 0

	// Contract-based and custom upgrades are mutually exclusive, and one of them is required.
	hasContract := input.Contract != ""
	Expect(hasContract != isCustomUpgrade).To(BeTrue(), `Invalid argument. Either the input.Contract parameter or at least one of the following providers has to be set:
input.CoreProvider, input.BootstrapProviders, input.ControlPlaneProviders, input.InfrastructureProviders, input.IPAMProviders, input.RuntimeExtensionProviders, input.AddonProviders`)

	Expect(os.MkdirAll(input.LogFolder, 0750)).To(Succeed(), "Invalid argument. input.LogFolder can't be created for UpgradeManagementCluster")

	upgradeInput := clusterctl.UpgradeInput{
		ClusterctlConfigPath:      input.ClusterctlConfigPath,
		ClusterctlVariables:       input.ClusterctlVariables,
		ClusterName:               input.ClusterProxy.GetName(),
		KubeconfigPath:            input.ClusterProxy.GetKubeconfigPath(),
		Contract:                  input.Contract,
		CoreProvider:              input.CoreProvider,
		BootstrapProviders:        input.BootstrapProviders,
		ControlPlaneProviders:     input.ControlPlaneProviders,
		InfrastructureProviders:   input.InfrastructureProviders,
		IPAMProviders:             input.IPAMProviders,
		RuntimeExtensionProviders: input.RuntimeExtensionProviders,
		AddonProviders:            input.AddonProviders,
		LogFolder:                 input.LogFolder,
	}

	clusterctl.Upgrade(ctx, upgradeInput)

	// We have to skip collecting metrics, as it causes failures in CI
}

// InitManagementCluster initializes a management cluster using clusterctl.
//
// When input.CoreProvider, input.BootstrapProviders or input.ControlPlaneProviders are
// empty they default to config.ClusterAPIProviderName,
// config.KubeadmBootstrapProviderName and config.KubeadmControlPlaneProviderName.
// clusterctl.Init is only invoked when framework.GetControllerDeployments returns no
// deployments. Afterwards framework.WaitForDeploymentsAvailable is called for every
// controller deployment found, with intervals forwarded to it.
func InitManagementCluster(ctx context.Context, input clusterctl.InitManagementClusterAndWatchControllerLogsInput, intervals ...interface{}) {
	Expect(ctx).NotTo(BeNil(), "ctx is required for InitManagementCluster")
	Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling InitManagementCluster")
	Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling InitManagementCluster")
	Expect(input.InfrastructureProviders).ToNot(BeEmpty(), "Invalid argument. input.InfrastructureProviders can't be empty when calling InitManagementCluster")
	Expect(os.MkdirAll(input.LogFolder, 0750)).To(Succeed(), "Invalid argument. input.LogFolder can't be created for InitManagementCluster")

	// Fall back to the default core, bootstrap and control plane providers.
	if input.CoreProvider == "" {
		input.CoreProvider = config.ClusterAPIProviderName
	}
	if len(input.BootstrapProviders) == 0 {
		input.BootstrapProviders = []string{config.KubeadmBootstrapProviderName}
	}
	if len(input.ControlPlaneProviders) == 0 {
		input.ControlPlaneProviders = []string{config.KubeadmControlPlaneProviderName}
	}

	mgmtClient := input.ClusterProxy.GetClient()
	listInput := framework.GetControllerDeploymentsInput{Lister: mgmtClient}

	// Run clusterctl init only when no controller deployments exist yet.
	if existing := framework.GetControllerDeployments(ctx, listInput); len(existing) == 0 {
		clusterctl.Init(ctx, clusterctl.InitInput{
			// Management cluster hosting this test.
			KubeconfigPath: input.ClusterProxy.GetKubeconfigPath(),
			// clusterctl config file pointing to the local provider repository created for this test.
			ClusterctlConfigPath: input.ClusterctlConfigPath,
			// Providers for a single-tenant management cluster.
			CoreProvider:              input.CoreProvider,
			BootstrapProviders:        input.BootstrapProviders,
			ControlPlaneProviders:     input.ControlPlaneProviders,
			InfrastructureProviders:   input.InfrastructureProviders,
			IPAMProviders:             input.IPAMProviders,
			RuntimeExtensionProviders: input.RuntimeExtensionProviders,
			AddonProviders:            input.AddonProviders,
			// Folder for the clusterctl logs.
			LogFolder: input.LogFolder,
		})
	}

	log.FromContext(ctx).Info("Waiting for provider controllers to be running")

	// List again so deployments created by the init above are included.
	deployments := framework.GetControllerDeployments(ctx, listInput)
	Expect(deployments).ToNot(BeEmpty(), "The list of controller deployments should not be empty")
	for i := range deployments {
		waitInput := framework.WaitForDeploymentsAvailableInput{
			Getter:     mgmtClient,
			Deployment: deployments[i],
		}
		framework.WaitForDeploymentsAvailable(ctx, waitInput, intervals...)
	}
}
4 changes: 3 additions & 1 deletion test/e2e/e2e_clusterclass_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ var _ = Describe("Workload cluster creation", func() {
}
})
Expect(err).ToNot(HaveOccurred())
Expect(bootstrapClusterProxy.Apply(ctx, []byte(clusterClassConfig))).To(Succeed(), "Failed to apply ClusterClass definition")
Eventually(func() error {
return bootstrapClusterProxy.Apply(ctx, []byte(clusterClassConfig))
}, e2eConfig.GetIntervals(specName, "wait-cluster")...).Should(Succeed(), "Failed to apply ClusterClass definition")

By("Create a Docker Cluster from topology")

Expand Down
6 changes: 4 additions & 2 deletions test/e2e/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ func setupBootstrapCluster(config *clusterctl.E2EConfig, scheme *runtime.Scheme,

// initBootstrapCluster initializes a bootstrap cluster with the latest minor version.
func initBootstrapCluster(bootstrapClusterProxy framework.ClusterProxy, config *clusterctl.E2EConfig, clusterctlConfig, artifactFolder string) {
clusterctl.InitManagementClusterAndWatchControllerLogs(context.TODO(), clusterctl.InitManagementClusterAndWatchControllerLogsInput{
InitManagementCluster(context.TODO(), clusterctl.InitManagementClusterAndWatchControllerLogsInput{
ClusterProxy: bootstrapClusterProxy,
ClusterctlConfigPath: clusterctlConfig,
InfrastructureProviders: config.InfrastructureProviders(),
Expand All @@ -245,13 +245,14 @@ func initBootstrapCluster(bootstrapClusterProxy framework.ClusterProxy, config *
BootstrapProviders: []string{"rke2-bootstrap"},
ControlPlaneProviders: []string{"rke2-control-plane"},
LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()),
DisableMetricsCollection: true,
}, config.GetIntervals(bootstrapClusterProxy.GetName(), "wait-controllers")...)
}

// initUpgradableBootstrapCluster initializes a bootstrap cluster with the latest minor version N-1, which is then used to perform an upgrade to the latest version.
// Make sure to update the version in the providers list to the latest minor version N-1.
func initUpgradableBootstrapCluster(bootstrapClusterProxy framework.ClusterProxy, config *clusterctl.E2EConfig, clusterctlConfig, artifactFolder string) {
clusterctl.InitManagementClusterAndWatchControllerLogs(context.TODO(), clusterctl.InitManagementClusterAndWatchControllerLogsInput{
InitManagementCluster(context.TODO(), clusterctl.InitManagementClusterAndWatchControllerLogsInput{
ClusterProxy: bootstrapClusterProxy,
ClusterctlConfigPath: clusterctlConfig,
InfrastructureProviders: config.InfrastructureProviders(),
Expand All @@ -260,6 +261,7 @@ func initUpgradableBootstrapCluster(bootstrapClusterProxy framework.ClusterProxy
BootstrapProviders: []string{"rke2-bootstrap:v0.6.0"},
ControlPlaneProviders: []string{"rke2-control-plane:v0.6.0"},
LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()),
DisableMetricsCollection: true,
}, config.GetIntervals(bootstrapClusterProxy.GetName(), "wait-controllers")...)
}

Expand Down
2 changes: 1 addition & 1 deletion test/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ var _ = Describe("Workload cluster creation", func() {
}, result)

WaitForClusterToUpgrade(ctx, WaitForClusterToUpgradeInput{
Lister: bootstrapClusterProxy.GetClient(),
Reader: bootstrapClusterProxy.GetClient(),
ControlPlane: result.ControlPlane,
MachineDeployments: result.MachineDeployments,
VersionAfterUpgrade: e2eConfig.GetVariable(KubernetesVersionUpgradeTo),
Expand Down
6 changes: 3 additions & 3 deletions test/e2e/e2e_upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,13 @@ var _ = Describe("Workload cluster creation", func() {
}, e2eConfig.GetIntervals(specName, "wait-control-plane")...)

By("Upgrading to latest boostrap/controlplane provider version")
clusterctl.UpgradeManagementClusterAndWait(ctx, clusterctl.UpgradeManagementClusterAndWaitInput{
UpgradeManagementCluster(ctx, clusterctl.UpgradeManagementClusterAndWaitInput{
ClusterProxy: bootstrapClusterProxy,
ClusterctlConfigPath: clusterctlConfigPath,
BootstrapProviders: []string{"rke2-bootstrap:v0.7.99"},
ControlPlaneProviders: []string{"rke2-control-plane:v0.7.99"},
LogFolder: clusterctlLogFolder,
}, e2eConfig.GetIntervals(specName, "wait-controllers")...)
})

WaitForControlPlaneToBeReady(ctx, WaitForControlPlaneToBeReadyInput{
Getter: bootstrapClusterProxy.GetClient(),
Expand Down Expand Up @@ -174,7 +174,7 @@ var _ = Describe("Workload cluster creation", func() {
}, result)

WaitForClusterToUpgrade(ctx, WaitForClusterToUpgradeInput{
Lister: bootstrapClusterProxy.GetClient(),
Reader: bootstrapClusterProxy.GetClient(),
ControlPlane: result.ControlPlane,
MachineDeployments: result.MachineDeployments,
VersionAfterUpgrade: e2eConfig.GetVariable(KubernetesVersionUpgradeTo),
Expand Down
76 changes: 39 additions & 37 deletions test/e2e/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"

Expand Down Expand Up @@ -138,14 +137,6 @@ func ApplyClusterTemplateAndWait(ctx context.Context, input ApplyClusterTemplate
})
Expect(workloadClusterTemplate).ToNot(BeNil(), "Failed to get the cluster template")

// Ensure we have a Cluster for dump and cleanup steps in AfterEach even if ApplyClusterTemplateAndWait fails.
result.Cluster = &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: input.ConfigCluster.ClusterName,
Namespace: input.ConfigCluster.Namespace,
},
}

ApplyCustomClusterTemplateAndWait(ctx, ApplyCustomClusterTemplateAndWaitInput{
ClusterProxy: input.ClusterProxy,
CustomTemplateYAML: workloadClusterTemplate,
Expand Down Expand Up @@ -174,19 +165,10 @@ func ApplyCustomClusterTemplateAndWait(ctx context.Context, input ApplyCustomClu

Byf("Creating the workload cluster with name %q from the provided yaml", input.ClusterName)

// Ensure we have a Cluster for dump and cleanup steps in AfterEach even if ApplyClusterTemplateAndWait fails.
result.Cluster = &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: input.ClusterName,
Namespace: input.Namespace,
},
}

Byf("Applying the cluster template yaml of cluster %s", klog.KRef(input.Namespace, input.ClusterName))
Eventually(func() error {
return input.ClusterProxy.Apply(ctx, input.CustomTemplateYAML, input.Args...)
// return input.ClusterProxy.CreateOrUpdate(ctx, input.CustomTemplateYAML, input.CreateOrUpdateOpts...)
}, 1*time.Minute).Should(Succeed(), "Failed to apply the cluster template")
}, input.WaitForClusterIntervals...).Should(Succeed(), "Failed to apply the cluster template")

// Once we applied the cluster template we can run PreWaitForCluster.
// Note: This can e.g. be used to verify the BeforeClusterCreate lifecycle hook is executed
Expand Down Expand Up @@ -218,7 +200,7 @@ func ApplyCustomClusterTemplateAndWait(ctx context.Context, input ApplyCustomClu
input.WaitForControlPlaneMachinesReady(ctx, input, result)

Byf("Waiting for the machine deployments of cluster %s to be provisioned", klog.KRef(input.Namespace, input.ClusterName))
result.MachineDeployments = framework.DiscoveryAndWaitForMachineDeployments(ctx, framework.DiscoveryAndWaitForMachineDeploymentsInput{
result.MachineDeployments = DiscoveryAndWaitForMachineDeployments(ctx, framework.DiscoveryAndWaitForMachineDeploymentsInput{
Lister: input.ClusterProxy.GetClient(),
Cluster: result.Cluster,
}, input.WaitForMachineDeployments...)
Expand Down Expand Up @@ -285,7 +267,7 @@ func DiscoveryAndWaitForRKE2ControlPlaneInitialized(ctx context.Context, input D
Namespace: input.Cluster.Namespace,
})
g.Expect(controlPlane).ToNot(BeNil())
}, "10s", "1s").Should(Succeed(), "Couldn't get the control plane for the cluster %s", klog.KObj(input.Cluster))
}, "2m", "1s").Should(Succeed(), "Couldn't get the control plane for the cluster %s", klog.KObj(input.Cluster))

return controlPlane
}
Expand Down Expand Up @@ -445,7 +427,7 @@ func WaitForMachineConditions(ctx context.Context, input WaitForMachineCondition

// WaitForClusterToUpgradeInput is the input for WaitForClusterToUpgrade.
type WaitForClusterToUpgradeInput struct {
Lister framework.Lister
Reader framework.GetLister
ControlPlane *controlplanev1.RKE2ControlPlane
MachineDeployments []*clusterv1.MachineDeployment
VersionAfterUpgrade string
Expand All @@ -455,32 +437,52 @@ type WaitForClusterToUpgradeInput struct {
func WaitForClusterToUpgrade(ctx context.Context, input WaitForClusterToUpgradeInput, intervals ...interface{}) {
By("Waiting for machines to update")

var totalMachineCount int32
totalMachineCount = *input.ControlPlane.Spec.Replicas
Eventually(func() error {
cp := input.ControlPlane.DeepCopy()
if err := input.Reader.Get(ctx, client.ObjectKeyFromObject(input.ControlPlane), cp); err != nil {
return fmt.Errorf("failed to get control plane: %w", err)
}

for _, md := range input.MachineDeployments {
totalMachineCount += *md.Spec.Replicas
}
updatedDeployments := []*clusterv1.MachineDeployment{}
for _, md := range input.MachineDeployments {
copy := &clusterv1.MachineDeployment{}
if err := input.Reader.Get(ctx, client.ObjectKeyFromObject(md), copy); client.IgnoreNotFound(err) != nil {
return fmt.Errorf("failed to get updated machine deployment: %w", err)
}

Eventually(func() (bool, error) {
machineList := &clusterv1.MachineList{}
if err := input.Lister.List(ctx, machineList); err != nil {
return false, fmt.Errorf("failed to list machines: %w", err)
updatedDeployments = append(updatedDeployments, copy)
}

if len(machineList.Items) != int(totalMachineCount) { // not all replicas are created
return false, nil
machineList := &clusterv1.MachineList{}
if err := input.Reader.List(ctx, machineList); err != nil {
return fmt.Errorf("failed to list machines: %w", err)
}

for _, machine := range machineList.Items {
expectedVersion := input.VersionAfterUpgrade + "+rke2r1"
if machine.Spec.Version != nil && *machine.Spec.Version != expectedVersion {
return false, nil
if machine.Spec.Version == nil || *machine.Spec.Version != expectedVersion {
return fmt.Errorf("Expected machine version to match %s, got %v", expectedVersion, machine.Spec.Version)
}
}

return true, nil
}, intervals...).Should(BeTrue(), framework.PrettyPrint(input.ControlPlane))
ready := cp.Status.ReadyReplicas == cp.Status.Replicas
if !ready {
return fmt.Errorf("Control plane is not ready: %d ready from %d", cp.Status.ReadyReplicas, cp.Status.Replicas)
}

expected := cp.Spec.Replicas != nil && *cp.Spec.Replicas == cp.Status.Replicas
if !expected {
return fmt.Errorf("Control plane is not scaled: %d replicas from %d", cp.Spec.Replicas, cp.Status.Replicas)
}

for _, md := range updatedDeployments {
if md.Spec.Replicas == nil || *md.Spec.Replicas != md.Status.ReadyReplicas {
return fmt.Errorf("Not all machine deployments are updated yet expected %v!=%d", md.Spec.Replicas, md.Status.ReadyReplicas)
}
}

return nil
}, intervals...).Should(Succeed())
}

// setDefaults sets the default values for ApplyCustomClusterTemplateAndWaitInput if not set.
Expand Down