diff --git a/test/e2e/byohost_reuse_test.go b/test/e2e/byohost_reuse_test.go index 4d4ac3e25..36416b445 100644 --- a/test/e2e/byohost_reuse_test.go +++ b/test/e2e/byohost_reuse_test.go @@ -9,12 +9,9 @@ import ( "os" "path/filepath" - "github.com/docker/docker/api/types" - "github.com/docker/docker/client" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" infrastructurev1beta1 "github.com/vmware-tanzu/cluster-api-provider-bringyourownhost/apis/infrastructure/v1beta1" - corev1 "k8s.io/api/core/v1" k8stypes "k8s.io/apimachinery/pkg/types" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -26,71 +23,68 @@ import ( var _ = Describe("When BYO Host rejoins the capacity pool", func() { var ( - ctx context.Context - specName = "byohost-reuse" - namespace *corev1.Namespace - cancelWatches context.CancelFunc - clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult - dockerClient *client.Client - err error - byohostContainerIDs []string - agentLogFile1 = "/tmp/host-agent1.log" - agentLogFile2 = "/tmp/host-agent-reuse.log" + caseContextData *CaseContext = nil + collectInfoData *CollectInfoContext = nil + byoHostPoolData *ByoHostPoolContext = nil ) BeforeEach(func() { - ctx = context.TODO() - Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName) + caseContextData = new(CaseContext) + Expect(caseContextData).NotTo(BeNil()) + caseContextData.CaseName = "reuse" + caseContextData.ClusterConName = clusterConName + caseContextData.clusterProxy = bootstrapClusterProxy + caseContextData.ClusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + Expect(caseContextData.ClusterResources).NotTo(BeNil()) + + specName := caseContextData.CaseName + caseContextData.ctx = context.TODO() + Expect(caseContextData.ctx).NotTo(BeNil(), "ctx is required for %s spec", specName) Expect(e2eConfig).NotTo(BeNil(), "Invalid argument. e2eConfig can't be nil when calling %s spec", specName) Expect(clusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. clusterctlConfigPath must be an existing file when calling %s spec", specName) Expect(bootstrapClusterProxy).NotTo(BeNil(), "Invalid argument. bootstrapClusterProxy can't be nil when calling %s spec", specName) Expect(os.MkdirAll(artifactFolder, 0755)).To(Succeed(), "Invalid argument. artifactFolder can't be created for %s spec", specName) - Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion)) // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. 
- namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) - clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + setupSpecNamespace(caseContextData, artifactFolder) + + byoHostPoolData = new(ByoHostPoolContext) + Expect(byoHostPoolData).NotTo(BeNil()) + + collectInfoData = new(CollectInfoContext) + Expect(collectInfoData).NotTo(BeNil()) + collectInfoData.DeploymentLogDir = fmt.Sprintf("/tmp/%s-deplymentlogs", caseContextData.CaseName) }) It("Should reuse the same BYO Host after it is reset", func() { - clusterName := fmt.Sprintf("%s-%s", specName, util.RandomString(6)) - byoHostName1 := "byohost-1" - byoHostName2 := "byohost-for-reuse" - - dockerClient, err = client.NewClientWithOpts(client.FromEnv) - Expect(err).NotTo(HaveOccurred()) - - var output types.HijackedResponse - output, byohostContainerID, err := setupByoDockerHost(ctx, clusterConName, byoHostName1, namespace.Name, dockerClient, bootstrapClusterProxy) - Expect(err).NotTo(HaveOccurred()) - defer output.Close() - byohostContainerIDs = append(byohostContainerIDs, byohostContainerID) - f := WriteDockerLog(output, agentLogFile1) - defer f.Close() - - output, byohostContainerID, err = setupByoDockerHost(ctx, clusterConName, byoHostName2, namespace.Name, dockerClient, bootstrapClusterProxy) - Expect(err).NotTo(HaveOccurred()) - defer output.Close() - byohostContainerIDs = append(byohostContainerIDs, byohostContainerID) - - // read the log of host agent container in backend, and write it - f = WriteDockerLog(output, agentLogFile2) - defer f.Close() + ctx := caseContextData.ctx + clusterProxy := caseContextData.clusterProxy + namespace := caseContextData.Namespace + specName := caseContextData.CaseName + clusterResources := caseContextData.ClusterResources + caseContextData.ClusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6)) + + byoHostPoolData.Capacity = 2 + Byf("Creating byohost capacity pool containing %d hosts", byoHostPoolData.Capacity) + fl := setupByohostPool(caseContextData, collectInfoData, byoHostPoolData) + for _, f := range fl { + defer f.Close() + } By("Creating a cluster") clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ - ClusterProxy: bootstrapClusterProxy, + ClusterProxy: clusterProxy, ConfigCluster: clusterctl.ConfigClusterInput{ LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()), ClusterctlConfigPath: clusterctlConfigPath, - KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + KubeconfigPath: clusterProxy.GetKubeconfigPath(), InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, Flavor: clusterctl.DefaultFlavor, Namespace: namespace.Name, - ClusterName: clusterName, + ClusterName: caseContextData.ClusterName, KubernetesVersion: e2eConfig.GetVariable(KubernetesVersion), ControlPlaneMachineCount: pointer.Int64Ptr(1), WorkerMachineCount: pointer.Int64Ptr(1), @@ -101,38 +95,38 @@ var _ = Describe("When BYO Host rejoins the capacity pool", func() { }, clusterResources) // Assert on byohost cluster label to match clusterName - byoHostLookupKey := k8stypes.NamespacedName{Name: byoHostName2, Namespace: namespace.Name} + byoHostLookupKey := k8stypes.NamespacedName{Name: byoHostPoolData.ByoHostNames[1], Namespace: namespace.Name} byoHostToBeReused := &infrastructurev1beta1.ByoHost{} - Expect(bootstrapClusterProxy.GetClient().Get(ctx, byoHostLookupKey, byoHostToBeReused)).Should(Succeed()) + Expect(clusterProxy.GetClient().Get(ctx, byoHostLookupKey, 
byoHostToBeReused)).Should(Succeed()) cluster, ok := byoHostToBeReused.Labels[clusterv1.ClusterLabelName] Expect(ok).To(BeTrue()) - Expect(cluster).To(Equal(clusterName)) + Expect(cluster).To(Equal(caseContextData.ClusterName)) By("Delete the cluster and freeing the ByoHosts") framework.DeleteAllClustersAndWait(ctx, framework.DeleteAllClustersAndWaitInput{ - Client: bootstrapClusterProxy.GetClient(), + Client: clusterProxy.GetClient(), Namespace: namespace.Name, }, e2eConfig.GetIntervals(specName, "wait-delete-cluster")...) // Assert if cluster label is removed // This verifies that the byohost has rejoined the capacity pool byoHostToBeReused = &infrastructurev1beta1.ByoHost{} - Expect(bootstrapClusterProxy.GetClient().Get(ctx, byoHostLookupKey, byoHostToBeReused)).Should(Succeed()) + Expect(clusterProxy.GetClient().Get(ctx, byoHostLookupKey, byoHostToBeReused)).Should(Succeed()) _, ok = byoHostToBeReused.Labels[clusterv1.ClusterLabelName] Expect(ok).To(BeFalse()) By("Creating a new cluster") - clusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6)) + caseContextData.ClusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6)) clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ - ClusterProxy: bootstrapClusterProxy, + ClusterProxy: clusterProxy, ConfigCluster: clusterctl.ConfigClusterInput{ - LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()), + LogFolder: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName()), ClusterctlConfigPath: clusterctlConfigPath, - KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + KubeconfigPath: clusterProxy.GetKubeconfigPath(), InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, Flavor: clusterctl.DefaultFlavor, Namespace: namespace.Name, - ClusterName: clusterName, + ClusterName: caseContextData.ClusterName, KubernetesVersion: e2eConfig.GetVariable(KubernetesVersion), ControlPlaneMachineCount: pointer.Int64Ptr(1), WorkerMachineCount: pointer.Int64Ptr(1), @@ -144,36 +138,25 @@ var _ = Describe("When BYO Host rejoins the capacity pool", func() { // Assert on byohost cluster label to match clusterName byoHostToBeReused = &infrastructurev1beta1.ByoHost{} - Expect(bootstrapClusterProxy.GetClient().Get(ctx, byoHostLookupKey, byoHostToBeReused)).Should(Succeed()) + Expect(clusterProxy.GetClient().Get(ctx, byoHostLookupKey, byoHostToBeReused)).Should(Succeed()) cluster, ok = byoHostToBeReused.Labels[clusterv1.ClusterLabelName] Expect(ok).To(BeTrue()) - Expect(cluster).To(Equal(clusterName)) - + Expect(cluster).To(Equal(caseContextData.ClusterName)) }) JustAfterEach(func() { if CurrentGinkgoTestDescription().Failed { - ShowInfo([]string{agentLogFile1, agentLogFile2}) + CollectInfo(caseContextData, collectInfoData) + ShowInfoBeforeCaseQuit() } }) AfterEach(func() { - // Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. 
- dumpSpecResourcesAndCleanup(ctx, specName, bootstrapClusterProxy, artifactFolder, namespace, cancelWatches, clusterResources.Cluster, e2eConfig.GetIntervals, skipCleanup) - - if dockerClient != nil && len(byohostContainerIDs) != 0 { - for _, byohostContainerID := range byohostContainerIDs { - err := dockerClient.ContainerStop(ctx, byohostContainerID, nil) - Expect(err).NotTo(HaveOccurred()) - - err = dockerClient.ContainerRemove(ctx, byohostContainerID, types.ContainerRemoveOptions{}) - Expect(err).NotTo(HaveOccurred()) - } + dumpSpecResourcesAndCleanup(caseContextData, artifactFolder, e2eConfig.GetIntervals, skipCleanup) + cleanByohostPool(caseContextData, byoHostPoolData) + if CurrentGinkgoTestDescription().Failed { + ShowInfoAfterCaseQuit(collectInfoData) } - - os.Remove(agentLogFile1) - os.Remove(agentLogFile2) - os.Remove(ReadByohControllerManagerLogShellFile) - os.Remove(ReadAllPodsShellFile) + RemoveLogs(collectInfoData) }) }) diff --git a/test/e2e/e2e_debug_helper.go b/test/e2e/e2e_debug_helper.go index 29f9af68b..d02503c08 100644 --- a/test/e2e/e2e_debug_helper.go +++ b/test/e2e/e2e_debug_helper.go @@ -5,13 +5,23 @@ package e2e import ( "bufio" + "context" "fmt" + "io" "io/fs" "io/ioutil" "os" "os/exec" + "path" + "path/filepath" "github.com/docker/docker/api/types" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/cluster-api/test/framework" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" + "sigs.k8s.io/controller-runtime/pkg/client" ) const ( @@ -20,6 +30,29 @@ const ( ReadAllPodsShellFile string = "/tmp/read-all-pods.sh" ) +type CaseContext struct { + ctx context.Context + clusterProxy framework.ClusterProxy + cancelWatches context.CancelFunc + CaseName string + ClusterConName string + ClusterName string + SpecName string + Namespace *corev1.Namespace + ClusterResources *clusterctl.ApplyClusterTemplateAndWaitResult +} + +type CollectInfoContext struct { + AgentLogList []string + DeploymentLogDir string +} + +type WriteDeploymentLogContext struct { + DeploymentName string + DeploymentNamespace string + ContainerName string +} + func WriteDockerLog(output types.HijackedResponse, outputFile string) *os.File { s := make(chan string) e := make(chan error) @@ -32,7 +65,8 @@ func WriteDockerLog(output types.HijackedResponse, outputFile string) *os.File { go func() { for { - line, _, err := buf.ReadLine() + var line []byte + line, _, err = buf.ReadLine() if err != nil { // will be quit by this err: read unix @->/run/docker.sock: use of closed network connection e <- err @@ -47,11 +81,11 @@ func WriteDockerLog(output types.HijackedResponse, outputFile string) *os.File { for { select { case line := <-s: - _, err2 := f.WriteString(line + "\n") - if err2 != nil { - Showf("Write String to file failed, err2=%v", err2) + // ignore to print the error: file already closed + _, err = f.WriteString(line + "\n") + if err == nil { + _ = f.Sync() } - _ = f.Sync() case err := <-e: // Please ignore this error if you see it in output Showf("Get err %v", err) @@ -113,7 +147,98 @@ func WriteShellScript(shellFileName string, shellFileContent []string) { } } -func ShowInfo(allAgentLogFiles []string) { +func WriteDeploymentLogs(caseContextData *CaseContext, collectInfoData *CollectInfoContext, writeDeploymentLogData *WriteDeploymentLogContext) { + ctx := caseContextData.ctx + clusterProxy := caseContextData.clusterProxy + deploymentLogDir := collectInfoData.DeploymentLogDir + deploymentNamespace := 
writeDeploymentLogData.DeploymentNamespace + deploymentName := writeDeploymentLogData.DeploymentName + containerName := writeDeploymentLogData.ContainerName + + deployment := &appsv1.Deployment{} + key := client.ObjectKey{ + Namespace: deploymentNamespace, + Name: deploymentName, + } + + if err := clusterProxy.GetClient().Get(ctx, key, deployment); err != nil { + Showf("failed to get deployment %s/%s: %v", deploymentNamespace, deploymentName, err) + return + } + + selector, err := metav1.LabelSelectorAsMap(deployment.Spec.Selector) + if err != nil { + Showf("failed to get selector: %v", err) + return + } + + podList := &corev1.PodList{} + if err = clusterProxy.GetClient().List(ctx, podList, client.InNamespace(deploymentNamespace), client.MatchingLabels(selector)); err != nil { + Showf("failed to List pods in namespace %s : %v", deploymentNamespace, err) + return + } + + pods := podList.Items + containers := deployment.Spec.Template.Spec.Containers + + os.RemoveAll(deploymentLogDir) + if err = os.MkdirAll(deploymentLogDir, DefaultFileMode); err != nil { + Showf("failed to create dir %s : %v", deploymentLogDir, err) + return + } + + for i := range pods { + for j := range containers { + if containers[j].Name != containerName { + continue + } + go func(pod corev1.Pod, container corev1.Container) { + logFile := path.Join(deploymentLogDir, pod.Name+"-"+container.Name+".log") + var f *os.File + f, err = os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY, DefaultFileMode) + if err != nil { + Showf("failed to open %s : %v", logFile, err) + return + } + defer f.Close() + + opts := &corev1.PodLogOptions{ + Container: container.Name, + Follow: true, + } + var podLogs io.ReadCloser + podLogs, err = clusterProxy.GetClientSet().CoreV1().Pods(deploymentNamespace).GetLogs(pod.Name, opts).Stream(ctx) + if err != nil { + Showf("failed to get the log of pod %s: %v", pod.Name, err) + return + } + defer podLogs.Close() + + out := bufio.NewWriter(f) + defer out.Flush() + + _, err = out.ReadFrom(podLogs) + if err != nil && err != io.ErrUnexpectedEOF { + Showf("Got error while streaming logs for pod %s/%s, container %s: %v", deploymentNamespace, pod.Name, container.Name, err) + return + } + }(pods[i], containers[j]) + } + } +} + +func ShowDeploymentLogs(logDir string) { + logFiles, err := filepath.Glob(logDir + "/*") + if err != nil { + Showf("failed to list all files from dir %s: %v", logDir, err) + return + } + for _, logFile := range logFiles { + ShowFileContent(logFile) + } +} + +func ShowInfoBeforeCaseQuit() { // show swap status // showFileContent("/proc/swaps") @@ -124,20 +249,36 @@ func ShowInfo(allAgentLogFiles []string) { WriteShellScript(ReadAllPodsShellFile, shellContent) ShowFileContent(ReadAllPodsShellFile) ExecuteShellScript(ReadAllPodsShellFile) +} + +func CollectInfo(caseContextData *CaseContext, collectInfoData *CollectInfoContext) { + // collect the deployment logs in background goroutines + WriteDeploymentLogs(caseContextData, collectInfoData, &WriteDeploymentLogContext{ + DeploymentName: "byoh-controller-manager", + DeploymentNamespace: "byoh-system", + ContainerName: "manager", + }) +} +// The "write agent log" and "write deployment logs" jobs run in goroutines, and their output is only +// complete once the agent and the deployment have exited. The "show agent log" and "show deployment logs" +// steps therefore live here, after the case has finished, so that the full logs are captured.
+func ShowInfoAfterCaseQuit(collectInfoData *CollectInfoContext) { // show the agent log - for _, agentLogFile := range allAgentLogFiles { + for _, agentLogFile := range collectInfoData.AgentLogList { ShowFileContent(agentLogFile) } - // show byoh-controller-manager logs - shellContent = []string{ - "podNamespace=`kubectl get pods --all-namespaces --kubeconfig /tmp/mgmt.conf | grep byoh-controller-manager | awk '{print $1}'`", - "podName=`kubectl get pods --all-namespaces --kubeconfig /tmp/mgmt.conf | grep byoh-controller-manager | awk '{print $2}'`", - "kubectl logs -n ${podNamespace} ${podName} --kubeconfig /tmp/mgmt.conf -c manager", + // show the deployment's pods log + ShowDeploymentLogs(collectInfoData.DeploymentLogDir) +} + +func RemoveLogs(collectInfoData *CollectInfoContext) { + for _, agentLogFile := range collectInfoData.AgentLogList { + os.Remove(agentLogFile) } - WriteShellScript(ReadByohControllerManagerLogShellFile, shellContent) - ShowFileContent(ReadByohControllerManagerLogShellFile) - ExecuteShellScript(ReadByohControllerManagerLogShellFile) + os.Remove(ReadByohControllerManagerLogShellFile) + os.Remove(ReadAllPodsShellFile) + os.RemoveAll(collectInfoData.DeploymentLogDir) } diff --git a/test/e2e/e2e_docker_helper.go b/test/e2e/e2e_docker_helper.go index 42659ab93..d9a0c6d90 100644 --- a/test/e2e/e2e_docker_helper.go +++ b/test/e2e/e2e_docker_helper.go @@ -5,6 +5,7 @@ package e2e import ( "context" + "fmt" "io" "os" "path/filepath" @@ -21,7 +22,6 @@ import ( . "github.com/onsi/gomega" // nolint: stylecheck "github.com/onsi/gomega/gexec" "github.com/pkg/errors" - "sigs.k8s.io/cluster-api/test/framework" ) const ( @@ -37,6 +37,13 @@ type cpConfig struct { container string } +type ByoHostPoolContext struct { + Capacity int + ContainerIDs []string + dockerClient *client.Client + ByoHostNames []string +} + func resolveLocalPath(localPath string) (absPath string, err error) { if absPath, err = filepath.Abs(localPath); err != nil { return @@ -148,7 +155,13 @@ func createDockerContainer(ctx context.Context, byoHostName string, dockerClient nil, byoHostName) } -func setupByoDockerHost(ctx context.Context, clusterConName, byoHostName, namespace string, dockerClient *client.Client, bootstrapClusterProxy framework.ClusterProxy) (types.HijackedResponse, string, error) { +func setupByoDockerHost(caseContextData *CaseContext, byoHostPoolData *ByoHostPoolContext, byoHostName string) (types.HijackedResponse, error) { + ctx := caseContextData.ctx + clusterConName := caseContextData.ClusterConName + namespace := caseContextData.Namespace.Name + bootstrapClusterProxy := caseContextData.clusterProxy + dockerClient := byoHostPoolData.dockerClient + byohost, err := createDockerContainer(ctx, byoHostName, dockerClient) Expect(err).NotTo(HaveOccurred()) @@ -199,5 +212,43 @@ func setupByoDockerHost(ctx context.Context, clusterConName, byoHostName, namesp output, err := dockerClient.ContainerExecAttach(ctx, resp.ID, types.ExecStartCheck{}) - return output, byohost.ID, err + byoHostPoolData.ContainerIDs = append(byoHostPoolData.ContainerIDs, byohost.ID) + + return output, err +} + +func setupByohostPool(caseContextData *CaseContext, collectInfoData *CollectInfoContext, byoHostPoolData *ByoHostPoolContext) []*os.File { + var fileList []*os.File + var err error + caseName := caseContextData.CaseName + + byoHostPoolData.dockerClient, err = client.NewClientWithOpts(client.FromEnv) + Expect(err).NotTo(HaveOccurred()) + + for i := 0; i < byoHostPoolData.Capacity; i++ { + byoHostName := 
fmt.Sprintf("byohost-%s-%d", caseName, i) + output, err := setupByoDockerHost(caseContextData, byoHostPoolData, byoHostName) + Expect(err).NotTo(HaveOccurred()) + + byoHostPoolData.ByoHostNames = append(byoHostPoolData.ByoHostNames, byoHostName) + + // read the log of host agent container in backend, and write it + agentLogFile := fmt.Sprintf("/tmp/host-agent-%s-%d.log", caseName, i) + f := WriteDockerLog(output, agentLogFile) + fileList = append(fileList, f) + collectInfoData.AgentLogList = append(collectInfoData.AgentLogList, agentLogFile) + } + return fileList +} + +func cleanByohostPool(caseContextData *CaseContext, byoHostPoolData *ByoHostPoolContext) { + if byoHostPoolData.dockerClient != nil && len(byoHostPoolData.ContainerIDs) != 0 { + for _, byohostContainerID := range byoHostPoolData.ContainerIDs { + err := byoHostPoolData.dockerClient.ContainerStop(caseContextData.ctx, byohostContainerID, nil) + Expect(err).NotTo(HaveOccurred()) + + err = byoHostPoolData.dockerClient.ContainerRemove(caseContextData.ctx, byohostContainerID, types.ContainerRemoveOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + } } diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index fdc60c024..e0fa7115e 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -15,15 +15,20 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" infraproviderv1 "github.com/vmware-tanzu/cluster-api-provider-bringyourownhost/apis/infrastructure/v1beta1" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/bootstrap" "sigs.k8s.io/cluster-api/test/framework/clusterctl" "sigs.k8s.io/cluster-api/util" ) +const ( + KubernetesVersion = "KUBERNETES_VERSION" + CNIPath = "CNI" + CNIResources = "CNI_RESOURCES" + IPFamily = "IP_FAMILY" +) + // Test suite flags var ( // configPath is the path to the e2e config file. @@ -204,33 +209,44 @@ func tearDown(bootstrapClusterProvider bootstrap.ClusterProvider, bootstrapClust } } -func setupSpecNamespace(ctx context.Context, specName string, clusterProxy framework.ClusterProxy, artifactFolder string) (*corev1.Namespace, context.CancelFunc) { +func setupSpecNamespace(caseContextData *CaseContext, artifactFolder string) { + ctx := caseContextData.ctx + clusterProxy := caseContextData.clusterProxy + specName := caseContextData.CaseName + Byf("Creating a namespace for hosting the %q test spec", specName) - namespace, cancelWatches := framework.CreateNamespaceAndWatchEvents(ctx, framework.CreateNamespaceAndWatchEventsInput{ + caseContextData.Namespace, caseContextData.cancelWatches = framework.CreateNamespaceAndWatchEvents(ctx, framework.CreateNamespaceAndWatchEventsInput{ Creator: clusterProxy.GetClient(), ClientSet: clusterProxy.GetClientSet(), Name: fmt.Sprintf("%s-%s", specName, util.RandomString(6)), LogFolder: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName()), }) - - return namespace, cancelWatches } -func dumpSpecResourcesAndCleanup(ctx context.Context, specName string, clusterProxy framework.ClusterProxy, artifactFolder string, namespace *corev1.Namespace, cancelWatches context.CancelFunc, cluster *clusterv1.Cluster, intervalsGetter func(spec, key string) []interface{}, skipCleanup bool) { +// Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. 
+func dumpSpecResourcesAndCleanup(caseContextData *CaseContext, artifactFolder string, intervalsGetter func(spec, key string) []interface{}, skipCleanup bool) { + ctx := caseContextData.ctx + cluster := caseContextData.ClusterResources.Cluster + clusterProxy := caseContextData.clusterProxy + namespace := caseContextData.Namespace + cancelWatches := caseContextData.cancelWatches + specName := caseContextData.CaseName + + if cluster == nil { + return + } Byf("Dumping logs from the %q workload cluster", cluster.Name) // Dump all the logs from the workload cluster before deleting them. clusterProxy.CollectWorkloadClusterLogs(ctx, cluster.Namespace, cluster.Name, filepath.Join(artifactFolder, "clusters", cluster.Name, "machines")) Byf("Dumping all the Cluster API resources in the %q namespace", namespace.Name) - // Dump all Cluster API related resources to artifacts before deleting them. framework.DumpAllResources(ctx, framework.DumpAllResourcesInput{ Lister: clusterProxy.GetClient(), Namespace: namespace.Name, LogPath: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName(), "resources"), }) - if !skipCleanup { Byf("Deleting cluster %s/%s", cluster.Namespace, cluster.Name) // While https://github.com/kubernetes-sigs/cluster-api/issues/2955 is addressed in future iterations, there is a chance diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index de3ff2a4e..3a6caa1d4 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -9,93 +9,81 @@ import ( "os" "path/filepath" - "github.com/docker/docker/api/types" - "github.com/docker/docker/client" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" "k8s.io/utils/pointer" "sigs.k8s.io/cluster-api/test/framework/clusterctl" "sigs.k8s.io/cluster-api/util" ) -const ( - KubernetesVersion = "KUBERNETES_VERSION" - CNIPath = "CNI" - CNIResources = "CNI_RESOURCES" - IPFamily = "IP_FAMILY" -) - // creating a workload cluster // This test is meant to provide a first, fast signal to detect regression; it is recommended to use it as a PR blocker test. var _ = Describe("When BYOH joins existing cluster [PR-Blocking]", func() { var ( - ctx context.Context - specName = "quick-start" - namespace *corev1.Namespace - clusterName string - cancelWatches context.CancelFunc - clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult - dockerClient *client.Client - err error - byohostContainerIDs []string - agentLogFile1 = "/tmp/host-agent1.log" - agentLogFile2 = "/tmp/host-agent2.log" + caseContextData *CaseContext = nil + collectInfoData *CollectInfoContext = nil + byoHostPoolData *ByoHostPoolContext = nil ) BeforeEach(func() { - ctx = context.TODO() - Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName) + caseContextData = new(CaseContext) + Expect(caseContextData).NotTo(BeNil()) + caseContextData.CaseName = "single" + caseContextData.ClusterConName = clusterConName + caseContextData.clusterProxy = bootstrapClusterProxy + caseContextData.ClusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + Expect(caseContextData.ClusterResources).NotTo(BeNil()) + + specName := caseContextData.CaseName + caseContextData.ctx = context.TODO() + Expect(caseContextData.ctx).NotTo(BeNil(), "ctx is required for %s spec", specName) Expect(e2eConfig).NotTo(BeNil(), "Invalid argument. e2eConfig can't be nil when calling %s spec", specName) Expect(clusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. 
clusterctlConfigPath must be an existing file when calling %s spec", specName) - Expect(bootstrapClusterProxy).NotTo(BeNil(), "Invalid argument. bootstrapClusterProxy can't be nil when calling %s spec", specName) + Expect(caseContextData.clusterProxy).NotTo(BeNil(), "Invalid argument. bootstrapClusterProxy can't be nil when calling %s spec", specName) Expect(os.MkdirAll(artifactFolder, 0755)).To(Succeed(), "Invalid argument. artifactFolder can't be created for %s spec", specName) - Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion)) // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. - namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) - clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + setupSpecNamespace(caseContextData, artifactFolder) + + byoHostPoolData = new(ByoHostPoolContext) + Expect(byoHostPoolData).NotTo(BeNil()) + byoHostPoolData.Capacity = 2 + + collectInfoData = new(CollectInfoContext) + Expect(collectInfoData).NotTo(BeNil()) + collectInfoData.DeploymentLogDir = "/tmp/deplymentlogs" + }) It("Should create a workload cluster with single BYOH host", func() { - clusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6)) - byoHostName1 := "byohost1" - byoHostName2 := "byohost2" - - dockerClient, err = client.NewClientWithOpts(client.FromEnv) - Expect(err).NotTo(HaveOccurred()) - - var output types.HijackedResponse - output, byohostContainerID, err := setupByoDockerHost(ctx, clusterConName, byoHostName1, namespace.Name, dockerClient, bootstrapClusterProxy) - Expect(err).NotTo(HaveOccurred()) - defer output.Close() - byohostContainerIDs = append(byohostContainerIDs, byohostContainerID) - f := WriteDockerLog(output, agentLogFile1) - defer f.Close() - - output, byohostContainerID, err = setupByoDockerHost(ctx, clusterConName, byoHostName2, namespace.Name, dockerClient, bootstrapClusterProxy) - Expect(err).NotTo(HaveOccurred()) - defer output.Close() - byohostContainerIDs = append(byohostContainerIDs, byohostContainerID) - - // read the log of host agent container in backend, and write it - f = WriteDockerLog(output, agentLogFile2) - defer f.Close() + ctx := caseContextData.ctx + clusterProxy := caseContextData.clusterProxy + namespace := caseContextData.Namespace + specName := caseContextData.CaseName + clusterResources := caseContextData.ClusterResources + + caseContextData.ClusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6)) + + fl := setupByohostPool(caseContextData, collectInfoData, byoHostPoolData) + Byf("Creating byohost capacity pool containing %d hosts", byoHostPoolData.Capacity) + for _, f := range fl { + defer f.Close() + } clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ - ClusterProxy: bootstrapClusterProxy, + ClusterProxy: clusterProxy, ConfigCluster: clusterctl.ConfigClusterInput{ - LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()), + LogFolder: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName()), ClusterctlConfigPath: clusterctlConfigPath, - KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + KubeconfigPath: clusterProxy.GetKubeconfigPath(), InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, Flavor: clusterctl.DefaultFlavor, Namespace: namespace.Name, - ClusterName: clusterName, + ClusterName: caseContextData.ClusterName, KubernetesVersion: e2eConfig.GetVariable(KubernetesVersion), ControlPlaneMachineCount: 
pointer.Int64Ptr(1), WorkerMachineCount: pointer.Int64Ptr(1), @@ -104,32 +92,21 @@ var _ = Describe("When BYOH joins existing cluster [PR-Blocking]", func() { WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"), WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-worker-nodes"), }, clusterResources) - }) JustAfterEach(func() { if CurrentGinkgoTestDescription().Failed { - ShowInfo([]string{agentLogFile1, agentLogFile2}) + CollectInfo(caseContextData, collectInfoData) + ShowInfoBeforeCaseQuit() } }) AfterEach(func() { - // Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. - dumpSpecResourcesAndCleanup(ctx, specName, bootstrapClusterProxy, artifactFolder, namespace, cancelWatches, clusterResources.Cluster, e2eConfig.GetIntervals, skipCleanup) - - if dockerClient != nil && len(byohostContainerIDs) != 0 { - for _, byohostContainerID := range byohostContainerIDs { - err := dockerClient.ContainerStop(ctx, byohostContainerID, nil) - Expect(err).NotTo(HaveOccurred()) - - err = dockerClient.ContainerRemove(ctx, byohostContainerID, types.ContainerRemoveOptions{}) - Expect(err).NotTo(HaveOccurred()) - } + dumpSpecResourcesAndCleanup(caseContextData, artifactFolder, e2eConfig.GetIntervals, skipCleanup) + cleanByohostPool(caseContextData, byoHostPoolData) + if CurrentGinkgoTestDescription().Failed { + ShowInfoAfterCaseQuit(collectInfoData) } - - os.Remove(agentLogFile1) - os.Remove(agentLogFile2) - os.Remove(ReadByohControllerManagerLogShellFile) - os.Remove(ReadAllPodsShellFile) + RemoveLogs(collectInfoData) }) }) diff --git a/test/e2e/md_scale_test.go b/test/e2e/md_scale_test.go index ee1d6b331..e9d2e4775 100644 --- a/test/e2e/md_scale_test.go +++ b/test/e2e/md_scale_test.go @@ -9,11 +9,8 @@ import ( "os" "path/filepath" - "github.com/docker/docker/api/types" - "github.com/docker/docker/client" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" "k8s.io/utils/pointer" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" @@ -23,69 +20,71 @@ import ( var _ = Describe("When testing MachineDeployment scale out/in", func() { var ( - ctx context.Context - specName = "md-scale" - namespace *corev1.Namespace - cancelWatches context.CancelFunc - clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult - dockerClient *client.Client - err error - byoHostCapacityPool = 6 - byoHostName string - allbyohostContainerIDs []string - allAgentLogFiles []string + caseContextData *CaseContext = nil + collectInfoData *CollectInfoContext = nil + byoHostPoolData *ByoHostPoolContext = nil ) BeforeEach(func() { - ctx = context.TODO() - Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName) + caseContextData = new(CaseContext) + Expect(caseContextData).NotTo(BeNil()) + caseContextData.CaseName = "md-scale" + caseContextData.ClusterConName = clusterConName + caseContextData.clusterProxy = bootstrapClusterProxy + caseContextData.ClusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + Expect(caseContextData.ClusterResources).NotTo(BeNil()) + + specName := caseContextData.CaseName + caseContextData.ctx = context.TODO() + Expect(caseContextData.ctx).NotTo(BeNil(), "ctx is required for %s spec", specName) Expect(e2eConfig).NotTo(BeNil(), "Invalid argument. e2eConfig can't be nil when calling %s spec", specName) Expect(clusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. 
clusterctlConfigPath must be an existing file when calling %s spec", specName) Expect(bootstrapClusterProxy).NotTo(BeNil(), "Invalid argument. bootstrapClusterProxy can't be nil when calling %s spec", specName) Expect(os.MkdirAll(artifactFolder, 0755)).To(Succeed(), "Invalid argument. artifactFolder can't be created for %s spec", specName) - Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion)) // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. - namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) - clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + setupSpecNamespace(caseContextData, artifactFolder) + + byoHostPoolData = new(ByoHostPoolContext) + Expect(byoHostPoolData).NotTo(BeNil()) + + collectInfoData = new(CollectInfoContext) + Expect(collectInfoData).NotTo(BeNil()) + collectInfoData.DeploymentLogDir = fmt.Sprintf("/tmp/%s-deplymentlogs", caseContextData.CaseName) + }) It("Should successfully scale a MachineDeployment up and down upon changes to the MachineDeployment replica count", func() { - clusterName := fmt.Sprintf("%s-%s", specName, util.RandomString(6)) - dockerClient, err = client.NewClientWithOpts(client.FromEnv) - Expect(err).NotTo(HaveOccurred()) + ctx := caseContextData.ctx + clusterProxy := caseContextData.clusterProxy + namespace := caseContextData.Namespace + specName := caseContextData.CaseName + clusterResources := caseContextData.ClusterResources - By("Creating byohost capacity pool containing 5 hosts") - for i := 0; i < byoHostCapacityPool; i++ { - byoHostName = fmt.Sprintf("byohost-%s", util.RandomString(6)) - output, byohostContainerID, err := setupByoDockerHost(ctx, clusterConName, byoHostName, namespace.Name, dockerClient, bootstrapClusterProxy) - allbyohostContainerIDs = append(allbyohostContainerIDs, byohostContainerID) - Expect(err).NotTo(HaveOccurred()) + caseContextData.ClusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6)) - // read the log of host agent container in backend, and write it - agentLogFile := fmt.Sprintf("/tmp/host-agent-%d.log", i) - f := WriteDockerLog(output, agentLogFile) + byoHostPoolData.Capacity = 6 + Byf("Creating byohost capacity pool containing %d hosts", byoHostPoolData.Capacity) + fl := setupByohostPool(caseContextData, collectInfoData, byoHostPoolData) + for _, f := range fl { defer f.Close() - allAgentLogFiles = append(allAgentLogFiles, agentLogFile) } - // TODO: Write agent logs to files for better debugging - By("creating a workload cluster with one control plane node and one worker node") clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ - ClusterProxy: bootstrapClusterProxy, + ClusterProxy: clusterProxy, ConfigCluster: clusterctl.ConfigClusterInput{ - LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()), + LogFolder: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName()), ClusterctlConfigPath: clusterctlConfigPath, - KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + KubeconfigPath: clusterProxy.GetKubeconfigPath(), InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, Flavor: clusterctl.DefaultFlavor, Namespace: namespace.Name, - ClusterName: clusterName, + ClusterName: caseContextData.ClusterName, KubernetesVersion: e2eConfig.GetVariable(KubernetesVersion), ControlPlaneMachineCount: pointer.Int64Ptr(3), WorkerMachineCount: pointer.Int64Ptr(1), @@ -99,7 +98,7 @@ var _ = Describe("When 
testing MachineDeployment scale out/in", func() { By("Scaling the MachineDeployment out to 3") framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{ - ClusterProxy: bootstrapClusterProxy, + ClusterProxy: clusterProxy, Cluster: clusterResources.Cluster, MachineDeployment: clusterResources.MachineDeployments[0], Replicas: 3, @@ -110,42 +109,28 @@ var _ = Describe("When testing MachineDeployment scale out/in", func() { By("Scaling the MachineDeployment down to 2") framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{ - ClusterProxy: bootstrapClusterProxy, + ClusterProxy: clusterProxy, Cluster: clusterResources.Cluster, MachineDeployment: clusterResources.MachineDeployments[0], Replicas: 2, WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-worker-nodes"), }) - Expect(clusterResources.MachineDeployments[0].Spec.Replicas).To(Equal(pointer.Int32Ptr(2))) - }) JustAfterEach(func() { if CurrentGinkgoTestDescription().Failed { - ShowInfo(allAgentLogFiles) + CollectInfo(caseContextData, collectInfoData) + ShowInfoBeforeCaseQuit() } }) AfterEach(func() { - // Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. - dumpSpecResourcesAndCleanup(ctx, specName, bootstrapClusterProxy, artifactFolder, namespace, cancelWatches, clusterResources.Cluster, e2eConfig.GetIntervals, skipCleanup) - - if dockerClient != nil { - for _, byohostContainerID := range allbyohostContainerIDs { - err := dockerClient.ContainerStop(ctx, byohostContainerID, nil) - Expect(err).NotTo(HaveOccurred()) - - err = dockerClient.ContainerRemove(ctx, byohostContainerID, types.ContainerRemoveOptions{}) - Expect(err).NotTo(HaveOccurred()) - } - - } - - for _, agentLogFile := range allAgentLogFiles { - os.Remove(agentLogFile) + dumpSpecResourcesAndCleanup(caseContextData, artifactFolder, e2eConfig.GetIntervals, skipCleanup) + cleanByohostPool(caseContextData, byoHostPoolData) + if CurrentGinkgoTestDescription().Failed { + ShowInfoAfterCaseQuit(collectInfoData) } - os.Remove(ReadByohControllerManagerLogShellFile) - os.Remove(ReadAllPodsShellFile) + RemoveLogs(collectInfoData) }) })
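For reference, every spec touched above now follows the same scaffold: build a CaseContext, ByoHostPoolContext and CollectInfoContext in BeforeEach, provision the byohost pool inside the It block, and delegate failure diagnostics and cleanup to the shared helpers. Below is a minimal sketch of that pattern, not part of the diff itself; it assumes it lives in the same e2e package so that the suite-level variables (bootstrapClusterProxy, clusterConName, e2eConfig, artifactFolder, skipCleanup) and the helpers introduced above are in scope, and the case name and pool capacity are placeholders.

package e2e

import (
	"context"
	"fmt"

	. "github.com/onsi/ginkgo"
	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
	"sigs.k8s.io/cluster-api/util"
)

var _ = Describe("When <scenario>", func() {
	var (
		caseContextData *CaseContext
		byoHostPoolData *ByoHostPoolContext
		collectInfoData *CollectInfoContext
	)

	BeforeEach(func() {
		// Per-case context shared by all helpers: case name, cluster proxy, namespace and cluster resources.
		caseContextData = new(CaseContext)
		caseContextData.CaseName = "example" // placeholder case name
		caseContextData.ClusterConName = clusterConName
		caseContextData.clusterProxy = bootstrapClusterProxy
		caseContextData.ClusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
		caseContextData.ctx = context.TODO()
		setupSpecNamespace(caseContextData, artifactFolder)

		// Host pool and log-collection contexts consumed by the docker and debug helpers.
		byoHostPoolData = &ByoHostPoolContext{Capacity: 2} // placeholder capacity
		collectInfoData = &CollectInfoContext{
			DeploymentLogDir: fmt.Sprintf("/tmp/%s-deploymentlogs", caseContextData.CaseName),
		}
	})

	It("runs the scenario against the byohost pool", func() {
		caseContextData.ClusterName = fmt.Sprintf("%s-%s", caseContextData.CaseName, util.RandomString(6))

		// Start one agent container per host; keep the returned agent log files open until the spec ends.
		for _, f := range setupByohostPool(caseContextData, collectInfoData, byoHostPoolData) {
			defer f.Close()
		}

		// clusterctl.ApplyClusterTemplateAndWait(...) and the case-specific assertions go here.
	})

	JustAfterEach(func() {
		if CurrentGinkgoTestDescription().Failed {
			CollectInfo(caseContextData, collectInfoData) // start streaming controller-manager logs in the background
			ShowInfoBeforeCaseQuit()                      // dump the pod overview while the cluster still exists
		}
	})

	AfterEach(func() {
		dumpSpecResourcesAndCleanup(caseContextData, artifactFolder, e2eConfig.GetIntervals, skipCleanup)
		cleanByohostPool(caseContextData, byoHostPoolData)
		if CurrentGinkgoTestDescription().Failed {
			ShowInfoAfterCaseQuit(collectInfoData) // agent and deployment logs are complete only after teardown
		}
		RemoveLogs(collectInfoData)
	})
})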