From 5a490f7150d5af13f24cbd322f8bf49227a52083 Mon Sep 17 00:00:00 2001 From: rahulbabu95 Date: Wed, 27 Sep 2023 00:21:30 +0000 Subject: [PATCH] Enable modular upgrades for tinkerbell provider in CLI Signed-off-by: Rahul Ganesh --- ...mazonaws.com_tinkerbellmachineconfigs.yaml | 3 + config/manifest/eksa-components.yaml | 3 + pkg/api/v1alpha1/cluster_webhook.go | 7 - pkg/api/v1alpha1/cluster_webhook_test.go | 27 -- pkg/api/v1alpha1/tinkerbellmachineconfig.go | 7 + .../v1alpha1/tinkerbellmachineconfig_types.go | 1 + .../tinkerbellmachineconfig_types_test.go | 7 + pkg/providers/tinkerbell/assert.go | 106 ++++++-- pkg/providers/tinkerbell/assert_test.go | 173 +++++++++++++ pkg/providers/tinkerbell/cluster.go | 1 + .../tinkerbell/reconciler/reconciler.go | 53 ++-- pkg/providers/tinkerbell/template.go | 21 +- pkg/providers/tinkerbell/template_test.go | 51 ++++ .../cluster_osimage_machine_config.yaml | 238 ++++++++++++++++++ pkg/providers/tinkerbell/upgrade.go | 45 +++- pkg/providers/tinkerbell/upgrade_test.go | 150 +++++++++++ pkg/providers/tinkerbell/validate.go | 15 +- 17 files changed, 821 insertions(+), 87 deletions(-) create mode 100644 pkg/providers/tinkerbell/testdata/cluster_osimage_machine_config.yaml diff --git a/config/crd/bases/anywhere.eks.amazonaws.com_tinkerbellmachineconfigs.yaml b/config/crd/bases/anywhere.eks.amazonaws.com_tinkerbellmachineconfigs.yaml index 5b96943e59e54..0dca96ecb00cd 100644 --- a/config/crd/bases/anywhere.eks.amazonaws.com_tinkerbellmachineconfigs.yaml +++ b/config/crd/bases/anywhere.eks.amazonaws.com_tinkerbellmachineconfigs.yaml @@ -127,6 +127,8 @@ spec: type: object osFamily: type: string + osImageURL: + type: string templateRef: properties: kind: @@ -153,6 +155,7 @@ spec: required: - hardwareSelector - osFamily + - osImageURL type: object status: description: TinkerbellMachineConfigStatus defines the observed state diff --git a/config/manifest/eksa-components.yaml b/config/manifest/eksa-components.yaml index 
b5d6c6439808f..9eacbf86a5d34 100644 --- a/config/manifest/eksa-components.yaml +++ b/config/manifest/eksa-components.yaml @@ -5745,6 +5745,8 @@ spec: type: object osFamily: type: string + osImageURL: + type: string templateRef: properties: kind: @@ -5771,6 +5773,7 @@ spec: required: - hardwareSelector - osFamily + - osImageURL type: object status: description: TinkerbellMachineConfigStatus defines the observed state diff --git a/pkg/api/v1alpha1/cluster_webhook.go b/pkg/api/v1alpha1/cluster_webhook.go index 41e2ca5bad699..0339c7887758b 100644 --- a/pkg/api/v1alpha1/cluster_webhook.go +++ b/pkg/api/v1alpha1/cluster_webhook.go @@ -473,8 +473,6 @@ func validateKubeVersionSkew(newVersion, oldVersion KubernetesVersion, path *fie // ValidateWorkerKubernetesVersionSkew validates worker node group Kubernetes version skew between upgrades. func ValidateWorkerKubernetesVersionSkew(new, old *Cluster) field.ErrorList { var allErrs field.ErrorList - path := field.NewPath("spec").Child("WorkerNodeConfiguration.kubernetesVersion") - newClusterVersion := new.Spec.KubernetesVersion oldClusterVersion := old.Spec.KubernetesVersion @@ -485,11 +483,6 @@ func ValidateWorkerKubernetesVersionSkew(new, old *Cluster) field.ErrorList { for _, nodeGroupNewSpec := range new.Spec.WorkerNodeGroupConfigurations { newVersion := nodeGroupNewSpec.KubernetesVersion - if newVersion != nil && nodeGroupNewSpec.MachineGroupRef.Kind == TinkerbellMachineConfigKind { - allErrs = append(allErrs, field.Forbidden(path, "worker node group level kubernetesVersion is not supported for Tinkerbell")) - return allErrs - } - if workerNodeGrpOldSpec, ok := workerNodeGroupMap[nodeGroupNewSpec.Name]; ok { oldVersion := workerNodeGrpOldSpec.KubernetesVersion allErrs = append(allErrs, performWorkerKubernetesValidations(oldVersion, newVersion, oldClusterVersion, newClusterVersion)...) 
diff --git a/pkg/api/v1alpha1/cluster_webhook_test.go b/pkg/api/v1alpha1/cluster_webhook_test.go index 1d3416bdf2ecf..891df2fc3f7c7 100644 --- a/pkg/api/v1alpha1/cluster_webhook_test.go +++ b/pkg/api/v1alpha1/cluster_webhook_test.go @@ -2331,30 +2331,3 @@ func TestValidateWorkerVersionSkewAddNodeGroup(t *testing.T) { g := NewWithT(t) g.Expect(err).To(Succeed()) } - -func TestValidateWorkerVersionBlockTinkerbell(t *testing.T) { - kube119 := v1alpha1.KubernetesVersion("1.19") - - newCluster := baseCluster() - newCluster.Spec.KubernetesVersion = kube119 - newCluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube119 - newCluster.Spec.WorkerNodeGroupConfigurations[0].MachineGroupRef.Kind = v1alpha1.TinkerbellMachineConfigKind - newWorker := v1alpha1.WorkerNodeGroupConfiguration{ - Name: "md-1", - Count: ptr.Int(1), - MachineGroupRef: &v1alpha1.Ref{ - Kind: v1alpha1.TinkerbellMachineConfigKind, - Name: "eksa-unit-test", - }, - KubernetesVersion: &kube119, - } - newCluster.Spec.WorkerNodeGroupConfigurations = append(newCluster.Spec.WorkerNodeGroupConfigurations, newWorker) - - oldCluster := baseCluster() - oldCluster.Spec.KubernetesVersion = kube119 - oldCluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube119 - - err := newCluster.ValidateUpdate(oldCluster) - g := NewWithT(t) - g.Expect(err).ToNot(BeNil()) -} diff --git a/pkg/api/v1alpha1/tinkerbellmachineconfig.go b/pkg/api/v1alpha1/tinkerbellmachineconfig.go index 20cd5a56e4e3a..1750e721d7bd3 100644 --- a/pkg/api/v1alpha1/tinkerbellmachineconfig.go +++ b/pkg/api/v1alpha1/tinkerbellmachineconfig.go @@ -2,6 +2,7 @@ package v1alpha1 import ( "fmt" + "net/url" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -91,6 +92,12 @@ func validateTinkerbellMachineConfig(config *TinkerbellMachineConfig) error { ) } + if config.Spec.OSImageURL != "" { + if _, err := url.ParseRequestURI(config.Spec.OSImageURL); err != nil { + return fmt.Errorf("parsing osImageOverride: %v", err) + } + } + if 
len(config.Spec.Users) == 0 { return fmt.Errorf("TinkerbellMachineConfig: missing spec.Users: %s", config.Name) } diff --git a/pkg/api/v1alpha1/tinkerbellmachineconfig_types.go b/pkg/api/v1alpha1/tinkerbellmachineconfig_types.go index 059452823297e..e81649705938d 100644 --- a/pkg/api/v1alpha1/tinkerbellmachineconfig_types.go +++ b/pkg/api/v1alpha1/tinkerbellmachineconfig_types.go @@ -13,6 +13,7 @@ type TinkerbellMachineConfigSpec struct { HardwareSelector HardwareSelector `json:"hardwareSelector"` TemplateRef Ref `json:"templateRef,omitempty"` OSFamily OSFamily `json:"osFamily"` + OSImageURL string `json:"osImageURL"` Users []UserConfiguration `json:"users,omitempty"` HostOSConfiguration *HostOSConfiguration `json:"hostOSConfiguration,omitempty"` } diff --git a/pkg/api/v1alpha1/tinkerbellmachineconfig_types_test.go b/pkg/api/v1alpha1/tinkerbellmachineconfig_types_test.go index ef45b1ac5088c..206fc8a981fda 100644 --- a/pkg/api/v1alpha1/tinkerbellmachineconfig_types_test.go +++ b/pkg/api/v1alpha1/tinkerbellmachineconfig_types_test.go @@ -66,6 +66,13 @@ func TestTinkerbellMachineConfigValidateFail(t *testing.T) { ), expectedErr: "HostOSConfiguration is invalid for TinkerbellMachineConfig tinkerbellmachineconfig: NTPConfiguration.Servers can not be empty", }, + { + name: "Invalid OS Image URL", + machineConfig: CreateTinkerbellMachineConfig(func(mc *TinkerbellMachineConfig) { + mc.Spec.OSImageURL = "test" + }), + expectedErr: "parsing osImageOverride: parse \"test\": invalid URI for request", + }, } for _, tc := range tests { diff --git a/pkg/providers/tinkerbell/assert.go b/pkg/providers/tinkerbell/assert.go index 99018e6c72b95..9e02417a7ec20 100644 --- a/pkg/providers/tinkerbell/assert.go +++ b/pkg/providers/tinkerbell/assert.go @@ -90,6 +90,11 @@ func AssertOsFamilyValid(spec *ClusterSpec) error { return validateOsFamily(spec) } +// AssertOSImageURL ensures that the OSImageURL value is either set at the datacenter config level or set for each machine config and not 
at both levels. +func AssertOSImageURL(spec *ClusterSpec) error { + return validateOSImageURL(spec) +} + // AssertcontrolPlaneIPNotInUse ensures the endpoint host for the control plane isn't in use. // The check may be unreliable due to its implementation. func NewIPNotInUseAssertion(client networkutils.NetClient) ClusterSpecAssertion { @@ -264,6 +269,12 @@ type ValidatableCluster interface { // ControlPlaneReplicaCount retrieves the control plane replica count of the ValidatableCluster. ControlPlaneReplicaCount() int + + // ClusterK8sVersion retrieves the Cluster level Kubernetes version. + ClusterK8sVersion() v1alpha1.KubernetesVersion + + // WorkerNodeGroupK8sVersion maps each worker node group with its Kubernetes version. + WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion } // ValidatableTinkerbellClusterSpec wraps around the Tinkerbell ClusterSpec as a ValidatableCluster. @@ -289,6 +300,16 @@ func (v *ValidatableTinkerbellClusterSpec) WorkerNodeHardwareGroups() []WorkerNo return workerNodeGroupConfigs } +// ClusterK8sVersion retrieves the Kubernetes version set at the cluster level. +func (v *ValidatableTinkerbellClusterSpec) ClusterK8sVersion() v1alpha1.KubernetesVersion { + return v.Cluster.Spec.KubernetesVersion +} + +// WorkerNodeGroupK8sVersion returns each worker node group with its associated Kubernetes version. +func (v *ValidatableTinkerbellClusterSpec) WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion { + return WorkerNodeGroupWithK8sVersion(v.ClusterSpec.Spec) +} + // ValidatableTinkerbellCAPI wraps around the Tinkerbell control plane and worker CAPI obects as a ValidatableCluster. type ValidatableTinkerbellCAPI struct { KubeadmControlPlane *controlplanev1.KubeadmControlPlane @@ -313,6 +334,26 @@ func (v *ValidatableTinkerbellCAPI) WorkerNodeHardwareGroups() []WorkerNodeHardw return workerNodeHardwareList } +// ClusterK8sVersion returns the Kubernetes version in major.minor format for a ValidatableTinkerbellCAPI.
+func (v *ValidatableTinkerbellCAPI) ClusterK8sVersion() v1alpha1.KubernetesVersion { + return v.toK8sVersion(v.KubeadmControlPlane.Spec.Version) +} + +// WorkerNodeGroupK8sVersion returns each worker node group mapped to Kubernetes version in major.minor format for a ValidatableTinkerbellCAPI. +func (v *ValidatableTinkerbellCAPI) WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion { + wngK8sversion := make(map[string]v1alpha1.KubernetesVersion) + for _, wng := range v.WorkerGroups { + k8sVersion := v.toK8sVersion(*wng.MachineDeployment.Spec.Template.Spec.Version) + wngK8sversion[wng.MachineDeployment.Name] = k8sVersion + } + return wngK8sversion +} + +func (v *ValidatableTinkerbellCAPI) toK8sVersion(k8sversion string) v1alpha1.KubernetesVersion { + kubeVersion := v1alpha1.KubernetesVersion(k8sversion[1:5]) + return kubeVersion +} + // AssertionsForScaleUpDown asserts that catalogue has sufficient hardware to // support the scaling up/down from current ClusterSpec to desired ValidatableCluster. // nolint:gocyclo // TODO: Reduce cyclomatic complexity https://github.com/aws/eks-anywhere-internal/issues/1186 @@ -391,7 +432,7 @@ func AssertionsForScaleUpDown(catalogue *hardware.Catalogue, current Validatable // ExtraHardwareAvailableAssertionForRollingUpgrade asserts that catalogue has sufficient hardware to // support the ClusterSpec during an rolling upgrade workflow. -func ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue *hardware.Catalogue) ClusterSpecAssertion { +func ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue *hardware.Catalogue, current ValidatableCluster, eksaVersionUpgrade bool) ClusterSpecAssertion { return func(spec *ClusterSpec) error { // Without Hardware selectors we get undesirable behavior so ensure we have them for // all MachineConfigs. @@ -403,24 +444,55 @@ func ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue *hardware.Catalo // will account for the same selector being specified on different groups. 
requirements := minimumHardwareRequirements{} - maxSurge := 1 - if spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil { - maxSurge = spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy.RollingUpdate.MaxSurge + if spec.Cluster.Spec.KubernetesVersion != current.ClusterK8sVersion() || eksaVersionUpgrade { + if err := ensureCPHardwareAvailability(spec, current, requirements); err != nil { + return err + } } - err := requirements.Add( - spec.ControlPlaneMachineConfig().Spec.HardwareSelector, - maxSurge, - ) - if err != nil { + + if err := ensureWorkerHardwareAvailability(spec, current, requirements, eksaVersionUpgrade); err != nil { + return err + } + + if spec.HasExternalEtcd() { + return fmt.Errorf("external etcd upgrade is not supported") + } + + if err := validateMinimumHardwareRequirements(requirements, catalogue); err != nil { return fmt.Errorf("for rolling upgrade, %v", err) } + return nil + } +} - for _, nodeGroup := range spec.WorkerNodeGroupConfigurations() { - maxSurge = 1 +func ensureCPHardwareAvailability(spec *ClusterSpec, current ValidatableCluster, hwReq minimumHardwareRequirements) error { + maxSurge := 1 + + if spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil { + maxSurge = spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy.RollingUpdate.MaxSurge + } + err := hwReq.Add( + spec.ControlPlaneMachineConfig().Spec.HardwareSelector, + maxSurge, + ) + if err != nil { + return fmt.Errorf("for rolling upgrade, %v", err) + } + return nil +} + +func ensureWorkerHardwareAvailability(spec *ClusterSpec, current ValidatableCluster, hwReq minimumHardwareRequirements, eksaVersionUpgrade bool) error { + currentWngK8sversion := current.WorkerNodeGroupK8sVersion() + desiredWngK8sVersion := WorkerNodeGroupWithK8sVersion(spec.Spec) + for _, nodeGroup := range spec.WorkerNodeGroupConfigurations() { + maxSurge := 1 + // As rolling upgrades and scale up/down is not permitted in a single operation, 
its safe to access directly using the md name. + mdName := fmt.Sprintf("%s-%s", spec.Cluster.Name, nodeGroup.Name) + if currentWngK8sversion[mdName] != desiredWngK8sVersion[mdName] || eksaVersionUpgrade { if nodeGroup.UpgradeRolloutStrategy != nil { maxSurge = nodeGroup.UpgradeRolloutStrategy.RollingUpdate.MaxSurge } - err := requirements.Add( + err := hwReq.Add( spec.WorkerNodeGroupMachineConfig(nodeGroup).Spec.HardwareSelector, maxSurge, ) @@ -428,16 +500,8 @@ func ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue *hardware.Catalo return fmt.Errorf("for rolling upgrade, %v", err) } } - - if spec.HasExternalEtcd() { - return fmt.Errorf("external etcd upgrade is not supported") - } - - if err := validateMinimumHardwareRequirements(requirements, catalogue); err != nil { - return fmt.Errorf("for rolling upgrade, %v", err) - } - return nil } + return nil } // ensureHardwareSelectorsSpecified ensures each machine config present in spec has a hardware diff --git a/pkg/providers/tinkerbell/assert_test.go b/pkg/providers/tinkerbell/assert_test.go index 92cf031a70e62..c54a15b28be39 100644 --- a/pkg/providers/tinkerbell/assert_test.go +++ b/pkg/providers/tinkerbell/assert_test.go @@ -2,6 +2,7 @@ package tinkerbell_test import ( "errors" + "fmt" "net" "testing" "time" @@ -119,6 +120,42 @@ func TestAssertMachineConfigNamespaceMatchesDatacenterConfig_Different(t *testin g.Expect(err).ToNot(gomega.Succeed()) } +func TestAssertMachineConfigOSImageURL_Error(t *testing.T) { + g := gomega.NewWithT(t) + builder := NewDefaultValidClusterSpecBuilder() + clusterSpec := builder.Build() + clusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + clusterSpec.DatacenterConfig.Spec.OSImageURL = "test-url" + clusterSpec.MachineConfigs[builder.ControlPlaneMachineName].Spec.OSImageURL = "test-url" + err := tinkerbell.AssertOSImageURL(clusterSpec) + g.Expect(err).ToNot(gomega.Succeed()) +} + +func TestAssertMachineConfigOSImageURLNotSpecified_Error(t *testing.T) { + g := 
gomega.NewWithT(t) + builder := NewDefaultValidClusterSpecBuilder() + clusterSpec := builder.Build() + clusterSpec.DatacenterConfig.Spec.OSImageURL = "" + // set OsImageURL at machineConfig level but not for all machine configs + clusterSpec.MachineConfigs[builder.ControlPlaneMachineName].Spec.OSImageURL = "test-url" + err := tinkerbell.AssertOSImageURL(clusterSpec) + g.Expect(err).ToNot(gomega.Succeed()) +} + +func TestAssertMachineConfigOSImageURLSpecified_Succeed(t *testing.T) { + g := gomega.NewWithT(t) + builder := NewDefaultValidClusterSpecBuilder() + clusterSpec := builder.Build() + clusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + clusterSpec.DatacenterConfig.Spec.OSImageURL = "" + // set OsImageURL at machineConfig level for the control plane, external etcd and worker node group machine configs + clusterSpec.MachineConfigs[builder.ControlPlaneMachineName].Spec.OSImageURL = "test-url" + clusterSpec.MachineConfigs[builder.ExternalEtcdMachineName].Spec.OSImageURL = "test-url" + clusterSpec.MachineConfigs[builder.WorkerNodeGroupMachineName].Spec.OSImageURL = "test-url" + err := tinkerbell.AssertOSImageURL(clusterSpec) + g.Expect(err).To(gomega.Succeed()) +} + func TestAssertEtcdMachineRefExists_Exists(t *testing.T) { g := gomega.NewWithT(t) clusterSpec := NewDefaultValidClusterSpecBuilder().Build() @@ -480,6 +517,27 @@ func TestValidatableClusterWorkerNodeGroupConfigs(t *testing.T) { g.Expect(workerConfigs[0].Replicas).To(gomega.Equal(1)) } +func TestValidatableClusterClusterK8sVersion(t *testing.T) { + g := gomega.NewWithT(t) + clusterSpec := NewDefaultValidClusterSpecBuilder().Build() + clusterSpec.Cluster.Spec.KubernetesVersion = eksav1alpha1.Kube125 + validatableCluster := &tinkerbell.ValidatableTinkerbellClusterSpec{clusterSpec} + + g.Expect(validatableCluster.ClusterK8sVersion()).To(gomega.Equal(eksav1alpha1.Kube125)) +} + +func TestValidatableClusterWorkerNodeGroupK8sVersion(t *testing.T) { + g := gomega.NewWithT(t) + clusterSpec := NewDefaultValidClusterSpecBuilder().Build() +
kube125 := eksav1alpha1.Kube125 + clusterSpec.WorkerNodeGroupConfigurations()[0].KubernetesVersion = &kube125 + validatableCluster := &tinkerbell.ValidatableTinkerbellClusterSpec{clusterSpec} + wngK8sVersion := validatableCluster.WorkerNodeGroupK8sVersion() + mdName := fmt.Sprintf("%s-%s", clusterSpec.Cluster.Name, clusterSpec.WorkerNodeGroupConfigurations()[0].Name) + + g.Expect(wngK8sVersion[mdName]).To(gomega.Equal(kube125)) +} + func TestValidatableTinkerbellCAPIControlPlaneReplicaCount(t *testing.T) { g := gomega.NewWithT(t) @@ -499,6 +557,24 @@ func TestValidatableTinkerbellCAPIWorkerNodeGroupConfigs(t *testing.T) { g.Expect(workerConfigs[0].Replicas).To(gomega.Equal(1)) } +func TestValidateTinkerbellCAPIClusterK8sVersion(t *testing.T) { + g := gomega.NewWithT(t) + validatableCAPI := validatableTinkerbellCAPI() + validatableCAPI.KubeadmControlPlane.Spec.Version = "v1.27.5-eks-1-27-12" + k8sVersion := validatableCAPI.ClusterK8sVersion() + kube127 := eksav1alpha1.Kube127 + g.Expect(k8sVersion).To(gomega.Equal(kube127)) +} + +func TestValidateTinkerbellCAPIWorkerNodeK8sVersion(t *testing.T) { + g := gomega.NewWithT(t) + validatableCAPI := validatableTinkerbellCAPI() + wngK8sVersion := validatableCAPI.WorkerNodeGroupK8sVersion() + mdName := validatableCAPI.WorkerGroups[0].MachineDeployment.Name + kube121 := eksav1alpha1.Kube121 + g.Expect(wngK8sVersion[mdName]).To(gomega.Equal(kube121)) +} + func TestAssertionsForScaleUpDown_Success(t *testing.T) { g := gomega.NewWithT(t) @@ -584,6 +660,103 @@ func TestAssertionsForScaleUpDown_AddWorkerSuccess(t *testing.T) { g.Expect(assertion(newClusterSpec)).To(gomega.Succeed()) } +func TestAssertionsForRollingUpgrade_CPOnly(t *testing.T) { + g := gomega.NewWithT(t) + clusterSpec := NewDefaultValidClusterSpecBuilder().Build() + clusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + clusterSpec.Cluster.Spec.KubernetesVersion = eksav1alpha1.Kube124 + catalogue := hardware.NewCatalogue() + _ = 
catalogue.InsertHardware(&v1alpha1.Hardware{ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"type": "cp"}, + }}) + + kube124 := eksav1alpha1.Kube124 + clusterSpec.WorkerNodeGroupConfigurations()[0].KubernetesVersion = &kube124 + assertion := tinkerbell.ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue, &tinkerbell.ValidatableTinkerbellClusterSpec{clusterSpec}, false) + newClusterSpec := NewDefaultValidClusterSpecBuilder().Build() + newClusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + newClusterSpec.WorkerNodeGroupConfigurations()[0].KubernetesVersion = &kube124 + newClusterSpec.Cluster.Spec.KubernetesVersion = eksav1alpha1.Kube125 + g.Expect(assertion(newClusterSpec)).To(gomega.Succeed()) +} + +func TestAssertionsForRollingUpgrade_WorkerOnly(t *testing.T) { + g := gomega.NewWithT(t) + clusterSpec := NewDefaultValidClusterSpecBuilder().Build() + clusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + kube124 := eksav1alpha1.Kube124 + clusterSpec.Cluster.Spec.KubernetesVersion = kube124 + catalogue := hardware.NewCatalogue() + _ = catalogue.InsertHardware(&v1alpha1.Hardware{ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"type": "worker"}, + }}) + + kube125 := eksav1alpha1.Kube125 + clusterSpec.WorkerNodeGroupConfigurations()[0].KubernetesVersion = &kube124 + assertion := tinkerbell.ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue, &tinkerbell.ValidatableTinkerbellClusterSpec{clusterSpec}, false) + newClusterSpec := NewDefaultValidClusterSpecBuilder().Build() + newClusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + newClusterSpec.Cluster.Spec.KubernetesVersion = kube124 + newClusterSpec.WorkerNodeGroupConfigurations()[0].KubernetesVersion = &kube125 + g.Expect(assertion(newClusterSpec)).To(gomega.Succeed()) +} + +func TestAssertionsForRollingUpgrade_BothCPWorker(t *testing.T) { + g := gomega.NewWithT(t) + clusterSpec := NewDefaultValidClusterSpecBuilder().Build() + 
clusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + kube124 := eksav1alpha1.Kube124 + clusterSpec.Cluster.Spec.KubernetesVersion = kube124 + catalogue := hardware.NewCatalogue() + _ = catalogue.InsertHardware(&v1alpha1.Hardware{ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"type": "cp"}, + }}) + _ = catalogue.InsertHardware(&v1alpha1.Hardware{ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"type": "worker"}, + }}) + + assertion := tinkerbell.ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue, &tinkerbell.ValidatableTinkerbellClusterSpec{clusterSpec}, false) + newClusterSpec := NewDefaultValidClusterSpecBuilder().Build() + kube125 := eksav1alpha1.Kube125 + newClusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + newClusterSpec.Cluster.Spec.KubernetesVersion = kube125 + g.Expect(assertion(newClusterSpec)).To(gomega.Succeed()) +} + +func TestAssertionsForRollingUpgrade_CPError(t *testing.T) { + g := gomega.NewWithT(t) + clusterSpec := NewDefaultValidClusterSpecBuilder().Build() + clusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + kube124 := eksav1alpha1.Kube124 + clusterSpec.Cluster.Spec.KubernetesVersion = kube124 + catalogue := hardware.NewCatalogue() + + assertion := tinkerbell.ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue, &tinkerbell.ValidatableTinkerbellClusterSpec{clusterSpec}, false) + newClusterSpec := NewDefaultValidClusterSpecBuilder().Build() + newClusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + newClusterSpec.WorkerNodeGroupConfigurations()[0].KubernetesVersion = &kube124 + newClusterSpec.Cluster.Spec.KubernetesVersion = eksav1alpha1.Kube125 + g.Expect(assertion(newClusterSpec)).To(gomega.MatchError(gomega.ContainSubstring("minimum hardware count not met for selector '{\"type\":\"cp\"}'"))) +} + +func TestAssertionsForRollingUpgrade_WorkerError(t *testing.T) { + g := gomega.NewWithT(t) + clusterSpec := NewDefaultValidClusterSpecBuilder().Build() + 
clusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + kube124 := eksav1alpha1.Kube124 + kube125 := eksav1alpha1.Kube125 + clusterSpec.Cluster.Spec.KubernetesVersion = kube125 + clusterSpec.WorkerNodeGroupConfigurations()[0].KubernetesVersion = &kube124 + catalogue := hardware.NewCatalogue() + + assertion := tinkerbell.ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue, &tinkerbell.ValidatableTinkerbellClusterSpec{clusterSpec}, false) + newClusterSpec := NewDefaultValidClusterSpecBuilder().Build() + newClusterSpec.Spec.Cluster.Spec.ExternalEtcdConfiguration = nil + newClusterSpec.WorkerNodeGroupConfigurations()[0].KubernetesVersion = &kube125 + newClusterSpec.Cluster.Spec.KubernetesVersion = kube125 + g.Expect(assertion(newClusterSpec)).To(gomega.MatchError(gomega.ContainSubstring("minimum hardware count not met for selector '{\"type\":\"worker\"}'"))) +} + func TestAssertionsForScaleUpDown_ExternalEtcdErrorFails(t *testing.T) { g := gomega.NewWithT(t) diff --git a/pkg/providers/tinkerbell/cluster.go b/pkg/providers/tinkerbell/cluster.go index fcc61f5bc36ff..1a0a3866db97d 100644 --- a/pkg/providers/tinkerbell/cluster.go +++ b/pkg/providers/tinkerbell/cluster.go @@ -108,6 +108,7 @@ func NewClusterSpecValidator(assertions ...ClusterSpecAssertion) *ClusterSpecVal AssertMachineConfigsValid, AssertMachineConfigNamespaceMatchesDatacenterConfig, AssertOsFamilyValid, + AssertOSImageURL, AssertTinkerbellIPAndControlPlaneIPNotSame, AssertHookRetrievableWithoutProxy, ) diff --git a/pkg/providers/tinkerbell/reconciler/reconciler.go b/pkg/providers/tinkerbell/reconciler/reconciler.go index 94a82c6c4278a..5ea5a0e8abec2 100644 --- a/pkg/providers/tinkerbell/reconciler/reconciler.go +++ b/pkg/providers/tinkerbell/reconciler/reconciler.go @@ -367,32 +367,19 @@ func (r *Reconciler) ValidateHardware(ctx context.Context, log logr.Logger, tink switch o { case K8sVersionUpgradeOperation: - 
v.Register(tinkerbell.ExtraHardwareAvailableAssertionForRollingUpgrade(kubeReader.GetCatalogue())) + validatableCAPI, err := r.getValidatableCAPI(ctx, tinkerbellScope.ClusterSpec.Cluster) + if err != nil { + return controller.Result{}, err + } + // eksa version upgrade cannot be triggered from controller, so set it to false. + v.Register(tinkerbell.ExtraHardwareAvailableAssertionForRollingUpgrade(kubeReader.GetCatalogue(), validatableCAPI, false)) case NewClusterOperation: v.Register(tinkerbell.MinimumHardwareAvailableAssertionForCreate(kubeReader.GetCatalogue())) case NoChange: - currentKCP, err := controller.GetKubeadmControlPlane(ctx, r.client, tinkerbellScope.ClusterSpec.Cluster) + validatableCAPI, err := r.getValidatableCAPI(ctx, tinkerbellScope.ClusterSpec.Cluster) if err != nil { return controller.Result{}, err } - var wgs []*clusterapi.WorkerGroup[*tinkerbellv1.TinkerbellMachineTemplate] - for _, wnc := range tinkerbellScope.ClusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { - md := &clusterv1.MachineDeployment{} - mdName := clusterapi.MachineDeploymentName(tinkerbellScope.ClusterSpec.Cluster, wnc) - key := types.NamespacedName{Namespace: constants.EksaSystemNamespace, Name: mdName} - err := r.client.Get(ctx, key, md) - if err == nil { - wgs = append(wgs, &clusterapi.WorkerGroup[*tinkerbellv1.TinkerbellMachineTemplate]{ - MachineDeployment: md, - }) - } else if !apierrors.IsNotFound(err) { - return controller.Result{}, err - } - } - validatableCAPI := &tinkerbell.ValidatableTinkerbellCAPI{ - KubeadmControlPlane: currentKCP, - WorkerGroups: wgs, - } v.Register(tinkerbell.AssertionsForScaleUpDown(kubeReader.GetCatalogue(), validatableCAPI, false)) } @@ -413,6 +400,32 @@ func (r *Reconciler) ValidateHardware(ctx context.Context, log logr.Logger, tink return controller.Result{}, nil } +func (r *Reconciler) getValidatableCAPI(ctx context.Context, cluster *anywherev1.Cluster) (*tinkerbell.ValidatableTinkerbellCAPI, error) { + currentKCP, err := 
controller.GetKubeadmControlPlane(ctx, r.client, cluster) + if err != nil { + return nil, err + } + var wgs []*clusterapi.WorkerGroup[*tinkerbellv1.TinkerbellMachineTemplate] + for _, wnc := range cluster.Spec.WorkerNodeGroupConfigurations { + md := &clusterv1.MachineDeployment{} + mdName := clusterapi.MachineDeploymentName(cluster, wnc) + key := types.NamespacedName{Namespace: constants.EksaSystemNamespace, Name: mdName} + err := r.client.Get(ctx, key, md) + if err == nil { + wgs = append(wgs, &clusterapi.WorkerGroup[*tinkerbellv1.TinkerbellMachineTemplate]{ + MachineDeployment: md, + }) + } else if !apierrors.IsNotFound(err) { + return nil, err + } + } + validatableCAPI := &tinkerbell.ValidatableTinkerbellCAPI{ + KubeadmControlPlane: currentKCP, + WorkerGroups: wgs, + } + return validatableCAPI, nil +} + // ValidateRufioMachines checks to ensure all the Rufio machines condition contactable is True. func (r *Reconciler) ValidateRufioMachines(ctx context.Context, log logr.Logger, tinkerbellScope *Scope) (controller.Result, error) { clusterSpec := tinkerbellScope.ClusterSpec diff --git a/pkg/providers/tinkerbell/template.go b/pkg/providers/tinkerbell/template.go index 70db964054c74..4100b092d4ccf 100644 --- a/pkg/providers/tinkerbell/template.go +++ b/pkg/providers/tinkerbell/template.go @@ -69,9 +69,15 @@ func (tb *TemplateBuilder) GenerateCAPISpecControlPlane(clusterSpec *cluster.Spe if err != nil { return nil, err } + var OSImageURL string + if cpTemplateConfig == nil { + OSImageURL = clusterSpec.TinkerbellDatacenter.Spec.OSImageURL + if tb.controlPlaneMachineSpec.OSImageURL != "" { + OSImageURL = tb.controlPlaneMachineSpec.OSImageURL + } versionBundle := bundle.VersionsBundle - cpTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, tb.datacenterSpec.OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, tb.controlPlaneMachineSpec.OSFamily) + cpTemplateConfig = 
v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, tb.controlPlaneMachineSpec.OSFamily) } cpTemplateString, err := cpTemplateConfig.ToTemplateString() @@ -83,10 +89,14 @@ func (tb *TemplateBuilder) GenerateCAPISpecControlPlane(clusterSpec *cluster.Spe var etcdTemplateString string if clusterSpec.Cluster.Spec.ExternalEtcdConfiguration != nil { etcdMachineSpec = *tb.etcdMachineSpec + OSImageURL = clusterSpec.TinkerbellDatacenter.Spec.OSImageURL + if etcdMachineSpec.OSImageURL != "" { + OSImageURL = etcdMachineSpec.OSImageURL + } etcdTemplateConfig := clusterSpec.TinkerbellTemplateConfigs[tb.etcdMachineSpec.TemplateRef.Name] if etcdTemplateConfig == nil { versionBundle := bundle.VersionsBundle - etcdTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, tb.datacenterSpec.OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, tb.etcdMachineSpec.OSFamily) + etcdTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, tb.etcdMachineSpec.OSFamily) } etcdTemplateString, err = etcdTemplateConfig.ToTemplateString() if err != nil { @@ -111,12 +121,17 @@ func (tb *TemplateBuilder) GenerateCAPISpecControlPlane(clusterSpec *cluster.Spe func (tb *TemplateBuilder) GenerateCAPISpecWorkers(clusterSpec *cluster.Spec, workloadTemplateNames, kubeadmconfigTemplateNames map[string]string) (content []byte, err error) { workerSpecs := make([][]byte, 0, len(clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations)) bundle := clusterSpec.RootVersionsBundle() + OSImageURL := clusterSpec.TinkerbellDatacenter.Spec.OSImageURL + for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { workerNodeMachineSpec := tb.WorkerNodeGroupMachineSpecs[workerNodeGroupConfiguration.MachineGroupRef.Name] 
wTemplateConfig := clusterSpec.TinkerbellTemplateConfigs[workerNodeMachineSpec.TemplateRef.Name] if wTemplateConfig == nil { versionBundle := bundle.VersionsBundle - wTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, tb.datacenterSpec.OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, workerNodeMachineSpec.OSFamily) + if workerNodeMachineSpec.OSImageURL != "" { + OSImageURL = workerNodeMachineSpec.OSImageURL + } + wTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, workerNodeMachineSpec.OSFamily) } wTemplateString, err := wTemplateConfig.ToTemplateString() diff --git a/pkg/providers/tinkerbell/template_test.go b/pkg/providers/tinkerbell/template_test.go index 29bffc977e514..389934a26955a 100644 --- a/pkg/providers/tinkerbell/template_test.go +++ b/pkg/providers/tinkerbell/template_test.go @@ -85,3 +85,54 @@ func TestBuildTemplateMapCPFailAuditPolicy(t *testing.T) { _, err = buildTemplateMapCP(clusterSpec, *controlPlaneMachineSpec, *etcdMachineSpec, cpTemplateOverride, etcdTemplateOverride, *clusterSpec.TinkerbellDatacenter.Spec.DeepCopy()) g.Expect(err).To(HaveOccurred()) } + +func TestGenerateTemplateBuilderForMachineConfigOsImageURL(t *testing.T) { + g := NewWithT(t) + testFile := "testdata/cluster_osimage_machine_config.yaml" + clusterSpec := test.NewFullClusterSpec(t, testFile) + + expectedControlPlaneMachineSpec := &v1alpha1.TinkerbellMachineConfigSpec{ + HardwareSelector: map[string]string{"type": "cp"}, + TemplateRef: v1alpha1.Ref{ + Kind: "TinkerbellTemplateConfig", + Name: "tink-test", + }, + OSFamily: "ubuntu", + OSImageURL: "https://ubuntu.gz", + Users: []v1alpha1.UserConfiguration{ + { + Name: "tink-user", + SshAuthorizedKeys: []string{"ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQC1BK73XhIzjX+meUr7pIYh6RHbvI3tmHeQIXY5lv7aztN1UoX+bhPo3dwo2sfSQn5kuxgQdnxIZ/CTzy0p0GkEYVv3gwspCeurjmu0XmrdmaSGcGxCEWT/65NtvYrQtUE5ELxJ+N/aeZNlK2B7IWANnw/82913asXH4VksV1NYNduP0o1/G4XcwLLSyVFB078q/oEnmvdNIoS61j4/o36HVtENJgYr0idcBvwJdvcGxGnPaqOhx477t+kfJAa5n5dSA5wilIaoXH5i1Tf/HsTCM52L+iNCARvQzJYZhzbWI1MDQwzILtIBEQCJsl2XSqIupleY8CxqQ6jCXt2mhae+wPc3YmbO5rFvr2/EvC57kh3yDs1Nsuj8KOvD78KeeujbR8n8pScm3WDp62HFQ8lEKNdeRNj6kB8WnuaJvPnyZfvzOhwG65/9w13IBl7B1sWxbFnq2rMpm5uHVK7mAmjL0Tt8zoDhcE1YJEnp9xte3/pvmKPkST5Q/9ZtR9P5sI+02jY0fvPkPyC03j2gsPixG7rpOCwpOdbny4dcj0TDeeXJX8er+oVfJuLYz0pNWJcT2raDdFfcqvYA0B0IyNYlj5nWX4RuEcyT3qocLReWPnZojetvAG/H8XwOh7fEVGqHAKOVSnPXCSQJPl6s0H12jPJBDJMTydtYPEszl4/CeQ=="}, + }, + }, + } + gotExpectedControlPlaneMachineSpec, err := getControlPlaneMachineSpec(clusterSpec) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(gotExpectedControlPlaneMachineSpec).To(Equal(expectedControlPlaneMachineSpec)) + + expectedWorkerNodeGroupMachineSpec := map[string]v1alpha1.TinkerbellMachineConfigSpec{ + "test-md": { + HardwareSelector: map[string]string{"type": "worker"}, + TemplateRef: v1alpha1.Ref{ + Kind: "TinkerbellTemplateConfig", + Name: "tink-test", + }, + OSFamily: "ubuntu", + OSImageURL: "https://ubuntu.gz", + Users: []v1alpha1.UserConfiguration{ + { + Name: "tink-user", + SshAuthorizedKeys: []string{"ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQC1BK73XhIzjX+meUr7pIYh6RHbvI3tmHeQIXY5lv7aztN1UoX+bhPo3dwo2sfSQn5kuxgQdnxIZ/CTzy0p0GkEYVv3gwspCeurjmu0XmrdmaSGcGxCEWT/65NtvYrQtUE5ELxJ+N/aeZNlK2B7IWANnw/82913asXH4VksV1NYNduP0o1/G4XcwLLSyVFB078q/oEnmvdNIoS61j4/o36HVtENJgYr0idcBvwJdvcGxGnPaqOhx477t+kfJAa5n5dSA5wilIaoXH5i1Tf/HsTCM52L+iNCARvQzJYZhzbWI1MDQwzILtIBEQCJsl2XSqIupleY8CxqQ6jCXt2mhae+wPc3YmbO5rFvr2/EvC57kh3yDs1Nsuj8KOvD78KeeujbR8n8pScm3WDp62HFQ8lEKNdeRNj6kB8WnuaJvPnyZfvzOhwG65/9w13IBl7B1sWxbFnq2rMpm5uHVK7mAmjL0Tt8zoDhcE1YJEnp9xte3/pvmKPkST5Q/9ZtR9P5sI+02jY0fvPkPyC03j2gsPixG7rpOCwpOdbny4dcj0TDeeXJX8er+oVfJuLYz0pNWJcT2raDdFfcqvYA0B0IyNYlj5nWX4RuEcyT3qocLReWPnZojetvAG/H8XwOh7fEVGqHAKOVSnPXCSQJPl6s0H12jPJBDJMTydtYPEszl4/CeQ== testemail@test.com"}, + }, + }, + }, + } + gotWorkerNodeGroupMachineSpec, err := getWorkerNodeGroupMachineSpec(clusterSpec) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(gotWorkerNodeGroupMachineSpec).To(Equal(expectedWorkerNodeGroupMachineSpec)) + + gotEtcdMachineSpec, err := getEtcdMachineSpec(clusterSpec) + var expectedEtcdMachineSpec *v1alpha1.TinkerbellMachineConfigSpec + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(gotEtcdMachineSpec).To(Equal(expectedEtcdMachineSpec)) +} \ No newline at end of file diff --git a/pkg/providers/tinkerbell/testdata/cluster_osimage_machine_config.yaml b/pkg/providers/tinkerbell/testdata/cluster_osimage_machine_config.yaml new file mode 100644 index 0000000000000..23d01f8a81f28 --- /dev/null +++ b/pkg/providers/tinkerbell/testdata/cluster_osimage_machine_config.yaml @@ -0,0 +1,238 @@ +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: Cluster +metadata: + name: test + namespace: test-namespace +spec: + clusterNetwork: + cni: cilium + pods: + cidrBlocks: + - 192.168.0.0/16 + services: + cidrBlocks: + - 10.96.0.0/12 + controlPlaneConfiguration: + upgradeRolloutStrategy: + type: "RollingUpdate" + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + count: 1 + endpoint: + host: 1.2.3.4 + machineGroupRef: + name: test-cp + 
kind: TinkerbellMachineConfig + datacenterRef: + kind: TinkerbellDatacenterConfig + name: test + identityProviderRefs: + - kind: AWSIamConfig + name: eksa-unit-test + kubernetesVersion: "1.21" + managementCluster: + name: test + workerNodeGroupConfigurations: + - count: 1 + upgradeRolloutStrategy: + type: "RollingUpdate" + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + machineGroupRef: + name: test-md + kind: TinkerbellMachineConfig + kubernetesVersion: "1.21" + +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: TinkerbellDatacenterConfig +metadata: + name: test + namespace: test-namespace +spec: + tinkerbellIP: "5.6.7.8" + + +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: TinkerbellMachineConfig +metadata: + name: test-cp + namespace: test-namespace +spec: + hardwareSelector: + type: "cp" + osFamily: ubuntu + osImageURL: "https://ubuntu.gz" + templateRef: + kind: TinkerbellTemplateConfig + name: tink-test + users: + - name: tink-user + sshAuthorizedKeys: + - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC1BK73XhIzjX+meUr7pIYh6RHbvI3tmHeQIXY5lv7aztN1UoX+bhPo3dwo2sfSQn5kuxgQdnxIZ/CTzy0p0GkEYVv3gwspCeurjmu0XmrdmaSGcGxCEWT/65NtvYrQtUE5ELxJ+N/aeZNlK2B7IWANnw/82913asXH4VksV1NYNduP0o1/G4XcwLLSyVFB078q/oEnmvdNIoS61j4/o36HVtENJgYr0idcBvwJdvcGxGnPaqOhx477t+kfJAa5n5dSA5wilIaoXH5i1Tf/HsTCM52L+iNCARvQzJYZhzbWI1MDQwzILtIBEQCJsl2XSqIupleY8CxqQ6jCXt2mhae+wPc3YmbO5rFvr2/EvC57kh3yDs1Nsuj8KOvD78KeeujbR8n8pScm3WDp62HFQ8lEKNdeRNj6kB8WnuaJvPnyZfvzOhwG65/9w13IBl7B1sWxbFnq2rMpm5uHVK7mAmjL0Tt8zoDhcE1YJEnp9xte3/pvmKPkST5Q/9ZtR9P5sI+02jY0fvPkPyC03j2gsPixG7rpOCwpOdbny4dcj0TDeeXJX8er+oVfJuLYz0pNWJcT2raDdFfcqvYA0B0IyNYlj5nWX4RuEcyT3qocLReWPnZojetvAG/H8XwOh7fEVGqHAKOVSnPXCSQJPl6s0H12jPJBDJMTydtYPEszl4/CeQ==" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: TinkerbellMachineConfig +metadata: + name: test-md + namespace: test-namespace +spec: + hardwareSelector: + type: "worker" + osFamily: ubuntu + osImageURL: "https://ubuntu.gz" + templateRef: + kind: 
TinkerbellTemplateConfig + name: tink-test + users: + - name: tink-user + sshAuthorizedKeys: + - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC1BK73XhIzjX+meUr7pIYh6RHbvI3tmHeQIXY5lv7aztN1UoX+bhPo3dwo2sfSQn5kuxgQdnxIZ/CTzy0p0GkEYVv3gwspCeurjmu0XmrdmaSGcGxCEWT/65NtvYrQtUE5ELxJ+N/aeZNlK2B7IWANnw/82913asXH4VksV1NYNduP0o1/G4XcwLLSyVFB078q/oEnmvdNIoS61j4/o36HVtENJgYr0idcBvwJdvcGxGnPaqOhx477t+kfJAa5n5dSA5wilIaoXH5i1Tf/HsTCM52L+iNCARvQzJYZhzbWI1MDQwzILtIBEQCJsl2XSqIupleY8CxqQ6jCXt2mhae+wPc3YmbO5rFvr2/EvC57kh3yDs1Nsuj8KOvD78KeeujbR8n8pScm3WDp62HFQ8lEKNdeRNj6kB8WnuaJvPnyZfvzOhwG65/9w13IBl7B1sWxbFnq2rMpm5uHVK7mAmjL0Tt8zoDhcE1YJEnp9xte3/pvmKPkST5Q/9ZtR9P5sI+02jY0fvPkPyC03j2gsPixG7rpOCwpOdbny4dcj0TDeeXJX8er+oVfJuLYz0pNWJcT2raDdFfcqvYA0B0IyNYlj5nWX4RuEcyT3qocLReWPnZojetvAG/H8XwOh7fEVGqHAKOVSnPXCSQJPl6s0H12jPJBDJMTydtYPEszl4/CeQ== testemail@test.com" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: TinkerbellMachineConfig +metadata: + name: test-etcd + namespace: test-namespace +spec: + hardwareSelector: + type: "etcd" + osFamily: ubuntu + osImageURL: "https://ubuntu.gz" + templateRef: + kind: TinkerbellTemplateConfig + name: tink-test + users: + - name: tink-user + sshAuthorizedKeys: + - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC1BK73XhIzjX+meUr7pIYh6RHbvI3tmHeQIXY5lv7aztN1UoX+bhPo3dwo2sfSQn5kuxgQdnxIZ/CTzy0p0GkEYVv3gwspCeurjmu0XmrdmaSGcGxCEWT/65NtvYrQtUE5ELxJ+N/aeZNlK2B7IWANnw/82913asXH4VksV1NYNduP0o1/G4XcwLLSyVFB078q/oEnmvdNIoS61j4/o36HVtENJgYr0idcBvwJdvcGxGnPaqOhx477t+kfJAa5n5dSA5wilIaoXH5i1Tf/HsTCM52L+iNCARvQzJYZhzbWI1MDQwzILtIBEQCJsl2XSqIupleY8CxqQ6jCXt2mhae+wPc3YmbO5rFvr2/EvC57kh3yDs1Nsuj8KOvD78KeeujbR8n8pScm3WDp62HFQ8lEKNdeRNj6kB8WnuaJvPnyZfvzOhwG65/9w13IBl7B1sWxbFnq2rMpm5uHVK7mAmjL0Tt8zoDhcE1YJEnp9xte3/pvmKPkST5Q/9ZtR9P5sI+02jY0fvPkPyC03j2gsPixG7rpOCwpOdbny4dcj0TDeeXJX8er+oVfJuLYz0pNWJcT2raDdFfcqvYA0B0IyNYlj5nWX4RuEcyT3qocLReWPnZojetvAG/H8XwOh7fEVGqHAKOVSnPXCSQJPl6s0H12jPJBDJMTydtYPEszl4/CeQ== testemail@test.com" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 
+kind: TinkerbellTemplateConfig +metadata: + name: tink-test +spec: + template: + global_timeout: 6000 + id: "" + name: tink-test + tasks: + - actions: + - environment: + COMPRESSED: "true" + DEST_DISK: /dev/sda + IMG_URL: "" + image: image2disk:v1.0.0 + name: stream-image + timeout: 360 + - environment: + BLOCK_DEVICE: /dev/sda2 + CHROOT: "y" + CMD_LINE: apt -y update && apt -y install openssl + DEFAULT_INTERPRETER: /bin/sh -c + FS_TYPE: ext4 + image: cexec:v1.0.0 + name: install-openssl + timeout: 90 + - environment: + CONTENTS: | + network: + version: 2 + renderer: networkd + ethernets: + eno1: + dhcp4: true + eno2: + dhcp4: true + eno3: + dhcp4: true + eno4: + dhcp4: true + DEST_DISK: /dev/sda2 + DEST_PATH: /etc/netplan/config.yaml + DIRMODE: "0755" + FS_TYPE: ext4 + GID: "0" + MODE: "0644" + UID: "0" + image: writefile:v1.0.0 + name: write-netplan + timeout: 90 + - environment: + CONTENTS: | + datasource: + Ec2: + metadata_urls: [] + strict_id: false + system_info: + default_user: + name: tink + groups: [wheel, adm] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + manage_etc_hosts: localhost + warnings: + dsid_missing_source: off + DEST_DISK: /dev/sda2 + DEST_PATH: /etc/cloud/cloud.cfg.d/10_tinkerbell.cfg + DIRMODE: "0700" + FS_TYPE: ext4 + GID: "0" + MODE: "0600" + image: writefile:v1.0.0 + name: add-tink-cloud-init-config + timeout: 90 + - environment: + CONTENTS: | + datasource: Ec2 + DEST_DISK: /dev/sda2 + DEST_PATH: /etc/cloud/ds-identify.cfg + DIRMODE: "0700" + FS_TYPE: ext4 + GID: "0" + MODE: "0600" + UID: "0" + image: writefile:v1.0.0 + name: add-tink-cloud-init-ds-config + timeout: 90 + - environment: + BLOCK_DEVICE: /dev/sda2 + FS_TYPE: ext4 + image: kexec:v1.0.0 + name: kexec-image + pid: host + timeout: 90 + name: tink-test + volumes: + - /dev:/dev + - /dev/console:/dev/console + - /lib/firmware:/lib/firmware:ro + worker: "{{.device_1}}" + version: "0.1" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: AWSIamConfig +metadata: + 
name: eksa-unit-test + namespace: test-namespace +spec: + awsRegion: test-region + backendMode: + - mode1 + - mode2 + mapRoles: + - groups: + - group1 + - group2 + roleARN: test-role-arn + username: test + mapUsers: + - groups: + - group1 + - group2 + userARN: test-user-arn + username: test +--- diff --git a/pkg/providers/tinkerbell/upgrade.go b/pkg/providers/tinkerbell/upgrade.go index efebea19be7c7..2d93384e9cb54 100644 --- a/pkg/providers/tinkerbell/upgrade.go +++ b/pkg/providers/tinkerbell/upgrade.go @@ -145,16 +145,16 @@ func (p *Provider) validateAvailableHardwareForUpgrade(ctx context.Context, curr clusterSpecValidator := NewClusterSpecValidator( HardwareSatisfiesOnlyOneSelectorAssertion(p.catalogue), ) - - rollingUpgrade := false - if currentSpec.Cluster.Spec.KubernetesVersion != newClusterSpec.Cluster.Spec.KubernetesVersion || - currentSpec.Bundles.Spec.Number != newClusterSpec.Bundles.Spec.Number { - clusterSpecValidator.Register(ExtraHardwareAvailableAssertionForRollingUpgrade(p.catalogue)) - rollingUpgrade = true - } + eksaVersionUpgrade := currentSpec.Bundles.Spec.Number != newClusterSpec.Bundles.Spec.Number currentTinkerbellSpec := NewClusterSpec(currentSpec, currentSpec.TinkerbellMachineConfigs, currentSpec.TinkerbellDatacenter) - clusterSpecValidator.Register(AssertionsForScaleUpDown(p.catalogue, &ValidatableTinkerbellClusterSpec{currentTinkerbellSpec}, rollingUpgrade)) + rollingUpgrade := p.isRollingUpgrade(currentSpec, newClusterSpec) + currentCluster := &ValidatableTinkerbellClusterSpec{currentTinkerbellSpec} + if rollingUpgrade || eksaVersionUpgrade { + clusterSpecValidator.Register(ExtraHardwareAvailableAssertionForRollingUpgrade(p.catalogue, currentCluster, eksaVersionUpgrade)) + } + // ScaleUpDown should not be supported in case of either rolling upgrade or eksa version upgrade. 
+ clusterSpecValidator.Register(AssertionsForScaleUpDown(p.catalogue, currentCluster, rollingUpgrade || eksaVersionUpgrade)) tinkerbellClusterSpec := NewClusterSpec(newClusterSpec, p.machineConfigs, p.datacenterConfig) @@ -371,6 +371,35 @@ func (p *Provider) isScaleUpDown(oldCluster *v1alpha1.Cluster, newCluster *v1alp return false } +func (p *Provider) isRollingUpgrade(currentSpec, newClusterSpec *cluster.Spec) bool { + if currentSpec.Cluster.Spec.KubernetesVersion != newClusterSpec.Cluster.Spec.KubernetesVersion { + return true + } + currentWNGSwithK8sVersion := WorkerNodeGroupWithK8sVersion(currentSpec) + desiredWNGwithK8sVersion := WorkerNodeGroupWithK8sVersion(newClusterSpec) + for wngName, K8sVersion := range desiredWNGwithK8sVersion { + currentWngK8sVersion, ok := currentWNGSwithK8sVersion[wngName] + if ok && (currentWngK8sVersion != K8sVersion) { + return true + } + } + return false +} + +// WorkerNodeGroupWithK8sVersion maps each worker node group configuration in spec, keyed by "<cluster name>-<worker node group name>", to its K8s version.
+func WorkerNodeGroupWithK8sVersion(spec *cluster.Spec) map[string]v1alpha1.KubernetesVersion { + WNGwithK8sVersion := make(map[string]v1alpha1.KubernetesVersion) + K8sVersion := spec.Cluster.Spec.KubernetesVersion + for _, wng := range spec.Cluster.Spec.WorkerNodeGroupConfigurations { + mdName := fmt.Sprintf("%s-%s", spec.Cluster.Name, wng.Name) + if wng.KubernetesVersion != nil { + K8sVersion = *wng.KubernetesVersion + } + WNGwithK8sVersion[mdName] = K8sVersion + } + return WNGwithK8sVersion +} + func (p *Provider) validateMachineCfg(ctx context.Context, cluster *types.Cluster, newConfig *v1alpha1.TinkerbellMachineConfig, clusterSpec *cluster.Spec) error { prevMachineConfig, err := p.providerKubectlClient.GetEksaTinkerbellMachineConfig(ctx, newConfig.Name, cluster.KubeconfigFile, clusterSpec.Cluster.Namespace) if err != nil { diff --git a/pkg/providers/tinkerbell/upgrade_test.go b/pkg/providers/tinkerbell/upgrade_test.go index 2d7aa3e46ea13..d4ace166c7df2 100644 --- a/pkg/providers/tinkerbell/upgrade_test.go +++ b/pkg/providers/tinkerbell/upgrade_test.go @@ -21,6 +21,7 @@ import ( "github.com/aws/eks-anywhere/pkg/constants" "github.com/aws/eks-anywhere/pkg/filewriter" filewritermocks "github.com/aws/eks-anywhere/pkg/filewriter/mocks" + "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware" "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/mocks" "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/rufiounreleased" "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/stack" @@ -969,3 +970,152 @@ func TestProvider_ValidateNewSpec_NewWorkerNodeGroup(t *testing.T) { t.Fatal(err) } } + +func TestProviderValidateAvailableHardwareOnlyCPUpgradeSuccess(t *testing.T) { + clusterSpecManifest := "cluster_osimage_machine_config.yaml" + mockCtrl := gomock.NewController(t) + clusterSpec := givenClusterSpec(t, clusterSpecManifest) + datacenterConfig := givenDatacenterConfig(t, clusterSpecManifest) + machineConfigs := givenMachineConfigs(t, clusterSpecManifest) + 
docker := stackmocks.NewMockDocker(mockCtrl) + helm := stackmocks.NewMockHelm(mockCtrl) + kubectl := mocks.NewMockProviderKubectlClient(mockCtrl) + stackInstaller := stackmocks.NewMockStackInstaller(mockCtrl) + writer := filewritermocks.NewMockFileWriter(mockCtrl) + ctx := context.Background() + provider := newTinkerbellProvider(datacenterConfig, machineConfigs, clusterSpec.Cluster, writer, docker, helm, kubectl) + provider.stackInstaller = stackInstaller + + clusterSpec.ManagementCluster = &types.Cluster{Name: "test", KubeconfigFile: "kubeconfig-file"} + clusterSpec.Cluster.Spec.ManagementCluster = v1alpha1.ManagementCluster{Name: "test-mgmt"} + catalogue := hardware.NewCatalogue() + newCluster := clusterSpec.DeepCopy() + newCluster.Cluster.Spec.KubernetesVersion = v1alpha1.Kube122 + _ = catalogue.InsertHardware(&tinkv1.Hardware{ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"type": "cp"}, + }}) + provider.catalogue = catalogue + err := provider.validateAvailableHardwareForUpgrade(ctx, clusterSpec, newCluster) + if err != nil { + t.Fatal(err) + } +} + +func TestProviderValidateAvailableHardwareOnlyCPUpgradeError(t *testing.T) { + clusterSpecManifest := "cluster_osimage_machine_config.yaml" + mockCtrl := gomock.NewController(t) + clusterSpec := givenClusterSpec(t, clusterSpecManifest) + datacenterConfig := givenDatacenterConfig(t, clusterSpecManifest) + machineConfigs := givenMachineConfigs(t, clusterSpecManifest) + docker := stackmocks.NewMockDocker(mockCtrl) + helm := stackmocks.NewMockHelm(mockCtrl) + kubectl := mocks.NewMockProviderKubectlClient(mockCtrl) + stackInstaller := stackmocks.NewMockStackInstaller(mockCtrl) + writer := filewritermocks.NewMockFileWriter(mockCtrl) + ctx := context.Background() + provider := newTinkerbellProvider(datacenterConfig, machineConfigs, clusterSpec.Cluster, writer, docker, helm, kubectl) + provider.stackInstaller = stackInstaller + + clusterSpec.ManagementCluster = &types.Cluster{Name: "test", KubeconfigFile: 
"kubeconfig-file"} + clusterSpec.Cluster.Spec.ManagementCluster = v1alpha1.ManagementCluster{Name: "test-mgmt"} + catalogue := hardware.NewCatalogue() + newCluster := clusterSpec.DeepCopy() + newCluster.Cluster.Spec.KubernetesVersion = v1alpha1.Kube122 + provider.catalogue = catalogue + err := provider.validateAvailableHardwareForUpgrade(ctx, clusterSpec, newCluster) + if err == nil || !strings.Contains(err.Error(), "for rolling upgrade, minimum hardware count not met for selector '{\"type\":\"cp\"}'") { + t.Fatal(err) + } +} + +func TestProviderValidateAvailableHardwareOnlyWorkerUpgradeSuccess(t *testing.T) { + clusterSpecManifest := "cluster_osimage_machine_config.yaml" + mockCtrl := gomock.NewController(t) + clusterSpec := givenClusterSpec(t, clusterSpecManifest) + datacenterConfig := givenDatacenterConfig(t, clusterSpecManifest) + machineConfigs := givenMachineConfigs(t, clusterSpecManifest) + docker := stackmocks.NewMockDocker(mockCtrl) + helm := stackmocks.NewMockHelm(mockCtrl) + kubectl := mocks.NewMockProviderKubectlClient(mockCtrl) + stackInstaller := stackmocks.NewMockStackInstaller(mockCtrl) + writer := filewritermocks.NewMockFileWriter(mockCtrl) + ctx := context.Background() + provider := newTinkerbellProvider(datacenterConfig, machineConfigs, clusterSpec.Cluster, writer, docker, helm, kubectl) + provider.stackInstaller = stackInstaller + + clusterSpec.ManagementCluster = &types.Cluster{Name: "test", KubeconfigFile: "kubeconfig-file"} + clusterSpec.Cluster.Spec.ManagementCluster = v1alpha1.ManagementCluster{Name: "test-mgmt"} + catalogue := hardware.NewCatalogue() + newCluster := clusterSpec.DeepCopy() + kube122 := v1alpha1.Kube122 + newCluster.Cluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube122 + _ = catalogue.InsertHardware(&tinkv1.Hardware{ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"type": "worker"}, + }}) + provider.catalogue = catalogue + err := provider.validateAvailableHardwareForUpgrade(ctx, clusterSpec, 
newCluster) + if err != nil { + t.Fatal(err) + } +} + +func TestProviderValidateAvailableHardwareOnlyWorkerUpgradeError(t *testing.T) { + clusterSpecManifest := "cluster_osimage_machine_config.yaml" + mockCtrl := gomock.NewController(t) + clusterSpec := givenClusterSpec(t, clusterSpecManifest) + datacenterConfig := givenDatacenterConfig(t, clusterSpecManifest) + machineConfigs := givenMachineConfigs(t, clusterSpecManifest) + docker := stackmocks.NewMockDocker(mockCtrl) + helm := stackmocks.NewMockHelm(mockCtrl) + kubectl := mocks.NewMockProviderKubectlClient(mockCtrl) + stackInstaller := stackmocks.NewMockStackInstaller(mockCtrl) + writer := filewritermocks.NewMockFileWriter(mockCtrl) + ctx := context.Background() + provider := newTinkerbellProvider(datacenterConfig, machineConfigs, clusterSpec.Cluster, writer, docker, helm, kubectl) + provider.stackInstaller = stackInstaller + + clusterSpec.ManagementCluster = &types.Cluster{Name: "test", KubeconfigFile: "kubeconfig-file"} + clusterSpec.Cluster.Spec.ManagementCluster = v1alpha1.ManagementCluster{Name: "test-mgmt"} + catalogue := hardware.NewCatalogue() + newCluster := clusterSpec.DeepCopy() + kube122 := v1alpha1.Kube122 + newCluster.Cluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube122 + provider.catalogue = catalogue + err := provider.validateAvailableHardwareForUpgrade(ctx, clusterSpec, newCluster) + if err == nil || !strings.Contains(err.Error(), "for rolling upgrade, minimum hardware count not met for selector '{\"type\":\"worker\"}'") { + t.Fatal(err) + } +} + +func TestProviderValidateAvailableHardwareEksaVersionUpgradeSuccess(t *testing.T) { + clusterSpecManifest := "cluster_osimage_machine_config.yaml" + mockCtrl := gomock.NewController(t) + clusterSpec := givenClusterSpec(t, clusterSpecManifest) + datacenterConfig := givenDatacenterConfig(t, clusterSpecManifest) + machineConfigs := givenMachineConfigs(t, clusterSpecManifest) + docker := stackmocks.NewMockDocker(mockCtrl) + helm := 
stackmocks.NewMockHelm(mockCtrl) + kubectl := mocks.NewMockProviderKubectlClient(mockCtrl) + stackInstaller := stackmocks.NewMockStackInstaller(mockCtrl) + writer := filewritermocks.NewMockFileWriter(mockCtrl) + ctx := context.Background() + provider := newTinkerbellProvider(datacenterConfig, machineConfigs, clusterSpec.Cluster, writer, docker, helm, kubectl) + provider.stackInstaller = stackInstaller + + clusterSpec.ManagementCluster = &types.Cluster{Name: "test", KubeconfigFile: "kubeconfig-file"} + clusterSpec.Cluster.Spec.ManagementCluster = v1alpha1.ManagementCluster{Name: "test-mgmt"} + catalogue := hardware.NewCatalogue() + newCluster := clusterSpec.DeepCopy() + newCluster.Bundles.Spec.Number++ + _ = catalogue.InsertHardware(&tinkv1.Hardware{ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"type": "cp"}, + }}) + _ = catalogue.InsertHardware(&tinkv1.Hardware{ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"type": "worker"}, + }}) + provider.catalogue = catalogue + err := provider.validateAvailableHardwareForUpgrade(ctx, clusterSpec, newCluster) + if err != nil { + t.Fatal(err) + } +} diff --git a/pkg/providers/tinkerbell/validate.go b/pkg/providers/tinkerbell/validate.go index 2ddcccc97d810..3e036ae96f57b 100644 --- a/pkg/providers/tinkerbell/validate.go +++ b/pkg/providers/tinkerbell/validate.go @@ -29,12 +29,25 @@ func validateOsFamily(spec *ClusterSpec) error { } } - if controlPlaneOsFamily != v1alpha1.Bottlerocket && spec.DatacenterConfig.Spec.OSImageURL == "" { + if controlPlaneOsFamily != v1alpha1.Bottlerocket && spec.DatacenterConfig.Spec.OSImageURL == "" && spec.ControlPlaneMachineConfig().Spec.OSImageURL == "" { return fmt.Errorf("please use bottlerocket as osFamily for auto-importing or provide a valid osImageURL") } return nil } +func validateOSImageURL(spec *ClusterSpec) error { + dcOSImageURL := spec.DatacenterConfig.Spec.OSImageURL + for _, mc := range spec.MachineConfigs { + if mc.Spec.OSImageURL != "" && dcOSImageURL != "" 
{ + return fmt.Errorf("cannot specify OSImageURL on both TinkerbellMachineConfig's and TinkerbellDatacenterConfig") + } + if mc.Spec.OSImageURL == "" && dcOSImageURL == "" && mc.Spec.OSFamily != v1alpha1.Bottlerocket { + return fmt.Errorf("missing OSImageURL on TinkerbellMachineConfig '%s'", mc.ObjectMeta.Name) + } + } + return nil +} + func validateMachineRefExists( ref *v1alpha1.Ref, machineConfigs map[string]*v1alpha1.TinkerbellMachineConfig,