Skip to content

Commit

Permalink
Enable modular upgrades for tinkerbell provider in CLI
Browse files Browse the repository at this point in the history
Signed-off-by: Rahul Ganesh <[email protected]>
  • Loading branch information
rahulbabu95 authored and Rahul Ganesh committed Oct 9, 2023
1 parent 5a7934c commit 5a490f7
Show file tree
Hide file tree
Showing 17 changed files with 821 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ spec:
type: object
osFamily:
type: string
osImageURL:
type: string
templateRef:
properties:
kind:
Expand All @@ -153,6 +155,7 @@ spec:
required:
- hardwareSelector
- osFamily
- osImageURL
type: object
status:
description: TinkerbellMachineConfigStatus defines the observed state
Expand Down
3 changes: 3 additions & 0 deletions config/manifest/eksa-components.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5745,6 +5745,8 @@ spec:
type: object
osFamily:
type: string
osImageURL:
type: string
templateRef:
properties:
kind:
Expand All @@ -5771,6 +5773,7 @@ spec:
required:
- hardwareSelector
- osFamily
- osImageURL
type: object
status:
description: TinkerbellMachineConfigStatus defines the observed state
Expand Down
7 changes: 0 additions & 7 deletions pkg/api/v1alpha1/cluster_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,6 @@ func validateKubeVersionSkew(newVersion, oldVersion KubernetesVersion, path *fie
// ValidateWorkerKubernetesVersionSkew validates worker node group Kubernetes version skew between upgrades.
func ValidateWorkerKubernetesVersionSkew(new, old *Cluster) field.ErrorList {
var allErrs field.ErrorList
path := field.NewPath("spec").Child("WorkerNodeConfiguration.kubernetesVersion")

newClusterVersion := new.Spec.KubernetesVersion
oldClusterVersion := old.Spec.KubernetesVersion

Expand All @@ -485,11 +483,6 @@ func ValidateWorkerKubernetesVersionSkew(new, old *Cluster) field.ErrorList {
for _, nodeGroupNewSpec := range new.Spec.WorkerNodeGroupConfigurations {
newVersion := nodeGroupNewSpec.KubernetesVersion

if newVersion != nil && nodeGroupNewSpec.MachineGroupRef.Kind == TinkerbellMachineConfigKind {
allErrs = append(allErrs, field.Forbidden(path, "worker node group level kubernetesVersion is not supported for Tinkerbell"))
return allErrs
}

if workerNodeGrpOldSpec, ok := workerNodeGroupMap[nodeGroupNewSpec.Name]; ok {
oldVersion := workerNodeGrpOldSpec.KubernetesVersion
allErrs = append(allErrs, performWorkerKubernetesValidations(oldVersion, newVersion, oldClusterVersion, newClusterVersion)...)
Expand Down
27 changes: 0 additions & 27 deletions pkg/api/v1alpha1/cluster_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2331,30 +2331,3 @@ func TestValidateWorkerVersionSkewAddNodeGroup(t *testing.T) {
g := NewWithT(t)
g.Expect(err).To(Succeed())
}

// TestValidateWorkerVersionBlockTinkerbell expects ValidateUpdate to return an
// error when the new cluster's worker node groups reference a
// TinkerbellMachineConfig and also set a node-group-level KubernetesVersion.
func TestValidateWorkerVersionBlockTinkerbell(t *testing.T) {
kube119 := v1alpha1.KubernetesVersion("1.19")

// New cluster: the first worker node group is switched to the Tinkerbell
// machine config kind and given an explicit Kubernetes version.
newCluster := baseCluster()
newCluster.Spec.KubernetesVersion = kube119
newCluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube119
newCluster.Spec.WorkerNodeGroupConfigurations[0].MachineGroupRef.Kind = v1alpha1.TinkerbellMachineConfigKind
// A second Tinkerbell worker node group, also with an explicit version, is appended.
newWorker := v1alpha1.WorkerNodeGroupConfiguration{
Name: "md-1",
Count: ptr.Int(1),
MachineGroupRef: &v1alpha1.Ref{
Kind: v1alpha1.TinkerbellMachineConfigKind,
Name: "eksa-unit-test",
},
KubernetesVersion: &kube119,
}
newCluster.Spec.WorkerNodeGroupConfigurations = append(newCluster.Spec.WorkerNodeGroupConfigurations, newWorker)

// Old cluster: same cluster-level and node-group-level version, without the extra group.
oldCluster := baseCluster()
oldCluster.Spec.KubernetesVersion = kube119
oldCluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube119

// The update must be rejected.
err := newCluster.ValidateUpdate(oldCluster)
g := NewWithT(t)
g.Expect(err).ToNot(BeNil())
}
7 changes: 7 additions & 0 deletions pkg/api/v1alpha1/tinkerbellmachineconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package v1alpha1

import (
"fmt"
"net/url"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
Expand Down Expand Up @@ -91,6 +92,12 @@ func validateTinkerbellMachineConfig(config *TinkerbellMachineConfig) error {
)
}

if config.Spec.OSImageURL != "" {
if _, err := url.ParseRequestURI(config.Spec.OSImageURL); err != nil {
return fmt.Errorf("parsing osImageOverride: %v", err)
}
}

if len(config.Spec.Users) == 0 {
return fmt.Errorf("TinkerbellMachineConfig: missing spec.Users: %s", config.Name)
}
Expand Down
1 change: 1 addition & 0 deletions pkg/api/v1alpha1/tinkerbellmachineconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type TinkerbellMachineConfigSpec struct {
HardwareSelector HardwareSelector `json:"hardwareSelector"`
TemplateRef Ref `json:"templateRef,omitempty"`
OSFamily OSFamily `json:"osFamily"`
OSImageURL string `json:"osImageURL"`
Users []UserConfiguration `json:"users,omitempty"`
HostOSConfiguration *HostOSConfiguration `json:"hostOSConfiguration,omitempty"`
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/api/v1alpha1/tinkerbellmachineconfig_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ func TestTinkerbellMachineConfigValidateFail(t *testing.T) {
),
expectedErr: "HostOSConfiguration is invalid for TinkerbellMachineConfig tinkerbellmachineconfig: NTPConfiguration.Servers can not be empty",
},
{
name: "Invalid OS Image URL",
machineConfig: CreateTinkerbellMachineConfig(func(mc *TinkerbellMachineConfig) {
mc.Spec.OSImageURL = "test"
}),
expectedErr: "parsing osImageOverride: parse \"test\": invalid URI for request",
},
}

for _, tc := range tests {
Expand Down
106 changes: 85 additions & 21 deletions pkg/providers/tinkerbell/assert.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ func AssertOsFamilyValid(spec *ClusterSpec) error {
return validateOsFamily(spec)
}

// AssertOSImageURL checks the OSImageURL configuration of spec: the value must be
// set either at the datacenter config level or on every machine config, but not
// at both levels. The check itself is delegated to validateOSImageURL.
func AssertOSImageURL(spec *ClusterSpec) error {
	err := validateOSImageURL(spec)
	return err
}

// NewIPNotInUseAssertion returns an assertion that ensures the endpoint host for the control plane isn't in use.
// The check may be unreliable due to its implementation.
func NewIPNotInUseAssertion(client networkutils.NetClient) ClusterSpecAssertion {
Expand Down Expand Up @@ -264,6 +269,12 @@ type ValidatableCluster interface {

// ControlPlaneReplicaCount retrieves the control plane replica count of the ValidatableCluster.
ControlPlaneReplicaCount() int

// ClusterK8sVersion retrieves the cluster-level Kubernetes version.
ClusterK8sVersion() v1alpha1.KubernetesVersion

// WorkerNodeGroupK8sVersion maps each worker node group to its Kubernetes version.
WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion
}

// ValidatableTinkerbellClusterSpec wraps around the Tinkerbell ClusterSpec as a ValidatableCluster.
Expand All @@ -289,6 +300,16 @@ func (v *ValidatableTinkerbellClusterSpec) WorkerNodeHardwareGroups() []WorkerNo
return workerNodeGroupConfigs
}

// ClusterK8sVersion reports the Kubernetes version configured on the wrapped
// cluster's spec (the cluster-level version).
func (v *ValidatableTinkerbellClusterSpec) ClusterK8sVersion() v1alpha1.KubernetesVersion {
	clusterVersion := v.Cluster.Spec.KubernetesVersion
	return clusterVersion
}

// WorkerNodeGroupK8sVersion builds the worker-node-group-to-Kubernetes-version
// map for the wrapped cluster spec by delegating to WorkerNodeGroupWithK8sVersion.
func (v *ValidatableTinkerbellClusterSpec) WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion {
	versionsByGroup := WorkerNodeGroupWithK8sVersion(v.ClusterSpec.Spec)
	return versionsByGroup
}

// ValidatableTinkerbellCAPI wraps around the Tinkerbell control plane and worker CAPI objects as a ValidatableCluster.
type ValidatableTinkerbellCAPI struct {
KubeadmControlPlane *controlplanev1.KubeadmControlPlane
Expand All @@ -313,6 +334,26 @@ func (v *ValidatableTinkerbellCAPI) WorkerNodeHardwareGroups() []WorkerNodeHardw
return workerNodeHardwareList
}

// ClusterK8sVersion reports the control plane's Kubernetes version: the
// KubeadmControlPlane spec version converted by toK8sVersion.
func (v *ValidatableTinkerbellCAPI) ClusterK8sVersion() v1alpha1.KubernetesVersion {
	controlPlaneVersion := v.KubeadmControlPlane.Spec.Version
	return v.toK8sVersion(controlPlaneVersion)
}

// WorkerNodeGroupK8sVersion returns a map from each worker group's
// MachineDeployment name to that deployment's Kubernetes version, converted by
// toK8sVersion.
//
// A MachineDeployment whose template carries no version (nil pointer) is left
// out of the map instead of being dereferenced, so callers see the zero value
// for that name rather than a panic.
func (v *ValidatableTinkerbellCAPI) WorkerNodeGroupK8sVersion() map[string]v1alpha1.KubernetesVersion {
	wngK8sversion := make(map[string]v1alpha1.KubernetesVersion, len(v.WorkerGroups))
	for _, wng := range v.WorkerGroups {
		version := wng.MachineDeployment.Spec.Template.Spec.Version
		if version == nil {
			// The version is optional on the machine template; nothing to record.
			continue
		}
		wngK8sversion[wng.MachineDeployment.Name] = v.toK8sVersion(*version)
	}
	return wngK8sversion
}

// toK8sVersion converts a full version string such as "v1.27.4" into its
// major.minor form ("1.27").
//
// A single leading "v" is dropped when present, and everything from the second
// dot onward is discarded. Unlike a fixed-width slice, this does not panic on
// short or empty input and handles minor versions of any digit count
// ("v1.9.3" -> "1.9", "v1.100.0" -> "1.100"). Input with fewer than two dots
// is returned unchanged apart from the stripped prefix.
func (v *ValidatableTinkerbellCAPI) toK8sVersion(k8sversion string) v1alpha1.KubernetesVersion {
	if len(k8sversion) > 0 && k8sversion[0] == 'v' {
		k8sversion = k8sversion[1:]
	}

	// Cut at the second dot: the text before it is "major.minor".
	dots := 0
	for i := 0; i < len(k8sversion); i++ {
		if k8sversion[i] != '.' {
			continue
		}
		dots++
		if dots == 2 {
			return v1alpha1.KubernetesVersion(k8sversion[:i])
		}
	}
	return v1alpha1.KubernetesVersion(k8sversion)
}

// AssertionsForScaleUpDown asserts that catalogue has sufficient hardware to
// support the scaling up/down from current ClusterSpec to desired ValidatableCluster.
// nolint:gocyclo // TODO: Reduce cyclomatic complexity https://github.com/aws/eks-anywhere-internal/issues/1186
Expand Down Expand Up @@ -391,7 +432,7 @@ func AssertionsForScaleUpDown(catalogue *hardware.Catalogue, current Validatable

// ExtraHardwareAvailableAssertionForRollingUpgrade asserts that catalogue has sufficient hardware to
// support the ClusterSpec during a rolling upgrade workflow.
func ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue *hardware.Catalogue) ClusterSpecAssertion {
func ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue *hardware.Catalogue, current ValidatableCluster, eksaVersionUpgrade bool) ClusterSpecAssertion {
return func(spec *ClusterSpec) error {
// Without Hardware selectors we get undesirable behavior so ensure we have them for
// all MachineConfigs.
Expand All @@ -403,41 +444,64 @@ func ExtraHardwareAvailableAssertionForRollingUpgrade(catalogue *hardware.Catalo
// will account for the same selector being specified on different groups.
requirements := minimumHardwareRequirements{}

maxSurge := 1
if spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil {
maxSurge = spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy.RollingUpdate.MaxSurge
if spec.Cluster.Spec.KubernetesVersion != current.ClusterK8sVersion() || eksaVersionUpgrade {
if err := ensureCPHardwareAvailability(spec, current, requirements); err != nil {
return err
}

Check warning on line 450 in pkg/providers/tinkerbell/assert.go

View check run for this annotation

Codecov / codecov/patch

pkg/providers/tinkerbell/assert.go#L449-L450

Added lines #L449 - L450 were not covered by tests
}
err := requirements.Add(
spec.ControlPlaneMachineConfig().Spec.HardwareSelector,
maxSurge,
)
if err != nil {

if err := ensureWorkerHardwareAvailability(spec, current, requirements, eksaVersionUpgrade); err != nil {
return err
}

Check warning on line 455 in pkg/providers/tinkerbell/assert.go

View check run for this annotation

Codecov / codecov/patch

pkg/providers/tinkerbell/assert.go#L454-L455

Added lines #L454 - L455 were not covered by tests

if spec.HasExternalEtcd() {
return fmt.Errorf("external etcd upgrade is not supported")
}

Check warning on line 459 in pkg/providers/tinkerbell/assert.go

View check run for this annotation

Codecov / codecov/patch

pkg/providers/tinkerbell/assert.go#L458-L459

Added lines #L458 - L459 were not covered by tests

if err := validateMinimumHardwareRequirements(requirements, catalogue); err != nil {
return fmt.Errorf("for rolling upgrade, %v", err)
}
return nil
}
}

for _, nodeGroup := range spec.WorkerNodeGroupConfigurations() {
maxSurge = 1
func ensureCPHardwareAvailability(spec *ClusterSpec, current ValidatableCluster, hwReq minimumHardwareRequirements) error {
maxSurge := 1

if spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil {
maxSurge = spec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy.RollingUpdate.MaxSurge
}
err := hwReq.Add(
spec.ControlPlaneMachineConfig().Spec.HardwareSelector,
maxSurge,
)
if err != nil {
return fmt.Errorf("for rolling upgrade, %v", err)
}

Check warning on line 480 in pkg/providers/tinkerbell/assert.go

View check run for this annotation

Codecov / codecov/patch

pkg/providers/tinkerbell/assert.go#L479-L480

Added lines #L479 - L480 were not covered by tests
return nil
}

func ensureWorkerHardwareAvailability(spec *ClusterSpec, current ValidatableCluster, hwReq minimumHardwareRequirements, eksaVersionUpgrade bool) error {
currentWngK8sversion := current.WorkerNodeGroupK8sVersion()
desiredWngK8sVersion := WorkerNodeGroupWithK8sVersion(spec.Spec)
for _, nodeGroup := range spec.WorkerNodeGroupConfigurations() {
maxSurge := 1
// As rolling upgrades and scale up/down are not permitted in a single operation, it's safe to access directly using the md name.
mdName := fmt.Sprintf("%s-%s", spec.Cluster.Name, nodeGroup.Name)
if currentWngK8sversion[mdName] != desiredWngK8sVersion[mdName] || eksaVersionUpgrade {
if nodeGroup.UpgradeRolloutStrategy != nil {
maxSurge = nodeGroup.UpgradeRolloutStrategy.RollingUpdate.MaxSurge
}
err := requirements.Add(
err := hwReq.Add(
spec.WorkerNodeGroupMachineConfig(nodeGroup).Spec.HardwareSelector,
maxSurge,
)
if err != nil {
return fmt.Errorf("for rolling upgrade, %v", err)
}
}

if spec.HasExternalEtcd() {
return fmt.Errorf("external etcd upgrade is not supported")
}

if err := validateMinimumHardwareRequirements(requirements, catalogue); err != nil {
return fmt.Errorf("for rolling upgrade, %v", err)
}
return nil
}
return nil
}

// ensureHardwareSelectorsSpecified ensures each machine config present in spec has a hardware
Expand Down
Loading

0 comments on commit 5a490f7

Please sign in to comment.