diff --git a/config/crd/bases/anywhere.eks.amazonaws.com_machinedeploymentupgrades.yaml b/config/crd/bases/anywhere.eks.amazonaws.com_machinedeploymentupgrades.yaml
index 264aba83cee25..a7120cdba4583 100644
--- a/config/crd/bases/anywhere.eks.amazonaws.com_machinedeploymentupgrades.yaml
+++ b/config/crd/bases/anywhere.eks.amazonaws.com_machinedeploymentupgrades.yaml
@@ -45,19 +45,19 @@ spec:
                 name:
                   type: string
               type: object
-            controlPlane:
-              properties:
-                kind:
-                  type: string
-                name:
-                  type: string
-              type: object
             kubeadmClusterConfig:
               type: string
             kubeletVersion:
               type: string
             kubernetesVersion:
               type: string
+            machineDeployment:
+              properties:
+                kind:
+                  type: string
+                name:
+                  type: string
+              type: object
             machinesRequireUpgrade:
               items:
                 properties:
@@ -69,10 +69,10 @@ spec:
                 type: array
             required:
             - cluster
-            - controlPlane
            - kubeadmClusterConfig
            - kubeletVersion
            - kubernetesVersion
+            - machineDeployment
            - machinesRequireUpgrade
            type: object
          status:
diff --git a/controllers/machinedeploymentupgrade_controller.go b/controllers/machinedeploymentupgrade_controller.go
index a2b0bed303e8b..aadcf2ecd7527 100644
--- a/controllers/machinedeploymentupgrade_controller.go
+++ b/controllers/machinedeploymentupgrade_controller.go
@@ -18,23 +18,36 @@ package controllers
 
 import (
 	"context"
+	"errors"
+	"fmt"
+	"time"
 
+	"github.com/go-logr/logr"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	kerrors "k8s.io/apimachinery/pkg/util/errors"
+	"sigs.k8s.io/cluster-api/util/patch"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
 	anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1"
 )
 
+// mdUpgradeFinalizerName is the finalizer added to MachineDeploymentUpgrade objects to handle deletion.
+const mdUpgradeFinalizerName = "machinedeploymentupgrades.anywhere.eks.amazonaws.com/finalizer"
+
 // MachineDeploymentUpgradeReconciler reconciles a MachineDeploymentUpgrade object.
 type MachineDeploymentUpgradeReconciler struct {
 	client client.Client
+	log    logr.Logger
 }
 
 // NewMachineDeploymentUpgradeReconciler returns a new instance of MachineDeploymentUpgradeReconciler.
 func NewMachineDeploymentUpgradeReconciler(client client.Client) *MachineDeploymentUpgradeReconciler {
 	return &MachineDeploymentUpgradeReconciler{
 		client: client,
+		log:    ctrl.Log.WithName("MachineDeploymentUpgradeController"),
 	}
 }
 
@@ -43,12 +56,63 @@ func NewMachineDeploymentUpgradeReconciler(client client.Client) *MachineDeploym
 //+kubebuilder:rbac:groups=anywhere.eks.amazonaws.com,resources=machinedeploymentupgrades/finalizers,verbs=update
 
 // Reconcile reconciles a MachineDeploymentUpgrade object.
-func (r *MachineDeploymentUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	_ = log.FromContext(ctx)
+// nolint:gocyclo
+func (r *MachineDeploymentUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, reterr error) {
+	log := r.log.WithValues("MachineDeploymentUpgrade", req.NamespacedName)
 
-	// TODO(user): your logic here
+	log.Info("Reconciling machine deployment upgrade object")
+	mdUpgrade := &anywherev1.MachineDeploymentUpgrade{}
+	if err := r.client.Get(ctx, req.NamespacedName, mdUpgrade); err != nil {
+		if apierrors.IsNotFound(err) {
+			return reconcile.Result{}, err
+		}
+		return ctrl.Result{}, err
+	}
 
-	return ctrl.Result{}, nil
+	patchHelper, err := patch.NewHelper(mdUpgrade, r.client)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	defer func() {
+		err := r.updateStatus(ctx, log, mdUpgrade)
+		if err != nil {
+			reterr = kerrors.NewAggregate([]error{reterr, err})
+		}
+
+		// Always attempt to patch the object and status after each reconciliation.
+		patchOpts := []patch.Option{}
+
+		// We want the observedGeneration to indicate that the status shown is up to date for the desired spec of the same generation.
+		// However, if there is an error while updating the status, we may get a partial status update. In that case,
+		// a partially updated status is not considered up to date, so we should not update the observedGeneration.
+
+		// Patch ObservedGeneration only if the reconciliation completed without error.
+		if reterr == nil {
+			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
+		}
+		if err := patchMachineDeploymentUpgrade(ctx, patchHelper, mdUpgrade, patchOpts...); err != nil {
+			reterr = kerrors.NewAggregate([]error{reterr, err})
+		}
+
+		// Only requeue if we are not already re-queueing and the Ready condition is false.
+		// We do this to be able to update the status continuously until it becomes ready,
+		// since there might be changes in the state of the world that don't trigger reconciliation requests.
+
+		if reterr == nil && !result.Requeue && result.RequeueAfter <= 0 && !mdUpgrade.Status.Ready {
+			result = ctrl.Result{RequeueAfter: 10 * time.Second}
+		}
+	}()
+
+	// Reconcile the MachineDeploymentUpgrade deletion if the DeletionTimestamp is set.
+	if !mdUpgrade.DeletionTimestamp.IsZero() {
+		return r.reconcileDelete(ctx, log, mdUpgrade)
+	}
+
+	// AddFinalizer is idempotent.
+	controllerutil.AddFinalizer(mdUpgrade, mdUpgradeFinalizerName)
+
+	return r.reconcile(ctx, log, mdUpgrade)
 }
 
 // SetupWithManager sets up the controller with the Manager.
@@ -57,3 +121,82 @@ func (r *MachineDeploymentUpgradeReconciler) SetupWithManager(mgr ctrl.Manager)
 		For(&anywherev1.MachineDeploymentUpgrade{}).
 		Complete(r)
 }
+
+func (r *MachineDeploymentUpgradeReconciler) reconcile(ctx context.Context, log logr.Logger, mdUpgrade *anywherev1.MachineDeploymentUpgrade) (ctrl.Result, error) {
+	log.Info("Upgrading all worker nodes")
+	for _, machineRef := range mdUpgrade.Spec.MachinesRequireUpgrade {
+		nodeUpgrade := nodeUpgrader(machineRef, "v1.28.3-eks-1-28-9", "v3.5.9-eks-1-28-9", false)
+		if err := r.client.Create(ctx, nodeUpgrade); client.IgnoreAlreadyExists(err) != nil {
+			return ctrl.Result{}, fmt.Errorf("failed to create node upgrader for machine %s: %v", machineRef.Name, err)
+		}
+	}
+
+	err := r.updateStatus(ctx, log, mdUpgrade)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{}, nil
+}
+
+func (r *MachineDeploymentUpgradeReconciler) reconcileDelete(ctx context.Context, log logr.Logger, mdUpgrade *anywherev1.MachineDeploymentUpgrade) (ctrl.Result, error) {
+	log.Info("Reconcile MachineDeploymentUpgrade deletion")
+
+	m := &anywherev1.MachineDeploymentUpgrade{}
+	if err := r.client.Get(ctx, GetNamespacedNameType(mdUpgrade.ObjectMeta.Name, mdUpgrade.ObjectMeta.Namespace), m); err != nil {
+		if apierrors.IsNotFound(err) {
+			log.Info("MachineDeploymentUpgrade not found, skipping deletion")
+		} else {
+			return ctrl.Result{}, fmt.Errorf("getting MachineDeploymentUpgrade %s: %v", mdUpgrade.ObjectMeta.Name, err)
+		}
+	} else {
+		log.Info("Deleting MachineDeploymentUpgrade", "Name", mdUpgrade.ObjectMeta.Name)
+		if err := r.client.Delete(ctx, m); err != nil {
+			return ctrl.Result{}, fmt.Errorf("deleting MachineDeploymentUpgrade: %v", err)
+		}
+	}
+
+	// Remove the finalizer on MachineDeploymentUpgrade object.
+	controllerutil.RemoveFinalizer(mdUpgrade, mdUpgradeFinalizerName)
+	return ctrl.Result{}, nil
+}
+
+func patchMachineDeploymentUpgrade(ctx context.Context, patchHelper *patch.Helper, mdUpgrade *anywherev1.MachineDeploymentUpgrade, patchOpts ...patch.Option) error {
+	// Always attempt to patch the object and status after each reconciliation.
+	return patchHelper.Patch(ctx, mdUpgrade, patchOpts...)
+}
+
+func (r *MachineDeploymentUpgradeReconciler) updateStatus(ctx context.Context, log logr.Logger, mdUpgrade *anywherev1.MachineDeploymentUpgrade) error {
+	// When MachineDeploymentUpgrade is fully deleted, we do not need to update the status. Without this check
+	// the subsequent patch operations would fail if the status is updated after it is fully deleted.
+	if !mdUpgrade.DeletionTimestamp.IsZero() && len(mdUpgrade.GetFinalizers()) == 0 {
+		log.Info("MachineDeploymentUpgrade is deleted, skipping status update")
+		return nil
+	}
+
+	log.Info("Updating MachineDeploymentUpgrade status")
+
+	var ready int64
+	total := len(mdUpgrade.Spec.MachinesRequireUpgrade)
+	mdUpgrade.Status.RequireUpgrade = int64(total)
+
+	for _, machine := range mdUpgrade.Spec.MachinesRequireUpgrade {
+		nodeUpgrade, err := getNodeUpgrade(ctx, r.client, nodeUpgraderName(machine.Name))
+		if err != nil {
+			return err
+		}
+		if nodeUpgrade.Status.Completed {
+			ready++
+		}
+	}
+	mdUpgrade.Status.Upgraded = ready
+	if mdUpgrade.Status.Upgraded != mdUpgrade.Status.RequireUpgrade {
+		log.Info("Nodes are not ready yet", "total", total, "ready", mdUpgrade.Status.Upgraded)
+		return errors.New("nodes are not ready yet")
+	}
+
+	log.Info("Nodes ready", "total", mdUpgrade.Status.Upgraded)
+	mdUpgrade.Status.Ready = true
+
+	return nil
+}
diff --git a/controllers/machinedeploymentupgrade_controller_test.go b/controllers/machinedeploymentupgrade_controller_test.go
new file mode 100644
index 0000000000000..c4e56ab46a44d
--- /dev/null
+++ b/controllers/machinedeploymentupgrade_controller_test.go
@@ -0,0 +1,115 @@
+package controllers_test
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+	"github.com/aws/eks-anywhere/controllers"
+	anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1"
+)
+
+func TestMDUpgradeReconcile(t *testing.T) {
+	g := NewWithT(t)
+	ctx := context.Background()
+
+	cluster, machine, node, mdUpgrade, nodeUpgrade := getObjectsForMDUpgradeTest()
+	nodeUpgrade.Name = fmt.Sprintf("%s-node-upgrade", machine.Name)
+	nodeUpgrade.Status = anywherev1.NodeUpgradeStatus{
+		Completed: true,
+	}
+	client := fake.NewClientBuilder().WithRuntimeObjects(cluster, machine, node, mdUpgrade, nodeUpgrade).Build()
+
+	r := controllers.NewMachineDeploymentUpgradeReconciler(client)
+	req := mdUpgradeRequest(mdUpgrade)
+	_, err := r.Reconcile(ctx, req)
+	g.Expect(err).ToNot(HaveOccurred())
+
+	mdu := &anywherev1.MachineDeploymentUpgrade{}
+	err = client.Get(ctx, types.NamespacedName{Name: mdUpgrade.Name, Namespace: "eksa-system"}, mdu)
+	g.Expect(err).ToNot(HaveOccurred())
+}
+
+func TestMDUpgradeReconcileDelete(t *testing.T) {
+	g := NewWithT(t)
+	ctx := context.Background()
+
+	cluster, machine, node, mdUpgrade, nodeUpgrade := getObjectsForMDUpgradeTest()
+	nodeUpgrade.Name = fmt.Sprintf("%s-node-upgrade", machine.Name)
+	nodeUpgrade.Status = anywherev1.NodeUpgradeStatus{
+		Completed: true,
+	}
+	client := fake.NewClientBuilder().WithRuntimeObjects(cluster, machine, node, mdUpgrade, nodeUpgrade).Build()
+
+	r := controllers.NewMachineDeploymentUpgradeReconciler(client)
+	req := mdUpgradeRequest(mdUpgrade)
+	_, err := r.Reconcile(ctx, req)
+	g.Expect(err).ToNot(HaveOccurred())
+
+	mdu := &anywherev1.MachineDeploymentUpgrade{}
+	err = client.Get(ctx, types.NamespacedName{Name: mdUpgrade.Name, Namespace: "eksa-system"}, mdu)
+	g.Expect(err).ToNot(HaveOccurred())
+
+	err = client.Delete(ctx, mdUpgrade)
+	g.Expect(err).ToNot(HaveOccurred())
+
+	_, err = r.Reconcile(ctx, req)
+	g.Expect(err).ToNot(HaveOccurred())
+
+	mdu = &anywherev1.MachineDeploymentUpgrade{}
+	err = client.Get(ctx, types.NamespacedName{Name: mdUpgrade.Name, Namespace: "eksa-system"}, mdu)
"eksa-system"}, mdu) + g.Expect(err).To(MatchError("machinedeploymentupgrades.anywhere.eks.amazonaws.com \"md-upgrade-request\" not found")) +} + +func getObjectsForMDUpgradeTest() (*clusterv1.Cluster, *clusterv1.Machine, *corev1.Node, *anywherev1.MachineDeploymentUpgrade, *anywherev1.NodeUpgrade) { + cluster := generateCluster() + node := generateNode() + machine := generateMachine(cluster, node) + nodeUpgrade := generateNodeUpgrade(machine) + mdUpgrade := generateMDUpgrade(machine, cluster) + return cluster, machine, node, mdUpgrade, nodeUpgrade +} + +func mdUpgradeRequest(mdUpgrade *anywherev1.MachineDeploymentUpgrade) reconcile.Request { + return reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: mdUpgrade.Name, + Namespace: mdUpgrade.Namespace, + }, + } +} + +func generateMDUpgrade(machine *clusterv1.Machine, cluster *clusterv1.Cluster) *anywherev1.MachineDeploymentUpgrade { + return &anywherev1.MachineDeploymentUpgrade{ + ObjectMeta: metav1.ObjectMeta{ + Name: "md-upgrade-request", + Namespace: "eksa-system", + }, + Spec: anywherev1.MachineDeploymentUpgradeSpec{ + Cluster: anywherev1.Ref{ + Name: cluster.Name, + Kind: "Cluster", + }, + MachineDeployment: anywherev1.Ref{ + Name: "my-md", + Kind: "MachineDeployment", + }, + MachinesRequireUpgrade: []anywherev1.Ref{ + { + Name: machine.Name, + Kind: "Machine", + }, + }, + KubernetesVersion: "v1.28.1", + KubeletVersion: "v1.28.1", + KubeadmClusterConfig: "", + }, + } +} diff --git a/controllers/nodeupgrade_controller.go b/controllers/nodeupgrade_controller.go index 8019c504c5719..802a343f5af69 100644 --- a/controllers/nodeupgrade_controller.go +++ b/controllers/nodeupgrade_controller.go @@ -86,11 +86,11 @@ func (r *NodeUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) } machineToBeUpgraded := &clusterv1.Machine{} - if err := r.client.Get(ctx, getNamespacedNameType(nodeUpgrade.Spec.Machine.Name, nodeUpgrade.Spec.Machine.Namespace), machineToBeUpgraded); err != nil { + if err := r.client.Get(ctx, GetNamespacedNameType(nodeUpgrade.Spec.Machine.Name, nodeUpgrade.Spec.Machine.Namespace), machineToBeUpgraded); err != nil { return ctrl.Result{}, err } - rClient, err := r.remoteClientRegistry.GetClient(ctx, getNamespacedNameType(machineToBeUpgraded.Spec.ClusterName, machineToBeUpgraded.Namespace)) + rClient, err := r.remoteClientRegistry.GetClient(ctx, GetNamespacedNameType(machineToBeUpgraded.Spec.ClusterName, machineToBeUpgraded.Namespace)) if err != nil { return ctrl.Result{}, err } @@ -352,7 +352,8 @@ func isControlPlane(node *corev1.Node) bool { return ok } -func getNamespacedNameType(name, namespace string) types.NamespacedName { +// GetNamespacedNameType takes name and namespace and returns NamespacedName in namespace/name format. 
+func GetNamespacedNameType(name, namespace string) types.NamespacedName {
 	return types.NamespacedName{
 		Name:      name,
 		Namespace: namespace,
@@ -384,8 +385,40 @@ func upgraderPodExists(ctx context.Context, remoteClient client.Client, nodeName
 
 func getUpgraderPod(ctx context.Context, remoteClient client.Client, nodeName string) (*corev1.Pod, error) {
 	pod := &corev1.Pod{}
-	if err := remoteClient.Get(ctx, getNamespacedNameType(upgrader.PodName(nodeName), constants.EksaSystemNamespace), pod); err != nil {
+	if err := remoteClient.Get(ctx, GetNamespacedNameType(upgrader.PodName(nodeName), constants.EksaSystemNamespace), pod); err != nil {
 		return nil, err
 	}
 	return pod, nil
 }
+
+func getNodeUpgrade(ctx context.Context, remoteClient client.Client, nodeUpgradeName string) (*anywherev1.NodeUpgrade, error) {
+	n := &anywherev1.NodeUpgrade{}
+	if err := remoteClient.Get(ctx, GetNamespacedNameType(nodeUpgradeName, constants.EksaSystemNamespace), n); err != nil {
+		return nil, err
+	}
+	return n, nil
+}
+
+// nodeUpgraderName returns the name of the node upgrade object based on the machine reference.
+func nodeUpgraderName(machineRefName string) string {
+	return fmt.Sprintf("%s-node-upgrade", machineRefName)
+}
+
+func nodeUpgrader(machineRef anywherev1.Ref, kubernetesVersion, etcdVersion string, firstControlPlane bool) *anywherev1.NodeUpgrade {
+	return &anywherev1.NodeUpgrade{
+		ObjectMeta: v1.ObjectMeta{
+			Name:      nodeUpgraderName(machineRef.Name),
+			Namespace: constants.EksaSystemNamespace,
+		},
+		Spec: anywherev1.NodeUpgradeSpec{
+			Machine: corev1.ObjectReference{
+				Kind:      machineRef.Kind,
+				Namespace: constants.EksaSystemNamespace,
+				Name:      machineRef.Name,
+			},
+			KubernetesVersion:     kubernetesVersion,
+			EtcdVersion:           &etcdVersion,
+			FirstNodeToBeUpgraded: firstControlPlane,
+		},
+	}
+}
diff --git a/pkg/api/v1alpha1/machinedeploymentupgrade_types.go b/pkg/api/v1alpha1/machinedeploymentupgrade_types.go
index 1ef08a885c840..28876d1f1b0cb 100644
--- a/pkg/api/v1alpha1/machinedeploymentupgrade_types.go
+++ b/pkg/api/v1alpha1/machinedeploymentupgrade_types.go
@@ -10,7 +10,7 @@ const MachineDeploymentUpgradeKind = "MachineDeploymentUpgrade"
 // MachineDeploymentUpgradeSpec defines the desired state of MachineDeploymentUpgrade.
 type MachineDeploymentUpgradeSpec struct {
 	Cluster                Ref    `json:"cluster"`
-	MachineDeployment      Ref    `json:"controlPlane"`
+	MachineDeployment      Ref    `json:"machineDeployment"`
 	MachinesRequireUpgrade []Ref  `json:"machinesRequireUpgrade"`
 	KubernetesVersion      string `json:"kubernetesVersion"`
 	KubeletVersion         string `json:"kubeletVersion"`
@@ -19,9 +19,9 @@ type MachineDeploymentUpgradeSpec struct {
 // MachineDeploymentUpgradeStatus defines the observed state of MachineDeploymentUpgrade.
 type MachineDeploymentUpgradeStatus struct {
-	RequireUpgrade int64 `json:"requireUpgrade"`
-	Upgraded       int64 `json:"upgraded"`
-	Ready          bool  `json:"ready"`
+	RequireUpgrade int64 `json:"requireUpgrade,omitempty"`
+	Upgraded       int64 `json:"upgraded,omitempty"`
+	Ready          bool  `json:"ready,omitempty"`
 }
 
 //+kubebuilder:object:root=true
diff --git a/pkg/nodeupgrader/testdata/expected_first_control_plane_upgrader_pod.yaml b/pkg/nodeupgrader/testdata/expected_first_control_plane_upgrader_pod.yaml
index 67b631d0a2c36..1ff3d45a7cc0d 100644
--- a/pkg/nodeupgrader/testdata/expected_first_control_plane_upgrader_pod.yaml
+++ b/pkg/nodeupgrader/testdata/expected_first_control_plane_upgrader_pod.yaml
@@ -1,7 +1,7 @@
 metadata:
   creationTimestamp: null
   labels:
-    ekd-d-upgrader: "true"
+    eks-d-upgrader: "true"
   name: my-node-node-upgrader
   namespace: eksa-system
 spec:
diff --git a/pkg/nodeupgrader/testdata/expected_rest_control_plane_upgrader_pod.yaml b/pkg/nodeupgrader/testdata/expected_rest_control_plane_upgrader_pod.yaml
index b0df3a8cbfd5b..692e1e67a65ac 100755
--- a/pkg/nodeupgrader/testdata/expected_rest_control_plane_upgrader_pod.yaml
+++ b/pkg/nodeupgrader/testdata/expected_rest_control_plane_upgrader_pod.yaml
@@ -1,7 +1,7 @@
 metadata:
   creationTimestamp: null
   labels:
-    ekd-d-upgrader: "true"
+    eks-d-upgrader: "true"
   name: my-node-node-upgrader
   namespace: eksa-system
 spec:
diff --git a/pkg/nodeupgrader/testdata/expected_worker_upgrader_pod.yaml b/pkg/nodeupgrader/testdata/expected_worker_upgrader_pod.yaml
index f5f56086f5b51..da16187e75a58 100755
--- a/pkg/nodeupgrader/testdata/expected_worker_upgrader_pod.yaml
+++ b/pkg/nodeupgrader/testdata/expected_worker_upgrader_pod.yaml
@@ -1,7 +1,7 @@
 metadata:
   creationTimestamp: null
   labels:
-    ekd-d-upgrader: "true"
+    eks-d-upgrader: "true"
   name: my-node-node-upgrader
   namespace: eksa-system
 spec:
diff --git a/pkg/nodeupgrader/upgrader.go b/pkg/nodeupgrader/upgrader.go
index f5507fbfd66fe..843709714ae1c 100644
--- a/pkg/nodeupgrader/upgrader.go
+++ b/pkg/nodeupgrader/upgrader.go
@@ -65,7 +65,7 @@ func upgraderPod(nodeName, image string) *corev1.Pod {
 			Name:      PodName(nodeName),
 			Namespace: constants.EksaSystemNamespace,
 			Labels: map[string]string{
-				"ekd-d-upgrader": "true",
+				"eks-d-upgrader": "true",
 			},
 		},
 		Spec: corev1.PodSpec{