From 51a401142cd58a308fdbdd22f91257e4df52bdc5 Mon Sep 17 00:00:00 2001
From: Andreas Sommer
Date: Tue, 3 Dec 2024 16:52:10 +0100
Subject: [PATCH] Recreate bootstrap token if it was cleaned up

---
 .../controllers/kubeadmconfig_controller.go |  40 +++--
 .../kubeadmconfig_controller_test.go        | 149 ++++++++++++++++++
 2 files changed, 177 insertions(+), 12 deletions(-)

diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
index f44295c25160..6e1ba4d5f295 100644
--- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
+++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
@@ -322,7 +322,7 @@ func (r *KubeadmConfigReconciler) reconcile(ctx context.Context, scope *Scope, c
 // If the BootstrapToken has been generated for a join but the config owner has no nodeRefs,
 // this indicates that the node has not yet joined and the token in the join config has not
 // been consumed and it may need a refresh.
- return r.refreshBootstrapTokenIfNeeded(ctx, config, cluster)
+ return r.refreshBootstrapTokenIfNeeded(ctx, config, cluster, scope)
 }
 if configOwner.IsMachinePool() {
 // If the BootstrapToken has been generated and infrastructure is ready but the configOwner is a MachinePool,
@@ -360,7 +360,7 @@
 return r.joinWorker(ctx, scope)
 }
 
-func (r *KubeadmConfigReconciler) refreshBootstrapTokenIfNeeded(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster) (ctrl.Result, error) {
+func (r *KubeadmConfigReconciler) refreshBootstrapTokenIfNeeded(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster, scope *Scope) (ctrl.Result, error) {
 log := ctrl.LoggerFrom(ctx)
 
 token := config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token
@@ -371,6 +371,11 @@
 
 secret, err := getToken(ctx, remoteClient, token)
 if err != nil {
+ if apierrors.IsNotFound(err) && scope.ConfigOwner.IsMachinePool() {
+ log.Info("Bootstrap token secret not found, triggering creation of new token")
+ config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = ""
+ return r.recreateBootstrapToken(ctx, config, scope, remoteClient)
+ }
 return ctrl.Result{}, errors.Wrapf(err, "failed to get bootstrap token secret in order to refresh it")
 }
 log = log.WithValues("Secret", klog.KObj(secret))
@@ -401,6 +406,11 @@
 log.Info("Refreshing token until the infrastructure has a chance to consume it", "oldExpiration", secretExpiration, "newExpiration", newExpiration)
 err = remoteClient.Update(ctx, secret)
 if err != nil {
+ if apierrors.IsNotFound(err) && scope.ConfigOwner.IsMachinePool() {
+ log.Info("Bootstrap token secret not found, triggering creation of new token")
+ config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = ""
+ return r.recreateBootstrapToken(ctx, config, scope, remoteClient)
+ }
 return ctrl.Result{}, errors.Wrapf(err, "failed to refresh bootstrap token")
 }
 return ctrl.Result{
@@ -408,6 +418,21 @@
 }, nil
 }
 
+func (r *KubeadmConfigReconciler) recreateBootstrapToken(ctx context.Context, config *bootstrapv1.KubeadmConfig, scope *Scope, remoteClient client.Client) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ token, err := createToken(ctx, remoteClient, r.TokenTTL)
+ if err != nil {
+ return ctrl.Result{}, errors.Wrapf(err, "failed to create new bootstrap token")
+ }
+
+ config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = token
+ log.V(3).Info("Altering JoinConfiguration.Discovery.BootstrapToken.Token")
+
+ // Update the bootstrap data
+ return r.joinWorker(ctx, scope)
+}
+
 func (r *KubeadmConfigReconciler) rotateMachinePoolBootstrapToken(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster, scope *Scope) (ctrl.Result, error) {
 log := ctrl.LoggerFrom(ctx)
 log.V(2).Info("Config is owned by a MachinePool, checking if token should be rotated")
@@ -423,16 +448,7 @@
 }
 if shouldRotate {
 log.Info("Creating new bootstrap token, the existing one should be rotated")
- token, err := createToken(ctx, remoteClient, r.TokenTTL)
- if err != nil {
- return ctrl.Result{}, errors.Wrapf(err, "failed to create new bootstrap token")
- }
-
- config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = token
- log.V(3).Info("Altering JoinConfiguration.Discovery.BootstrapToken.Token")
-
- // update the bootstrap data
- return r.joinWorker(ctx, scope)
+ return r.recreateBootstrapToken(ctx, config, scope, remoteClient)
 }
 return ctrl.Result{
 RequeueAfter: r.tokenCheckRefreshOrRotationInterval(),
diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go
index f91682487ef0..f99ebbe52845 100644
--- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go
+++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go
@@ -1441,6 +1441,155 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) {
 g.Expect(foundNew).To(BeTrue())
 }
 
+func TestBootstrapTokenRefreshIfTokenSecretCleaned(t *testing.T) {
+ t.Run("should not recreate the token for Machines", func(t *testing.T) {
+ g := NewWithT(t)
+
+ cluster := builder.Cluster(metav1.NamespaceDefault, "cluster").Build()
+ cluster.Status.InfrastructureReady = true
+ conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition)
+ cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443}
+
+ controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine")
+ initConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine.Namespace, "control-plane-init-config")
+
+ addKubeadmConfigToMachine(initConfig, controlPlaneInitMachine)
+
+ workerMachine := newWorkerMachineForCluster(cluster)
+ workerJoinConfig := newWorkerJoinKubeadmConfig(metav1.NamespaceDefault, "worker-join-cfg")
+ addKubeadmConfigToMachine(workerJoinConfig, workerMachine)
+ objects := []client.Object{
+ cluster,
+ workerMachine,
+ workerJoinConfig,
+ }
+
+ objects = append(objects, createSecrets(t, cluster, initConfig)...)
+ myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build() + remoteClient := fake.NewClientBuilder().Build() + k := &KubeadmConfigReconciler{ + Client: myclient, + SecretCachingClient: myclient, + KubeadmInitLock: &myInitLocker{}, + TokenTTL: DefaultTokenTTL, + ClusterCache: clustercache.NewFakeClusterCache(remoteClient, client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + } + request := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Namespace: metav1.NamespaceDefault, + Name: "worker-join-cfg", + }, + } + result, err := k.Reconcile(ctx, request) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3)) + + cfg, err := getKubeadmConfig(myclient, "worker-join-cfg", metav1.NamespaceDefault) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(cfg.Status.Ready).To(BeTrue()) + g.Expect(cfg.Status.DataSecretName).NotTo(BeNil()) + g.Expect(cfg.Status.ObservedGeneration).NotTo(BeNil()) + g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).ToNot(BeEmpty()) + firstToken := cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token + + l := &corev1.SecretList{} + g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed()) + g.Expect(l.Items).To(HaveLen(1)) + + t.Log("Token should not get recreated for single Machine since it will not use the new token if spec.bootstrap.dataSecretName was already set") + + // Simulate token cleaner of Kubernetes having deleted the token secret + err = remoteClient.Delete(ctx, &l.Items[0]) + g.Expect(err).ToNot(HaveOccurred()) + + result, err = k.Reconcile(ctx, request) + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring("failed to get bootstrap token secret in order to refresh it")) + // New token should not have been created + cfg, err = getKubeadmConfig(myclient, "worker-join-cfg", metav1.NamespaceDefault) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).To(Equal(firstToken)) + + l = &corev1.SecretList{} + g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed()) + g.Expect(l.Items).To(BeEmpty()) + }) + t.Run("should recreate the token for MachinePools", func(t *testing.T) { + _ = feature.MutableGates.Set("MachinePool=true") + g := NewWithT(t) + + cluster := builder.Cluster(metav1.NamespaceDefault, "cluster").Build() + cluster.Status.InfrastructureReady = true + conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) + cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443} + + controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine") + initConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine.Namespace, "control-plane-init-config") + + addKubeadmConfigToMachine(initConfig, controlPlaneInitMachine) + + workerMachinePool := newWorkerMachinePoolForCluster(cluster) + workerJoinConfig := newWorkerJoinKubeadmConfig(workerMachinePool.Namespace, "workerpool-join-cfg") + addKubeadmConfigToMachinePool(workerJoinConfig, workerMachinePool) + objects := []client.Object{ + cluster, + workerMachinePool, + workerJoinConfig, + } + + objects = append(objects, createSecrets(t, cluster, initConfig)...) 
+ myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}, &expv1.MachinePool{}).Build() + remoteClient := fake.NewClientBuilder().Build() + k := &KubeadmConfigReconciler{ + Client: myclient, + SecretCachingClient: myclient, + KubeadmInitLock: &myInitLocker{}, + TokenTTL: DefaultTokenTTL, + ClusterCache: clustercache.NewFakeClusterCache(remoteClient, client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + } + request := ctrl.Request{ + NamespacedName: client.ObjectKey{ + Namespace: metav1.NamespaceDefault, + Name: "workerpool-join-cfg", + }, + } + result, err := k.Reconcile(ctx, request) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3)) + + cfg, err := getKubeadmConfig(myclient, "workerpool-join-cfg", metav1.NamespaceDefault) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(cfg.Status.Ready).To(BeTrue()) + g.Expect(cfg.Status.DataSecretName).NotTo(BeNil()) + g.Expect(cfg.Status.ObservedGeneration).NotTo(BeNil()) + g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).ToNot(BeEmpty()) + firstToken := cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token + + l := &corev1.SecretList{} + g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed()) + g.Expect(l.Items).To(HaveLen(1)) + + t.Log("Ensure that the token gets recreated if it was cleaned up by Kubernetes (e.g. on expiry)") + + // Simulate token cleaner of Kubernetes having deleted the token secret + err = remoteClient.Delete(ctx, &l.Items[0]) + g.Expect(err).ToNot(HaveOccurred()) + + result, err = k.Reconcile(ctx, request) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3)) + // New token should have been created + cfg, err = getKubeadmConfig(myclient, "workerpool-join-cfg", metav1.NamespaceDefault) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).ToNot(BeEmpty()) + g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).ToNot(Equal(firstToken)) + + l = &corev1.SecretList{} + g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed()) + g.Expect(l.Items).To(HaveLen(1)) + }) +} + // Ensure the discovery portion of the JoinConfiguration gets generated correctly. func TestKubeadmConfigReconciler_Reconcile_DiscoveryReconcileBehaviors(t *testing.T) { caHash := []string{"...."}