Skip to content

Commit

Permalink
Merge pull request rancher#47894 from jiaqiluo/fix-suc
Browse files Browse the repository at this point in the history
  • Loading branch information
jiaqiluo authored Nov 6, 2024
2 parents f95d1dc + b1f6f14 commit 66ce2f1
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 168 deletions.
132 changes: 0 additions & 132 deletions pkg/agent/clean/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"strings"
"time"

"github.com/rancher/rancher/pkg/controllers/dashboard/scaleavailable"
"github.com/rancher/rancher/pkg/controllers/management/usercontrollers"
"github.com/rancher/rancher/pkg/controllers/managementagent/nslabels"
"github.com/rancher/rancher/pkg/controllers/managementuserlegacy/helm"
Expand All @@ -33,8 +32,6 @@ import (
"github.com/sirupsen/logrus"
apierror "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)
Expand Down Expand Up @@ -75,11 +72,6 @@ var (
}
)

const (
// The finalizer is added by Wrangler to deployments, please check the `systemcharts` package for more details
legacyK3sBasedUpgraderDeprecationFinalizer = "wrangler.cattle.io/legacy-k3sBasedUpgrader-deprecation"
)

// getNSFunc returns a list of namespace names derived from the given cluster
// client — NOTE(review): the exact selection semantics depend on each
// implementation; verify against the call sites in Cluster().
type getNSFunc func(*kubernetes.Clientset) ([]string, error)

func Cluster() error {
Expand Down Expand Up @@ -113,19 +105,6 @@ func Cluster() error {
}

var errors []error

// First, scale the cattle-cluster-agent down to 0 to stop it from running controllers,
// particularly the `systemcharts` handler that adds finalizers to deployments
if err := scaleDownClusterAgent(client); err != nil {
errors = append(errors, err)
}

// Deployments should be cleaned up before removing namespaces, specifically by removing the finalizer.
deploymentErr := cleanupDeployments(client)
if len(deploymentErr) > 0 {
errors = append(errors, deploymentErr...)
}

var toRemove = make([]string, len(nsToRemove))
copy(toRemove, nsToRemove)

Expand Down Expand Up @@ -293,117 +272,6 @@ func cleanupNamespaces(client *kubernetes.Clientset) []error {

}

// scaleDownClusterAgent scales the cattle-cluster-agent deployment in the
// system namespace (namespace.System) down to zero replicas and then waits,
// with exponential backoff, until all of its pods are gone. A missing
// deployment is treated as success. Honors the package-level dryRun flag:
// in dry-run mode no update is sent to the API server.
func scaleDownClusterAgent(client *kubernetes.Clientset) error {
	logrus.Info("Attempting to scale down the cattle-cluster-agent deployment")
	err := tryUpdate(func() error {
		agent, err := client.AppsV1().Deployments(namespace.System).Get(context.TODO(), "cattle-cluster-agent", metav1.GetOptions{})
		if err != nil {
			if apierror.IsNotFound(err) {
				// Nothing to scale down.
				return nil
			}
			return err
		}

		var updated bool
		// A nil Replicas field defaults to 1 in Kubernetes, so it must be
		// scaled down as well (the previous check skipped the nil case).
		if agent.Spec.Replicas == nil || *agent.Spec.Replicas != 0 {
			var zero int32 = 0
			agent.Spec.Replicas = &zero
			updated = true
		}
		// for the usage of the annotation, please check the package `scaleavailable`
		// (presumably a non-numeric value makes that handler bail out instead
		// of restoring the replica count — confirm against that package).
		val := agent.Annotations[scaleavailable.AvailableAnnotation]
		if val != "invalid" {
			if agent.Annotations == nil {
				// Writing to a nil map panics; initialize it first.
				agent.Annotations = map[string]string{}
			}
			agent.Annotations[scaleavailable.AvailableAnnotation] = "invalid"
			updated = true
		}

		if updated {
			logrus.Info("Scaling down cattle-cluster-agent")
			if !dryRun {
				_, err = client.AppsV1().Deployments(namespace.System).Update(context.TODO(), agent, metav1.UpdateOptions{})
				if err != nil {
					return err
				}
			}
		}
		return nil
	})
	if err != nil {
		return err
	}

	logrus.Info("Waiting for cattle-cluster-agent's pods to be removed")
	// Roughly 1s + 1.5s + 2.25s + ... over 5 steps before giving up.
	var backoff = wait.Backoff{
		Duration: 1 * time.Second,
		Factor:   1.5,
		Jitter:   0,
		Steps:    5,
	}
	return wait.ExponentialBackoff(backoff, func() (bool, error) {
		set := labels.Set(map[string]string{"app": "cattle-cluster-agent"})
		podList, err := client.CoreV1().Pods(namespace.System).List(context.TODO(), metav1.ListOptions{LabelSelector: set.String()})
		if err != nil {
			if apierror.IsNotFound(err) {
				return true, nil
			}
			return false, err
		}
		if podList != nil && len(podList.Items) > 0 {
			// Pods are still terminating; retry.
			return false, nil
		}
		return true, nil
	})
}

// cleanupDeployments removes the legacy k3s-based-upgrader deprecation
// finalizer from every deployment in the cluster so that namespace removal
// is not blocked by it. It returns one error per deployment that could not
// be updated, or just the listing error when the initial List fails. Honors
// the package-level dryRun flag.
func cleanupDeployments(client *kubernetes.Clientset) []error {
	logrus.Info("Starting cleanup of deployments")
	list, listErr := client.AppsV1().Deployments("").List(context.TODO(), metav1.ListOptions{})
	if listErr != nil {
		return []error{listErr}
	}

	var errs []error
	for i := range list.Items {
		ns, name := list.Items[i].Namespace, list.Items[i].Name
		updateErr := tryUpdate(func() error {
			// Re-fetch the deployment so the update is applied to a fresh copy.
			dep, err := client.AppsV1().Deployments(ns).Get(context.TODO(), name, metav1.GetOptions{})
			if err != nil {
				if apierror.IsNotFound(err) {
					// Already gone — nothing to clean.
					return nil
				}
				return err
			}

			// Keep every finalizer except the target one.
			kept := dep.Finalizers[:0:0]
			for _, f := range dep.Finalizers {
				if f != legacyK3sBasedUpgraderDeprecationFinalizer {
					kept = append(kept, f)
				}
			}
			if len(kept) == len(dep.Finalizers) {
				// The target finalizer was not present; no update needed.
				return nil
			}
			dep.Finalizers = kept

			logrus.Infof("Updating deployment: %s", dep.Name)
			if dryRun {
				return nil
			}
			_, err = client.AppsV1().Deployments(dep.Namespace).Update(context.TODO(), dep, metav1.UpdateOptions{})
			return err
		})
		if updateErr != nil {
			errs = append(errs, updateErr)
		}
	}
	return errs
}

func cleanupClusterRoleBindings(client *kubernetes.Clientset) []error {
logrus.Info("Starting cleanup of clusterRoleBindings")
crbs, err := client.RbacV1().ClusterRoleBindings().List(context.TODO(), listOptions)
Expand Down
12 changes: 6 additions & 6 deletions pkg/controllers/dashboard/scaleavailable/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
)

const (
AvailableAnnotation = "management.cattle.io/scale-available"
availableAnnotation = "management.cattle.io/scale-available"
)

type handler struct {
Expand All @@ -30,14 +30,14 @@ func Register(ctx context.Context, wrangler *wrangler.Context) {
}
deploymentCache := wrangler.Apps.Deployment().Cache()
wrangler.Apps.Deployment().OnChange(ctx, "scale-available", h.OnChange)
deploymentCache.AddIndexer(AvailableAnnotation, func(obj *appsv1.Deployment) ([]string, error) {
if val := obj.Annotations[AvailableAnnotation]; val != "" {
return []string{AvailableAnnotation}, nil
deploymentCache.AddIndexer(availableAnnotation, func(obj *appsv1.Deployment) ([]string, error) {
if val := obj.Annotations[availableAnnotation]; val != "" {
return []string{availableAnnotation}, nil
}
return nil, nil
})
relatedresource.Watch(ctx, "scale-available-trigger", func(namespace, name string, obj runtime.Object) (result []relatedresource.Key, _ error) {
deps, err := deploymentCache.GetByIndex(AvailableAnnotation, AvailableAnnotation)
deps, err := deploymentCache.GetByIndex(availableAnnotation, availableAnnotation)
if err != nil {
return nil, err
}
Expand All @@ -55,7 +55,7 @@ func (h *handler) OnChange(key string, deployment *appsv1.Deployment) (*appsv1.D
if deployment == nil {
return nil, nil
}
numStr := deployment.Annotations[AvailableAnnotation]
numStr := deployment.Annotations[availableAnnotation]
if numStr == "" {
return deployment, nil
}
Expand Down
71 changes: 56 additions & 15 deletions pkg/controllers/dashboard/systemcharts/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package systemcharts

import (
"context"
"fmt"
"slices"
"time"

catalog "github.com/rancher/rancher/pkg/apis/catalog.cattle.io/v1"
v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
Expand All @@ -22,12 +25,14 @@ import (
k8sappsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/util/retry"
)

const (
repoName = "rancher-charts"
sucDeploymentName = "system-upgrade-controller"
legacyAppLabel = "io.cattle.field/appId"
repoName = "rancher-charts"
sucDeploymentName = "system-upgrade-controller"
legacyAppLabel = "io.cattle.field/appId"
legacyAppFinalizer = "systemcharts.cattle.io/legacy-k3s-based-upgrader-deprecation"
)

var (
Expand All @@ -50,7 +55,8 @@ func Register(ctx context.Context, wContext *wrangler.Context, registryOverride
h := &handler{
manager: wContext.SystemChartsManager,
namespaces: wContext.Core.Namespace(),
deployment: wContext.Apps.Deployment().Cache(),
deployment: wContext.Apps.Deployment(),
deploymentCache: wContext.Apps.Deployment().Cache(),
clusterRepo: wContext.Catalog.ClusterRepo(),
chartsConfig: chart.RancherConfigGetter{ConfigCache: wContext.Core.ConfigMap().Cache()},
registryOverride: registryOverride,
Expand All @@ -64,14 +70,15 @@ func Register(ctx context.Context, wContext *wrangler.Context, registryOverride
// ensure the system charts are installed with the correct values when there are changes to the rancher config map
relatedresource.WatchClusterScoped(ctx, "bootstrap-configmap-charts", relatedConfigMaps, wContext.Catalog.ClusterRepo(), wContext.Core.ConfigMap())

wContext.Apps.Deployment().OnRemove(ctx, "legacy-k3sBasedUpgrader-deprecation", h.onDeployment)
wContext.Apps.Deployment().OnChange(ctx, "legacy-k3sBasedUpgrader-deprecation", h.onDeployment)
return nil
}

type handler struct {
manager chart.Manager
namespaces corecontrollers.NamespaceController
deployment deploymentControllers.DeploymentCache
deployment deploymentControllers.DeploymentController
deploymentCache deploymentControllers.DeploymentCache
clusterRepo catalogcontrollers.ClusterRepoController
chartsConfig chart.RancherConfigGetter
registryOverride string
Expand Down Expand Up @@ -204,22 +211,23 @@ func (h *handler) getChartsToInstall() []*chart.Definition {
},
Enabled: func() bool {
toEnable := false
suc, err := h.deployment.Get(namespace.System, sucDeploymentName)
suc, err := h.deploymentCache.Get(namespace.System, sucDeploymentName)

// the absence of the deployment or the absence of the legacy label on the existing deployment indicate
// that the old rancher-k3s/rke2-upgrader Project App has been removed
if err != nil {
logrus.Debugf("[systemcharts] failed to get the deployment %s/%s: %s", namespace.System, sucDeploymentName, err.Error())
if errors.IsNotFound(err) {
toEnable = true
} else {
logrus.Warnf("[systemcharts] failed to get the deployment %s/%s: %s", namespace.System, sucDeploymentName, err.Error())
}
}
if suc != nil {
if _, ok := suc.Labels[legacyAppLabel]; !ok {
toEnable = true
}
}
logrus.Debugf("[systemcharts] feature ManagedSystemUpgradeController = %t, toEnable = %t",
logrus.Infof("[systemcharts] feature ManagedSystemUpgradeController = %t, toEnable = %t",
features.ManagedSystemUpgradeController.Enabled(), toEnable)

return features.ManagedSystemUpgradeController.Enabled() && toEnable
Expand All @@ -232,14 +240,47 @@ func (h *handler) getChartsToInstall() []*chart.Definition {
// when a specific event occurs on the target deployment. It is currently used to manage
// the migration from the legacy k3s-based-upgrader app to the system-upgrade-controller app.
func (h *handler) onDeployment(_ string, d *k8sappsv1.Deployment) (*k8sappsv1.Deployment, error) {
var found bool
if d != nil && d.Namespace == namespace.System && d.Name == sucDeploymentName {
appName, ok := d.Labels[legacyAppLabel]
if ok {
if appName == k3sbasedupgrade.K3sAppName || appName == k3sbasedupgrade.Rke2AppName {
found = true
}
}
}
if found {
index := slices.Index(d.Finalizers, legacyAppFinalizer)
logrus.Infof("[systemcharts] found deployment %s/%s with label %s=%s, index of target finalzier = %d",
d.Namespace, d.Name, legacyAppLabel, d.Labels[legacyAppLabel], index)
d = d.DeepCopy()
// When the deployment is being deleted, remove the finalizer if it exists, and enqueue the rancher-charts clusterRepo
if d.DeletionTimestamp != nil {
appName, ok := d.Labels[legacyAppLabel]
if ok {
logrus.Debugf("[systemcharts] found deployment %s/%s with label %s=%s", d.Namespace, d.Name, legacyAppLabel, appName)
if appName == k3sbasedupgrade.K3sAppName || appName == k3sbasedupgrade.Rke2AppName {
logrus.Debugf("[systemcharts] enqueue %s", repoName)
h.clusterRepo.Enqueue(repoName)
if index >= 0 {
var finalizers []string
finalizers = append(finalizers, d.Finalizers[:index]...)
finalizers = append(finalizers, d.Finalizers[index+1:]...)
d.Finalizers = finalizers
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
_, err := h.deployment.Update(d)
return err
})
if err != nil {
return nil, fmt.Errorf("[systemcharts] failed to update deployment %s/%s: %w", d.Namespace, d.Name, err)
}
}
logrus.Infof("[systemcharts] enqueue %s", repoName)
h.clusterRepo.EnqueueAfter(repoName, 2*time.Second)
} else {
// Add the finalizer if it is absent, ensuring Wrangler can detect the deletion event
if index == -1 {
d.Finalizers = append(d.Finalizers, legacyAppFinalizer)
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
_, err := h.deployment.Update(d)
return err
})
if err != nil {
return nil, fmt.Errorf("[systemcharts] failed to update deployment %s/%s: %w", d.Namespace, d.Name, err)
}
}
}
Expand Down
10 changes: 6 additions & 4 deletions pkg/controllers/dashboard/systemcharts/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,17 @@ type testMocks struct {
manager *chartfake.MockManager
namespaceCtrl *fake.MockNonNamespacedControllerInterface[*v1.Namespace, *v1.NamespaceList]
configCache *fake.MockCacheInterface[*v1.ConfigMap]
deployment *fake.MockControllerInterface[*appsv1.Deployment, *appsv1.DeploymentList]
deploymentCache *fake.MockCacheInterface[*appsv1.Deployment]
}

func (t *testMocks) Handler() *handler {
return &handler{
manager: t.manager,
namespaces: t.namespaceCtrl,
chartsConfig: chart.RancherConfigGetter{ConfigCache: t.configCache},
deployment: t.deploymentCache,
manager: t.manager,
namespaces: t.namespaceCtrl,
chartsConfig: chart.RancherConfigGetter{ConfigCache: t.configCache},
deployment: t.deployment,
deploymentCache: t.deploymentCache,
}
}

Expand Down
11 changes: 0 additions & 11 deletions pkg/controllers/management/usercontrollers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,17 +242,6 @@ func (c *ClusterLifecycleCleanup) createCleanupClusterRole(userContext *config.U
Resources: []string{"validatingwebhookconfigurations", "mutatingwebhookconfigurations"},
ResourceNames: []string{WebhookConfigurationName},
},
// This is needed for removing finalizers from deployments
{
Verbs: []string{"list", "get", "update"},
APIGroups: []string{"apps"},
Resources: []string{"deployments"},
},
{
Verbs: []string{"list"},
APIGroups: []string{""},
Resources: []string{"pods"},
},
}
clusterRole := rbacV1.ClusterRole{
ObjectMeta: meta,
Expand Down

0 comments on commit 66ce2f1

Please sign in to comment.