From 9f3fbe3f920ed811845d5bfdd3f0c25175f35681 Mon Sep 17 00:00:00 2001 From: Wen Zhou Date: Mon, 28 Oct 2024 18:09:06 +0100 Subject: [PATCH] update: only apply monitoring stack if it is a managed cluster - keep managed cluster logic as-is - create redhat-ods-monitoring namespace only in managed cluster if DSCI sets monitoring to enabled - do not create redhat-ods-monitoring namespace in self-managed any more even if monitoring is enabled - remove role, rolebinding and servicemonitor in upgrade for self-managed - do not apply role, rolebinding, networkpolicy and servicemonitor for monitoring in clean install for self-managed Signed-off-by: Wen Zhou --- .../dscinitialization_controller.go | 19 ++++--------------- .../dscinitialization_test.go | 5 ++--- controllers/dscinitialization/utils.go | 18 ++++++++++-------- pkg/upgrade/upgrade.go | 17 ++++++++++++++--- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/controllers/dscinitialization/dscinitialization_controller.go b/controllers/dscinitialization/dscinitialization_controller.go index b9156aa7c16..efd0c52ebc9 100644 --- a/controllers/dscinitialization/dscinitialization_controller.go +++ b/controllers/dscinitialization/dscinitialization_controller.go @@ -234,22 +234,13 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re if !createUsergroup { log.Info("DSCI disabled usergroup creation") } else { - err := r.createUserGroup(ctx, instance, "rhods-admins") - if err != nil { - return reconcile.Result{}, err - } - } - if instance.Spec.Monitoring.ManagementState == operatorv1.Managed { - log.Info("Monitoring enabled, won't apply changes", "cluster", "Self-Managed RHODS Mode") - err = r.configureCommonMonitoring(ctx, instance) - if err != nil { + if err := r.createUserGroup(ctx, instance, "rhods-admins"); err != nil { return reconcile.Result{}, err } } case cluster.ManagedRhoai: osdConfigsPath := filepath.Join(deploy.DefaultManifestPath, "osd-configs") - err = deploy.DeployManifestsFromPath(ctx, 
r.Client, instance, osdConfigsPath, r.ApplicationsNamespace, "osd", true) - if err != nil { + if err = deploy.DeployManifestsFromPath(ctx, r.Client, instance, osdConfigsPath, r.ApplicationsNamespace, "osd", true); err != nil { log.Error(err, "Failed to apply osd specific configs from manifests", "Manifests path", osdConfigsPath) r.Recorder.Eventf(instance, corev1.EventTypeWarning, "DSCInitializationReconcileError", "Failed to apply "+osdConfigsPath) @@ -257,12 +248,10 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re } if instance.Spec.Monitoring.ManagementState == operatorv1.Managed { log.Info("Monitoring enabled in initialization stage", "cluster", "Managed Service Mode") - err := r.configureManagedMonitoring(ctx, instance, "init") - if err != nil { + if err := r.configureManagedMonitoring(ctx, instance, "init"); err != nil { return reconcile.Result{}, err } - err = r.configureCommonMonitoring(ctx, instance) - if err != nil { + if err = r.configureCommonMonitoring(ctx, instance); err != nil { return reconcile.Result{}, err } } diff --git a/controllers/dscinitialization/dscinitialization_test.go b/controllers/dscinitialization/dscinitialization_test.go index d9afb51deb4..15e1f65d688 100644 --- a/controllers/dscinitialization/dscinitialization_test.go +++ b/controllers/dscinitialization/dscinitialization_test.go @@ -143,7 +143,7 @@ var _ = Describe("DataScienceCluster initialization", func() { WithPolling(interval). Should(BeFalse()) }) - It("Should create default monitoring namespace if monitoring enabled", func(ctx context.Context) { + It("Should not create default monitoring namespace even monitoring enabled for non-managed cluster", func(ctx context.Context) { // when desiredDsci := createDSCI(operatorv1.Managed, operatorv1.Managed, monitoringNamespace2) Expect(k8sClient.Create(ctx, desiredDsci)).Should(Succeed()) @@ -159,8 +159,7 @@ var _ = Describe("DataScienceCluster initialization", func() { WithContext(ctx). 
WithTimeout(timeout). WithPolling(interval). - Should(BeTrue()) - Expect(foundMonitoringNamespace.Name).Should(Equal(monitoringNamespace2)) + Should(BeFalse()) }) }) diff --git a/controllers/dscinitialization/utils.go b/controllers/dscinitialization/utils.go index b434278a1ad..bf050304fc3 100644 --- a/controllers/dscinitialization/utils.go +++ b/controllers/dscinitialization/utils.go @@ -81,8 +81,8 @@ func (r *DSCInitializationReconciler) createOdhNamespace(ctx context.Context, ds return err } } - // Create Monitoring Namespace if it is enabled and not exists - if dscInit.Spec.Monitoring.ManagementState == operatorv1.Managed { + // Create Monitoring Namespace if it is enabled and not exists and only for Managed cluster + if dscInit.Spec.Monitoring.ManagementState == operatorv1.Managed && platform == cluster.ManagedRhoai { foundMonitoringNamespace := &corev1.Namespace{} monitoringName := dscInit.Spec.Monitoring.Namespace err := r.Get(ctx, client.ObjectKey{Name: monitoringName}, foundMonitoringNamespace) @@ -205,18 +205,20 @@ func (r *DSCInitializationReconciler) reconcileDefaultNetworkPolicy(ctx context. 
log.Error(err, "error to set networkpolicy in operator namespace", "path", networkpolicyPath) return err } - // Deploy networkpolicy for monitoring namespace - err = deploy.DeployManifestsFromPath(ctx, r.Client, dscInit, networkpolicyPath+"/monitoring", dscInit.Spec.Monitoring.Namespace, "networkpolicy", true) - if err != nil { - log.Error(err, "error to set networkpolicy in monitroing namespace", "path", networkpolicyPath) - return err - } // Deploy networkpolicy for applications namespace err = deploy.DeployManifestsFromPath(ctx, r.Client, dscInit, networkpolicyPath+"/applications", dscInit.Spec.ApplicationsNamespace, "networkpolicy", true) if err != nil { log.Error(err, "error to set networkpolicy in applications namespace", "path", networkpolicyPath) return err } + if platform == cluster.ManagedRhoai { + // Deploy networkpolicy for monitoring namespace + err = deploy.DeployManifestsFromPath(ctx, r.Client, dscInit, networkpolicyPath+"/monitoring", dscInit.Spec.Monitoring.Namespace, "networkpolicy", true) + if err != nil { + log.Error(err, "error to set networkpolicy in monitroing namespace", "path", networkpolicyPath) + return err + } + } } else { // Expected namespace for the given name in ODH desiredNetworkPolicy := &networkingv1.NetworkPolicy{ TypeMeta: metav1.TypeMeta{ diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index 48c7fd17068..90e544970c2 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -218,7 +218,7 @@ func CleanupExistingResource(ctx context.Context, oldReleaseVersion cluster.Release, ) error { var multiErr *multierror.Error - // Special Handling of cleanup of deprecated model monitoring stack + // Special Handling of cleanup of deprecated model monitoring stack on managed if platform == cluster.ManagedRhoai { deprecatedDeployments := []string{"rhods-prometheus-operator"} multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedDeployments, 
&appsv1.DeploymentList{})) @@ -247,9 +247,20 @@ func CleanupExistingResource(ctx context.Context, deprecatedServicemonitors := []string{"modelmesh-federated-metrics"} multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedServicemonitors)) } + // Special Handling of cleanup of deprecated SRE monitoring stack on self-managed + if platform == cluster.SelfManagedRhoai { + deprecatedOperatorSM := []string{"rhods-monitor-federation"} + multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedOperatorSM)) + + deprecatedRolebindings := []string{"rhods-prometheus-cluster-monitoring-viewer-binding", "redhat-ods-monitoring"} + multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedRolebindings, &rbacv1.RoleBindingList{})) + + deprecatedRroles := []string{"redhat-ods-monitoring"} + multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedRroles, &rbacv1.RoleList{})) + } // common logic for both self-managed and managed - deprecatedOperatorSM := []string{"rhods-monitor-federation2"} - multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedOperatorSM)) + deprecatedOperatorSM2 := []string{"rhods-monitor-federation2"} + multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedOperatorSM2)) // Remove deprecated opendatahub namespace(previously owned by kuberay and Kueue) multiErr = multierror.Append(multiErr, deleteDeprecatedNamespace(ctx, cli, "opendatahub"))