Skip to content

Commit

Permalink
update: apply the monitoring stack only on managed clusters
Browse files Browse the repository at this point in the history
- keep managed cluster logic as-is
- create the redhat-ods-monitoring namespace only on managed clusters, and only if the DSCI sets monitoring to enabled
- no longer create the redhat-ods-monitoring namespace on self-managed clusters, even if monitoring is enabled
- remove the monitoring role, rolebinding, and servicemonitor during upgrade on self-managed clusters
- do not apply the monitoring role, rolebinding, networkpolicy, and servicemonitor on a clean install of a self-managed cluster

Signed-off-by: Wen Zhou <[email protected]>
  • Loading branch information
zdtsw committed Dec 2, 2024
1 parent 0b4b1f4 commit f118a6a
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 31 deletions.
19 changes: 4 additions & 15 deletions controllers/dscinitialization/dscinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,35 +234,24 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re
if !createUsergroup {
log.Info("DSCI disabled usergroup creation")
} else {
err := r.createUserGroup(ctx, instance, "rhods-admins")
if err != nil {
return reconcile.Result{}, err
}
}
if instance.Spec.Monitoring.ManagementState == operatorv1.Managed {
log.Info("Monitoring enabled, won't apply changes", "cluster", "Self-Managed RHODS Mode")
err = r.configureCommonMonitoring(ctx, instance)
if err != nil {
if err := r.createUserGroup(ctx, instance, "rhods-admins"); err != nil {
return reconcile.Result{}, err
}
}
case cluster.ManagedRhoai:
osdConfigsPath := filepath.Join(deploy.DefaultManifestPath, "osd-configs")
err = deploy.DeployManifestsFromPath(ctx, r.Client, instance, osdConfigsPath, r.ApplicationsNamespace, "osd", true)
if err != nil {
if err = deploy.DeployManifestsFromPath(ctx, r.Client, instance, osdConfigsPath, r.ApplicationsNamespace, "osd", true); err != nil {
log.Error(err, "Failed to apply osd specific configs from manifests", "Manifests path", osdConfigsPath)
r.Recorder.Eventf(instance, corev1.EventTypeWarning, "DSCInitializationReconcileError", "Failed to apply "+osdConfigsPath)

return reconcile.Result{}, err
}
if instance.Spec.Monitoring.ManagementState == operatorv1.Managed {
log.Info("Monitoring enabled in initialization stage", "cluster", "Managed Service Mode")
err := r.configureManagedMonitoring(ctx, instance, "init")
if err != nil {
if err := r.configureManagedMonitoring(ctx, instance, "init"); err != nil {
return reconcile.Result{}, err
}
err = r.configureCommonMonitoring(ctx, instance)
if err != nil {
if err = r.configureCommonMonitoring(ctx, instance); err != nil {
return reconcile.Result{}, err
}
}
Expand Down
5 changes: 2 additions & 3 deletions controllers/dscinitialization/dscinitialization_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ var _ = Describe("DataScienceCluster initialization", func() {
WithPolling(interval).
Should(BeFalse())
})
It("Should create default monitoring namespace if monitoring enabled", func(ctx context.Context) {
It("Should not create default monitoring namespace even monitoring enabled for non-managed cluster", func(ctx context.Context) {
// when
desiredDsci := createDSCI(operatorv1.Managed, operatorv1.Managed, monitoringNamespace2)
Expect(k8sClient.Create(ctx, desiredDsci)).Should(Succeed())
Expand All @@ -159,8 +159,7 @@ var _ = Describe("DataScienceCluster initialization", func() {
WithContext(ctx).
WithTimeout(timeout).
WithPolling(interval).
Should(BeTrue())
Expect(foundMonitoringNamespace.Name).Should(Equal(monitoringNamespace2))
Should(BeFalse())
})
})

Expand Down
2 changes: 1 addition & 1 deletion controllers/dscinitialization/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func TestDataScienceClusterInitialization(t *testing.T) {

var testScheme = runtime.NewScheme()

//nolint:fatcontext

var _ = BeforeSuite(func() {
// can't use suite's context as the manager should survive the function
gCtx, gCancel = context.WithCancel(context.Background())

Check failure on line 83 in controllers/dscinitialization/suite_test.go

View workflow job for this annotation

GitHub Actions / golangci-lint

nested context in function literal (fatcontext)
Expand Down
18 changes: 10 additions & 8 deletions controllers/dscinitialization/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ func (r *DSCInitializationReconciler) createOdhNamespace(ctx context.Context, ds
return err
}
}
// Create Monitoring Namespace if it is enabled and not exists
if dscInit.Spec.Monitoring.ManagementState == operatorv1.Managed {
// Create Monitoring Namespace if it is enabled and not exists and only for Managed cluster
if dscInit.Spec.Monitoring.ManagementState == operatorv1.Managed && platform == cluster.ManagedRhoai {
foundMonitoringNamespace := &corev1.Namespace{}
monitoringName := dscInit.Spec.Monitoring.Namespace
err := r.Get(ctx, client.ObjectKey{Name: monitoringName}, foundMonitoringNamespace)
Expand Down Expand Up @@ -205,18 +205,20 @@ func (r *DSCInitializationReconciler) reconcileDefaultNetworkPolicy(ctx context.
log.Error(err, "error to set networkpolicy in operator namespace", "path", networkpolicyPath)
return err
}
// Deploy networkpolicy for monitoring namespace
err = deploy.DeployManifestsFromPath(ctx, r.Client, dscInit, networkpolicyPath+"/monitoring", dscInit.Spec.Monitoring.Namespace, "networkpolicy", true)
if err != nil {
log.Error(err, "error to set networkpolicy in monitroing namespace", "path", networkpolicyPath)
return err
}
// Deploy networkpolicy for applications namespace
err = deploy.DeployManifestsFromPath(ctx, r.Client, dscInit, networkpolicyPath+"/applications", dscInit.Spec.ApplicationsNamespace, "networkpolicy", true)
if err != nil {
log.Error(err, "error to set networkpolicy in applications namespace", "path", networkpolicyPath)
return err
}
if platform == cluster.ManagedRhoai {
// Deploy networkpolicy for monitoring namespace
err = deploy.DeployManifestsFromPath(ctx, r.Client, dscInit, networkpolicyPath+"/monitoring", dscInit.Spec.Monitoring.Namespace, "networkpolicy", true)
if err != nil {
log.Error(err, "error to set networkpolicy in monitroing namespace", "path", networkpolicyPath)
return err
}
}
} else { // Expected namespace for the given name in ODH
desiredNetworkPolicy := &networkingv1.NetworkPolicy{
TypeMeta: metav1.TypeMeta{
Expand Down
2 changes: 1 addition & 1 deletion controllers/webhook/webhook_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func TestAPIs(t *testing.T) {
RunSpecs(t, "Webhook Suite")
}

//nolint:fatcontext

var _ = BeforeSuite(func() {
// can't use suite's context as the manager should survive the function
gCtx, gCancel = context.WithCancel(context.Background())

Check failure on line 85 in controllers/webhook/webhook_suite_test.go

View workflow job for this annotation

GitHub Actions / golangci-lint

nested context in function literal (fatcontext)
Expand Down
17 changes: 14 additions & 3 deletions pkg/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ func CleanupExistingResource(ctx context.Context,
oldReleaseVersion cluster.Release,
) error {
var multiErr *multierror.Error
// Special Handling of cleanup of deprecated model monitoring stack
// Special Handling of cleanup of deprecated model monitoring stack on managed
if platform == cluster.ManagedRhoai {
deprecatedDeployments := []string{"rhods-prometheus-operator"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedDeployments, &appsv1.DeploymentList{}))
Expand Down Expand Up @@ -247,9 +247,20 @@ func CleanupExistingResource(ctx context.Context,
deprecatedServicemonitors := []string{"modelmesh-federated-metrics"}
multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedServicemonitors))
}
// Special Handling of cleanup of deprecated SRE monitoring stack on self-managed
if platform == cluster.SelfManagedRhoai {
deprecatedOperatorSM := []string{"rhods-monitor-federation"}
multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedOperatorSM))

deprecatedRolebindings := []string{"rhods-prometheus-cluster-monitoring-viewer-binding", "redhat-ods-monitoring"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedRolebindings, &rbacv1.RoleBindingList{}))

deprecatedRroles := []string{"redhat-ods-monitoring"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedRroles, &rbacv1.RoleList{}))
}
// common logic for both self-managed and managed
deprecatedOperatorSM := []string{"rhods-monitor-federation2"}
multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedOperatorSM))
deprecatedOperatorSM2 := []string{"rhods-monitor-federation2"}
multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedOperatorSM2))

// Remove deprecated opendatahub namespace(previously owned by kuberay and Kueue)
multiErr = multierror.Append(multiErr, deleteDeprecatedNamespace(ctx, cli, "opendatahub"))
Expand Down

0 comments on commit f118a6a

Please sign in to comment.