From ed43b3dfe089209231a8c73e6d98cabb5ed7a7b5 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 24 Jun 2020 15:48:56 -0400
Subject: [PATCH] Add new spec.enableMonitoring field to enable cluster monitoring

Setting this field to true will cause the cluster monitoring operator
to be enabled when the cluster boots. Setting it to false will mark
the cluster monitoring operator as unmanaged. Leaving it unset
entirely means the operator will not enforce either setting.
---
 ...eveloper.openshift.io_crcclusters_crd.yaml | 11 +++
 pkg/apis/crc/v1alpha1/crccluster_types.go     | 12 ++++
 .../crc/v1alpha1/zz_generated.deepcopy.go     |  7 +-
 .../crccluster/crccluster_controller.go       | 67 ++++++++++++++++++-
 4 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/deploy/crds/crc.developer.openshift.io_crcclusters_crd.yaml b/deploy/crds/crc.developer.openshift.io_crcclusters_crd.yaml
index b8b5aa1..1021ef9 100644
--- a/deploy/crds/crc.developer.openshift.io_crcclusters_crd.yaml
+++ b/deploy/crds/crc.developer.openshift.io_crcclusters_crd.yaml
@@ -49,6 +49,17 @@ spec:
               default: 4
               description: CPU is the number of CPUs to allocate to the cluster
               type: integer
+            enableMonitoring:
+              description: EnableMonitoring indicates if this cluster should have
+                OpenShift's cluster-monitoring-operator enabled by default. It's
+                not suggested to enable this unless you assign at least 6 CPUs and
+                16GB of memory to this cluster. If set to true or false, the operator
+                will enforce that choice every time the cluster is started. If left
+                unset entirely, the operator will not enforce either way. Setting
+                this to false will set the cluster-monitoring-operator to an unmanaged
+                state but it will not actually delete the resources out of the openshift-monitoring
+                namespace.
+              type: boolean
             memory:
               default: 16Gi
               description: Memory is the amount of memory to allocate to the cluster
diff --git a/pkg/apis/crc/v1alpha1/crccluster_types.go b/pkg/apis/crc/v1alpha1/crccluster_types.go
index 624b4b3..a3c1948 100644
--- a/pkg/apis/crc/v1alpha1/crccluster_types.go
+++ b/pkg/apis/crc/v1alpha1/crccluster_types.go
@@ -52,6 +52,18 @@ type CrcClusterSpec struct {
 	// cluster when restarted. Stopped clusters with persistent
 	// storage will retain their data between stops and starts.
 	Stopped bool `json:"stopped,omitempty"`
+
+	// EnableMonitoring indicates if this cluster should have
+	// OpenShift's cluster-monitoring-operator enabled by
+	// default. It's not suggested to enable this unless you assign at
+	// least 6 CPUs and 16GB of memory to this cluster. If set to true
+	// or false, the operator will enforce that choice every time the
+	// cluster is started. If left unset entirely, the operator will
+	// not enforce either way. Setting this to false will set the
+	// cluster-monitoring-operator to an unmanaged state but it will
+	// not actually delete the resources out of the
+	// openshift-monitoring namespace.
+	EnableMonitoring *bool `json:"enableMonitoring,omitempty"`
 }
 
 // CrcStorageSpec defines the desired storage of CrcCluster
diff --git a/pkg/apis/crc/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/crc/v1alpha1/zz_generated.deepcopy.go
index 9698204..f9065c0 100644
--- a/pkg/apis/crc/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/apis/crc/v1alpha1/zz_generated.deepcopy.go
@@ -107,7 +107,7 @@ func (in *CrcCluster) DeepCopyInto(out *CrcCluster) {
 	*out = *in
 	out.TypeMeta = in.TypeMeta
 	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
-	out.Spec = in.Spec
+	in.Spec.DeepCopyInto(&out.Spec)
 	in.Status.DeepCopyInto(&out.Status)
 	return
 }
@@ -167,6 +167,11 @@ func (in *CrcClusterList) DeepCopyObject() runtime.Object {
 func (in *CrcClusterSpec) DeepCopyInto(out *CrcClusterSpec) {
 	*out = *in
 	out.Storage = in.Storage
+	if in.EnableMonitoring != nil {
+		in, out := &in.EnableMonitoring, &out.EnableMonitoring
+		*out = new(bool)
+		**out = **in
+	}
 	return
 }
 
diff --git a/pkg/controller/crccluster/crccluster_controller.go b/pkg/controller/crccluster/crccluster_controller.go
index 51b6c52..9785a2c 100644
--- a/pkg/controller/crccluster/crccluster_controller.go
+++ b/pkg/controller/crccluster/crccluster_controller.go
@@ -67,7 +67,8 @@ var routesHelperImage = os.Getenv("ROUTES_HELPER_IMAGE")
 var bundleNs = os.Getenv("POD_NAMESPACE")
 
 const (
-	sshPort int = 2022
+	sshPort      int    = 2022
+	monitoringNs string = "openshift-monitoring"
 )
 
 // Add creates a new CrcCluster Controller and adds it to the Manager. The Manager will set fields on the Controller
@@ -440,6 +441,15 @@ func (r *ReconcileCrcCluster) Reconcile(request reconcile.Request) (reconcile.Re
 		return reconcile.Result{RequeueAfter: time.Second * 20}, nil
 	}
 
+	if crc.Spec.EnableMonitoring != nil {
+		enableMonitoring := *crc.Spec.EnableMonitoring
+		reqLogger.Info("Enabling or disabling monitoring", "EnableMonitoring", enableMonitoring)
+		if err := r.enableMonitoring(enableMonitoring, insecureCrcK8sConfig); err != nil {
+			reqLogger.Error(err, "Error enabling/disabling monitoring.")
+			return reconcile.Result{}, err
+		}
+	}
+
 	reqLogger.Info("Waiting on cluster to stabilize.")
 	notReadyPods, err := r.waitForClusterToStabilize(insecureK8sClient)
 	if err != nil {
@@ -1175,7 +1185,7 @@ func (r *ReconcileCrcCluster) updateDefaultRoutes(crc *crcv1alpha1.CrcCluster, r
 	defaultRouteNamespaces := []string{
 		"openshift-console",
 		"openshift-image-registry",
-		"openshift-monitoring",
+		monitoringNs,
 	}
 	for _, routeNs := range defaultRouteNamespaces {
 		routes, err := routeClient.Routes(routeNs).List(metav1.ListOptions{})
@@ -1196,6 +1206,59 @@ func (r *ReconcileCrcCluster) updateDefaultRoutes(crc *crcv1alpha1.CrcCluster, r
 	return updatedRoutes, nil
 }
 
+func (r *ReconcileCrcCluster) enableMonitoring(shouldEnable bool, restConfig *rest.Config) error {
+	configClient, err := configv1Client.NewForConfig(restConfig)
+	if err != nil {
+		return err
+	}
+	clusterVersion, err := configClient.ClusterVersions().Get("version", metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+	clusterVersion = clusterVersion.DeepCopy()
+
+	monitoringOverride := configv1.ComponentOverride{
+		Kind:      "Deployment",
+		Group:     "apps/v1",
+		Name:      "cluster-monitoring-operator",
+		Namespace: monitoringNs,
+		Unmanaged: true,
+	}
+	monitoringIndex := -1
+	monitoringEnabled := true
+	for i, override := range clusterVersion.Spec.Overrides {
+		if override.Kind == monitoringOverride.Kind && override.Group == monitoringOverride.Group && override.Name == monitoringOverride.Name && override.Namespace == monitoringOverride.Namespace {
+			monitoringIndex = i
+			monitoringEnabled = !override.Unmanaged
+			break
+		}
+	}
+	if monitoringEnabled && !shouldEnable {
+		// Monitoring is enabled but should be disabled
+		if monitoringIndex >= 0 {
+			clusterVersion.Spec.Overrides[monitoringIndex].Unmanaged = true
+		} else {
+			clusterVersion.Spec.Overrides = append(clusterVersion.Spec.Overrides, monitoringOverride)
+		}
+		if _, err := configClient.ClusterVersions().Update(clusterVersion); err != nil {
+			return err
+		}
+	} else if !monitoringEnabled && shouldEnable {
+		// Monitoring is disabled but should be enabled
+		if monitoringIndex >= 0 {
+			clusterVersion.Spec.Overrides = append(clusterVersion.Spec.Overrides[:monitoringIndex], clusterVersion.Spec.Overrides[monitoringIndex+1:]...)
+			if _, err := configClient.ClusterVersions().Update(clusterVersion); err != nil {
+				return err
+			}
+		} else {
+			// We should never get here because how was it disabled if
+			// the index is < 0?
+			return fmt.Errorf("monitoring is disabled but no clusterversion override found")
+		}
+	}
+	return nil
+}
+
 func (r *ReconcileCrcCluster) ensureIngressControllersUpdated(crc *crcv1alpha1.CrcCluster, restConfig *rest.Config) error {
 	operatorClient, err := operatorv1Client.NewForConfig(restConfig)
 	if err != nil {
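
For illustration only, not part of the patch: a CrcCluster manifest exercising the new field might look like the sketch below. The apiVersion, kind, and spec fields come from the CRD and types above; the metadata name and namespace are placeholders, and the cpu/memory values follow the "at least 6 CPUs and 16GB of memory" guidance from the field's description.

    apiVersion: crc.developer.openshift.io/v1alpha1
    kind: CrcCluster
    metadata:
      name: my-cluster         # placeholder name
      namespace: crc-clusters  # placeholder namespace
    spec:
      cpu: 6                   # monitoring is not suggested below 6 CPUs
      memory: 16Gi             # ...or below 16GB of memory
      enableMonitoring: true   # false marks monitoring unmanaged; omit to leave it unenforced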