From c7ed0be544622fe49931fd14c22f0bce25174bef Mon Sep 17 00:00:00 2001 From: Phenix66 <34311559+Phenix66@users.noreply.github.com> Date: Tue, 9 Jul 2024 11:51:51 -0400 Subject: [PATCH 1/2] Add condition to Alert Manager default dashboard to prevent adding the dashboard when AM is disabled Signed-off-by: Phenix66 <34311559+Phenix66@users.noreply.github.com> --- charts/kube-prometheus-stack/Chart.yaml | 2 +- charts/kube-prometheus-stack/hack/sync_grafana_dashboards.py | 1 + .../grafana/dashboards-1.14/alertmanager-overview.yaml | 2 +- charts/kube-prometheus-stack/values.yaml | 4 ++++ 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/charts/kube-prometheus-stack/Chart.yaml b/charts/kube-prometheus-stack/Chart.yaml index f06d48cb6195..c1ee23e2153c 100644 --- a/charts/kube-prometheus-stack/Chart.yaml +++ b/charts/kube-prometheus-stack/Chart.yaml @@ -23,7 +23,7 @@ name: kube-prometheus-stack sources: - https://github.com/prometheus-community/helm-charts - https://github.com/prometheus-operator/kube-prometheus -version: 61.3.2 +version: 61.4.0 appVersion: v0.75.1 kubeVersion: ">=1.19.0-0" home: https://github.com/prometheus-operator/kube-prometheus diff --git a/charts/kube-prometheus-stack/hack/sync_grafana_dashboards.py b/charts/kube-prometheus-stack/hack/sync_grafana_dashboards.py index 60f5528a5d24..f60097345260 100755 --- a/charts/kube-prometheus-stack/hack/sync_grafana_dashboards.py +++ b/charts/kube-prometheus-stack/hack/sync_grafana_dashboards.py @@ -78,6 +78,7 @@ def new_representer(dumper, data): # Additional conditions map condition_map = { + 'alertmanager-overview': ' (or .Values.alertmanager.enabled .Values.alertmanager.forceDeployDashboards)', 'grafana-coredns-k8s': ' .Values.coreDns.enabled', 'etcd': ' .Values.kubeEtcd.enabled', 'apiserver': ' .Values.kubeApiServer.enabled', diff --git a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml index e80bc00982c9..6deefd1bd491 100644 --- a/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml +++ b/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml @@ -4,7 +4,7 @@ Do not change in-place! In order to change this file first read following link: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} -{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.alertmanager.enabled .Values.alertmanager.forceDeployDashboards) }} apiVersion: v1 kind: ConfigMap metadata: diff --git a/charts/kube-prometheus-stack/values.yaml b/charts/kube-prometheus-stack/values.yaml index efab16adba5b..2eb3f3faf8e2 100644 --- a/charts/kube-prometheus-stack/values.yaml +++ b/charts/kube-prometheus-stack/values.yaml @@ -279,6 +279,10 @@ alertmanager: ## enableFeatures: [] + ## Create dashboard configmap even if alertmanager deployment has been disabled + ## + forceDeployDashboards: false + ## Service account for Alertmanager to use. ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ ## From b5983d0e05c25d0b2f82787c11c6631b2f1b3ad0 Mon Sep 17 00:00:00 2001 From: Phenix66 <34311559+Phenix66@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:54:50 -0400 Subject: [PATCH 2/2] Update prometheus rules per linting workflow Signed-off-by: Phenix66 <34311559+Phenix66@users.noreply.github.com> --- .../templates/prometheus/rules-1.14/kubernetes-apps.yaml | 2 +- .../templates/prometheus/rules-1.14/kubernetes-resources.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml index 76215b3999e8..1a8946bd136a 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml @@ -269,7 +269,7 @@ spec: summary: StatefulSet update has not been rolled out. expr: |- ( - max without (revision) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, statefulset) ( kube_statefulset_status_current_revision{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} unless kube_statefulset_status_update_revision{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} diff --git a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml index 9111285250b3..3f7921bca500 100644 --- a/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml +++ b/charts/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml @@ -38,7 +38,7 @@ spec: runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuovercommit summary: Cluster has overcommitted CPU resource requests. expr: |- - sum(namespace_cpu:kube_pod_container_resource_requests:sum{job="{{ $kubeStateMetricsJob }}",}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - (sum(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0 + sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - (sum(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0 and (sum(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0 for: {{ dig "KubeCPUOvercommit" "for" "10m" .Values.customRules }}