diff --git a/kube-prometheus-stack/kustomization.yaml b/kube-prometheus-stack/kustomization.yaml new file mode 100644 index 0000000..920eff1 --- /dev/null +++ b/kube-prometheus-stack/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: monitoring +resources: + - namespace.yaml + - repository.yaml + - release.yaml diff --git a/kube-prometheus-stack/namespace.yaml b/kube-prometheus-stack/namespace.yaml new file mode 100644 index 0000000..78abcfc --- /dev/null +++ b/kube-prometheus-stack/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + labels: + app.kubernetes.io/component: monitoring diff --git a/kube-prometheus-stack/release.yaml b/kube-prometheus-stack/release.yaml new file mode 100644 index 0000000..6c882c7 --- /dev/null +++ b/kube-prometheus-stack/release.yaml @@ -0,0 +1,162 @@ +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: kube-prometheus-stack +spec: + interval: 5m + chart: + spec: + version: "48.x" + chart: kube-prometheus-stack + sourceRef: + kind: HelmRepository + name: prometheus-community + interval: 60m + install: + crds: Create + upgrade: + crds: CreateReplace + # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml + values: + alertmanager: + enabled: false + prometheus: + prometheusSpec: + retention: 24h + resources: + requests: + cpu: 200m + memory: 200Mi + podMonitorNamespaceSelector: {} + podMonitorSelector: + matchLabels: + app.kubernetes.io/component: monitoring + grafana: + defaultDashboardsEnabled: false + kube-state-metrics: + collectors: [] + extraArgs: + - --custom-resource-state-only=true + rbac: + extraRules: + - apiGroups: + - source.toolkit.fluxcd.io + - kustomize.toolkit.fluxcd.io + - helm.toolkit.fluxcd.io + - image.toolkit.fluxcd.io + - notification.toolkit.fluxcd.io + resources: + - gitrepositories + - buckets + - helmrepositories + - helmcharts + - ocirepositories + - kustomizations + - helmreleases + - imagerepositories + - imagepolicies + - imageupdateautomations + - alerts + - providers + - receivers + verbs: ["list", "watch"] + customResourceState: + enabled: true + config: + spec: + resources: + - &metric + groupVersionKind: + group: source.toolkit.fluxcd.io + version: "v1" + kind: GitRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [metadata, name] + labelsFromPath: + exported_namespace: [metadata, namespace] + ready: [status, conditions, "[type=Ready]", status] + - <<: *metric + groupVersionKind: + group: source.toolkit.fluxcd.io + version: "v1beta2" + kind: Bucket + - <<: *metric + groupVersionKind: + group: source.toolkit.fluxcd.io + version: "v1beta2" + kind: HelmRepository + - <<: *metric + groupVersionKind: + group: source.toolkit.fluxcd.io + version: "v1beta2" + kind: HelmChart + - <<: *metric + groupVersionKind: + group: source.toolkit.fluxcd.io + version: "v1beta2" + kind: OCIRepository + - <<: *metric + groupVersionKind: + group: kustomize.toolkit.fluxcd.io + version: "v1" + kind: Kustomization + - <<: *metric + groupVersionKind: + group: helm.toolkit.fluxcd.io + version: "v2beta1" + kind: HelmRelease + - <<: *metric + groupVersionKind: + group: image.toolkit.fluxcd.io + version: "v1beta2" + kind: ImageRepository + - <<: *metric + groupVersionKind: + group: image.toolkit.fluxcd.io + version: "v1beta2" + kind: ImagePolicy + - <<: *metric + groupVersionKind: + group: image.toolkit.fluxcd.io + version: "v1beta1" + kind: ImageUpdateAutomation + - <<: *metric + groupVersionKind: + group: notification.toolkit.fluxcd.io + version: "v1beta2" + kind: Alert + - <<: *metric + groupVersionKind: + group: notification.toolkit.fluxcd.io + version: "v1beta2" + kind: Provider + - <<: *metric + groupVersionKind: + group: notification.toolkit.fluxcd.io + version: "v1" + kind: Receiver + postRenderers: + - kustomize: + patches: + - target: + # Ignore these objects from Flux diff as they are mutated from chart hooks + kind: (ValidatingWebhookConfiguration|MutatingWebhookConfiguration) + name: kube-prometheus-stack-admission + patch: | + - op: add + path: /metadata/annotations/helm.toolkit.fluxcd.io~1driftDetection + value: disabled + - target: + # Ignore these objects from Flux diff as they are mutated at apply time but not at dry-run time + kind: PrometheusRule + patch: | + - op: add + path: /metadata/annotations/helm.toolkit.fluxcd.io~1driftDetection + value: disabled diff --git a/kube-prometheus-stack/repository.yaml b/kube-prometheus-stack/repository.yaml new file mode 100644 index 0000000..552a341 --- /dev/null +++ b/kube-prometheus-stack/repository.yaml @@ -0,0 +1,8 @@ +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: prometheus-community +spec: + interval: 120m + type: default + url: https://prometheus-community.github.io/helm-charts diff --git a/loki-stack/kustomization.yaml b/loki-stack/kustomization.yaml new file mode 100644 index 0000000..d6d4338 --- /dev/null +++ b/loki-stack/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: monitoring +resources: + - repository.yaml + - release.yaml diff --git a/loki-stack/release.yaml b/loki-stack/release.yaml new file mode 100644 index 0000000..4bb4a2c --- /dev/null +++ b/loki-stack/release.yaml @@ -0,0 +1,34 @@ +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: loki-stack +spec: + interval: 5m + dependsOn: + - name: kube-prometheus-stack + chart: + spec: + version: "2.x" + chart: loki-stack + sourceRef: + kind: HelmRepository + name: grafana-charts + interval: 60m + # https://github.com/grafana/helm-charts/blob/main/charts/loki-stack/values.yaml + # https://github.com/grafana/loki/blob/main/production/helm/loki/values.yaml + values: + promtail: + enabled: true + loki: + enabled: true + isDefault: false + serviceMonitor: + enabled: true + additionalLabels: + app.kubernetes.io/part-of: kube-prometheus-stack + config: + chunk_store_config: + max_look_back_period: 0s + table_manager: + retention_deletes_enabled: true + retention_period: 12h diff --git a/loki-stack/repository.yaml b/loki-stack/repository.yaml new file mode 100644 index 0000000..49f58cd --- /dev/null +++ b/loki-stack/repository.yaml @@ -0,0 +1,7 @@ +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: grafana-charts +spec: + interval: 120m0s + url: https://grafana.github.io/helm-charts diff --git a/monitoring-config/dashboards/cluster.json b/monitoring-config/dashboards/cluster.json new file mode 100644 index 0000000..47bb344 --- /dev/null +++ b/monitoring-config/dashboards/cluster.json @@ -0,0 +1,1198 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "iconColor": "red", + "name": "flux events", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + "flux" + ], + "type": "tags" + } + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 3, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 24, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"Kustomization|HelmRelease\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "title": "Cluster Reconcilers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 28, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"Kustomization|HelmRelease\", ready=\"False\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "title": "Failing Reconcilers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 29, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "title": "Kubernetes Manifests Sources", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 30, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\", ready=\"False\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "title": "Failing Sources", + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 61 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 8, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(gotk_reconcile_duration_seconds_sum{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"Kustomization|HelmRelease\"}[5m])) by (kind)\n/\n sum(rate(gotk_reconcile_duration_seconds_count{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"Kustomization|HelmRelease\"}[5m])) by (kind)", + "interval": "", + "legendFormat": "{{kind}}", + "refId": "A" + } + ], + "title": "Reconciler ops avg. duration", + "type": "bargauge" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 61 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 31, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(gotk_reconcile_duration_seconds_sum{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}[5m])) by (kind)\n/\n sum(rate(gotk_reconcile_duration_seconds_count{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}[5m])) by (kind)", + "interval": "", + "legendFormat": "{{kind}}", + "refId": "A" + } + ], + "title": "Source ops avg. duration", + "type": "bargauge" + }, + { + "collapsed": false, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 15, + "panels": [], + "title": "Status", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "False": { + "color": "red", + "index": 1, + "text": "Not Ready" + }, + "True": { + "color": "blue", + "index": 0, + "text": "Ready" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byType", + "options": "string" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 33, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Status" + } + ] + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"Kustomization|HelmRelease\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Cluster reconciliation readiness ", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "app": true, + "container": true, + "customresource_group": true, + "customresource_kind": false, + "customresource_version": true, + "endpoint": true, + "exported_namespace": false, + "gotk_type": true, + "instance": true, + "job": true, + "kubernetes_namespace": true, + "kubernetes_pod_name": true, + "namespace": true, + "pod": true, + "pod_template_hash": true, + "service": true, + "status": true, + "type": true + }, + "indexByName": { + "Time": 0, + "Value": 15, + "__name__": 1, + "container": 2, + "customresource_group": 4, + "customresource_kind": 5, + "customresource_version": 6, + "endpoint": 7, + "exported_namespace": 3, + "instance": 8, + "job": 9, + "name": 10, + "namespace": 11, + "pod": 12, + "ready": 13, + "service": 14 + }, + "renameByName": { + "Value": "", + "customresource_kind": "Kind", + "exported_namespace": "Namespace", + "kind": "Kind", + "name": "Name", + "namespace": "Operator Namespace", + "ready": "Status" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "False": { + "color": "red", + "index": 1, + "text": "Not Ready" + }, + "True": { + "color": "blue", + "index": 0, + "text": "Ready" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byType", + "options": "string" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "noValue", + "value": "Ready" + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 34, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Status" + } + ] + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Source acquisition readiness ", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "app": true, + "container": true, + "customresource_group": true, + "customresource_kind": false, + "customresource_version": true, + "endpoint": true, + "exported_namespace": false, + "gotk_type": true, + "instance": true, + "job": true, + "kubernetes_namespace": true, + "kubernetes_pod_name": true, + "namespace": true, + "pod": true, + "pod_template_hash": true, + "ready": false, + "service": true, + "status": true, + "type": true + }, + "indexByName": { + "Time": 0, + "Value": 15, + "__name__": 1, + "container": 2, + "customresource_group": 5, + "customresource_kind": 6, + "customresource_version": 7, + "endpoint": 8, + "exported_namespace": 4, + "instance": 9, + "job": 10, + "name": 11, + "namespace": 3, + "pod": 12, + "ready": 13, + "service": 14 + }, + "renameByName": { + "Value": "", + "customresource_kind": "Kind", + "exported_namespace": "Namespace", + "kind": "Kind", + "name": "Name", + "namespace": "Operator Namespace", + "ready": "Status" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 17, + "panels": [], + "title": "Timing", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(gotk_reconcile_duration_seconds_sum{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"Kustomization|HelmRelease\"}[5m])) by (kind, name)\n/\n sum(rate(gotk_reconcile_duration_seconds_count{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"Kustomization|HelmRelease\"}[5m])) by (kind, name)", + "hide": false, + "interval": "", + "legendFormat": "{{kind}}/{{name}}", + "refId": "B" + } + ], + "title": "Cluster reconciliation duration", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(gotk_reconcile_duration_seconds_sum{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}[5m])) by (kind, name)\n/\n sum(rate(gotk_reconcile_duration_seconds_count{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}[5m])) by (kind, name)", + "hide": false, + "interval": "", + "legendFormat": "{{kind}}/{{name}}", + "refId": "B" + } + ], + "title": "Source acquisition duration", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "light", + "tags": [ + "flux" + ], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "$DS_PROMETHEUS" + }, + "definition": "label_values(gotk_reconcile_condition, namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "operator_namespace", + "options": [], + "query": { + "query": "label_values(gotk_reconcile_condition, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "$DS_PROMETHEUS" + }, + "definition": "label_values(gotk_resource_info,exported_namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(gotk_resource_info,exported_namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Flux Cluster Stats", + "uid": "flux-cluster", + "version": 4, + "weekStart": "" +} diff --git a/monitoring-config/dashboards/control-plane.json b/monitoring-config/dashboards/control-plane.json new file mode 100644 index 0000000..588c455 --- /dev/null +++ b/monitoring-config/dashboards/control-plane.json @@ -0,0 +1,1724 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "iconColor": "red", + "name": "flux events", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + "flux" + ], + "type": "tags" + } + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 24, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(go_info{namespace=\"$namespace\",pod=~\".*-controller-.*\"})", + "interval": "", + "legendFormat": "pods", + "refId": "A" + } + ], + "title": "Controllers", + "type": "stat" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 23, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "max(workqueue_longest_running_processor_seconds{namespace=\"$namespace\",pod=~\".*-controller-.*\"})", + "hide": false, + "interval": "", + "legendFormat": "seconds", + "refId": "B" + } + ], + "title": "Max Work Queue", + "type": "stat" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "#EAB839", + "value": 500000000 + }, + { + "color": "red", + "value": 900000000 + } + ] + }, + "unit": "decbits" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 25, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(go_memstats_alloc_bytes{namespace=\"$namespace\",pod=~\".*-controller-.*\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Memory", + "type": "gauge" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 26, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(rest_client_requests_total{namespace=\"$namespace\",pod=~\".*-controller-.*\"}[1m]))", + "interval": "", + "legendFormat": "requests", + "refId": "A" + } + ], + "title": "API Requests", + "type": "stat" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(rest_client_requests_total{namespace=\"$namespace\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "total", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(rest_client_requests_total{namespace=\"$namespace\",code!~\"2..\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "errors", + "refId": "B" + } + ], + "title": "Kubernetes API Requests", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 15, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Resource Usage", + "type": "row" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(process_cpu_seconds_total{namespace=\"$namespace\",pod=~\".*-controller-.*\"}[1m])", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",container!=\"POD\",container!=\"\",pod=~\".*-controller-.*\"}) by (pod)", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 17, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Reconciliation Stats", + "type": "row" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "workqueue_longest_running_processor_seconds{name=\"kustomization\"}", + "hide": false, + "interval": "", + "legendFormat": "kustomizations", + "refId": "B" + } + ], + "title": "Cluster Reconciliation Duration", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"kustomization\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful reconciliations ", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"kustomization\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed reconciliations ", + "refId": "B" + } + ], + "title": "Cluster Reconciliations ops/min", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 29, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Sources Stats", + "type": "row" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"gitrepository\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful git pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"gitrepository\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed git pulls", + "refId": "B" + } + ], + "title": "Git Repos ops/min", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"ocirepository\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful oci pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"ocirepository\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed oci pulls", + "refId": "B" + } + ], + "title": "OCI Repos ops/min", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 31, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrepository\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful helm pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrepository\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed helm pulls", + "refId": "B" + } + ], + "title": "Helm Repos ops/min", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"bucket\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful bucket pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"bucket\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed bucket pulls", + "refId": "B" + } + ], + "title": "Buckets ops/min", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 61 + }, + "id": 19, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Helm Stats", + "type": "row" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 62 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "list", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.50, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": true, + "interval": "", + "legendFormat": "P50", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.90, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": true, + "interval": "", + "legendFormat": "P90", + "refId": "B" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.99, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "P99", + "refId": "C" + } + ], + "title": "Helm Release Duration", + "type": "timeseries" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "decimals": 2, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 70 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.0.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrelease\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful reconciliations ", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrelease\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed reconciliations ", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Helm Releases ops/min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1102", + "format": "opm", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1103", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmchart\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful chart pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmchart\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed chart pulls", + "refId": "B" + } + ], + "title": "Helm Charts ops/min", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "light", + "tags": [ + "flux" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 2, + "includeAll": false, + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "flux-system", + "value": "flux-system" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "workqueue_work_duration_seconds_count", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "workqueue_work_duration_seconds_count", + "refId": "Prometheus-namespace-Variable-Query" + }, + "refresh": 2, + "regex": "/.*namespace=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Flux Control Plane", + "uid": "flux-control-plane", + "version": 2, + "weekStart": "" +} diff --git a/monitoring-config/dashboards/logs.json b/monitoring-config/dashboards/logs.json new file mode 100644 index 0000000..4d9f58d --- /dev/null +++ b/monitoring-config/dashboards/logs.json @@ -0,0 +1,332 @@ +{ + "__inputs": [ + { + "name": "DS_LOKI", + "label": "Loki", + "description": "", + "type": "datasource", + "pluginId": "loki", + "pluginName": "Loki" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "iconColor": "red", + "name": "flux events", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + "flux" + ], + "type": "tags" + } + } + ] + }, + "description": "Flux logs collected from Kubernetes, stored in Loki", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 29, + "iteration": 1653748775696, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "${DS_LOKI}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${DS_LOKI}", + "expr": "sum(count_over_time({namespace=~\"$namespace\", stream=~\"$stream\", app =~\"$controller\"} | json | __error__!=\"JSONParserErr\" | level=~\"$level\" |= \"$query\" [$__interval]))", + "instant": false, + "legendFormat": "Log count", + "range": true, + "refId": "A" + } + ], + "type": "timeseries" + }, + { + "datasource": "${DS_LOKI}", + "description": "Logs from services running in Kubernetes", + "gridPos": { + "h": 25, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 2, + "options": { + "dedupStrategy": "numbers", + "enableLogDetails": false, + "prettifyLogMessage": true, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": "${DS_LOKI}", + "expr": "{namespace=~\"$namespace\", stream=~\"$stream\", app =~\"$controller\"} | json | __error__!=\"JSONParserErr\" | level=~\"$level\" |= \"$query\"", + "refId": "A" + } + ], + "type": "logs" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "style": "light", + "tags": [ + "flux" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "description": "String to search for", + "hide": 0, + "label": "Search Query", + "name": "query", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "allValue": "info|error", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "hide": 0, + "includeAll": true, + "multi": false, + "name": "level", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "info", + "value": "info" + }, + { + "selected": false, + "text": "error", + "value": "error" + } + ], + "query": "info,error", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${DS_LOKI}", + "definition": "label_values(app)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "controller", + "options": [], + "query": "label_values(app)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "flux-system" + ], + "value": [ + "flux-system" + ] + }, + "datasource": "${DS_LOKI}", + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "${DS_LOKI}", + "definition": "label_values(stream)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "stream", + "options": [], + "query": "label_values(stream)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "Loki", + "value": "Loki" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "DS_LOKI", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Flux Logs", + "uid": "flux-logs", + "version": 2 +} diff --git a/monitoring-config/kustomization.yaml b/monitoring-config/kustomization.yaml new file mode 100644 index 0000000..29dcad8 --- /dev/null +++ b/monitoring-config/kustomization.yaml @@ -0,0 +1,16 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: monitoring +resources: + - podmonitor.yaml +configMapGenerator: + - name: flux-grafana-dashboards + files: + - dashboards/control-plane.json + - dashboards/cluster.json + - dashboards/logs.json + options: + labels: + grafana_dashboard: "1" + app.kubernetes.io/part-of: flux + app.kubernetes.io/component: monitoring diff --git a/monitoring-config/podmonitor.yaml b/monitoring-config/podmonitor.yaml new file mode 100644 index 0000000..d338f1f --- /dev/null +++ b/monitoring-config/podmonitor.yaml @@ -0,0 +1,29 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: flux-system + labels: + app.kubernetes.io/part-of: flux + app.kubernetes.io/component: monitoring +spec: + namespaceSelector: + matchNames: + - flux-system + selector: + matchExpressions: + - key: app + operator: In + values: + - helm-controller + - source-controller + - kustomize-controller + - notification-controller + - image-automation-controller + - image-reflector-controller + podMetricsEndpoints: + - port: http-prom + relabelings: + # https://github.com/prometheus-operator/prometheus-operator/issues/4816 + - sourceLabels: [__meta_kubernetes_pod_phase] + action: keep + regex: Running