From 46a1938d19a49b69362bb103835ed8332dd1f114 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Sat, 14 Sep 2024 11:23:07 +0300 Subject: [PATCH] fixed ETCD dashboard cluster label --- .../victoria-metrics-k8s-stack/CHANGELOG.md | 2 +- .../files/dashboards/generated/etcd.yaml | 32 +++++++++---------- .../hack/sync_dashboards.py | 4 +-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/charts/victoria-metrics-k8s-stack/CHANGELOG.md b/charts/victoria-metrics-k8s-stack/CHANGELOG.md index 5ef467580..9418509dd 100644 --- a/charts/victoria-metrics-k8s-stack/CHANGELOG.md +++ b/charts/victoria-metrics-k8s-stack/CHANGELOG.md @@ -2,7 +2,7 @@ ## Next release -- TODO +- Fixed ETCD dashboard ## 0.25.16 diff --git a/charts/victoria-metrics-k8s-stack/files/dashboards/generated/etcd.yaml b/charts/victoria-metrics-k8s-stack/files/dashboards/generated/etcd.yaml index 1702b2075..dcb00c4cf 100644 --- a/charts/victoria-metrics-k8s-stack/files/dashboards/generated/etcd.yaml +++ b/charts/victoria-metrics-k8s-stack/files/dashboards/generated/etcd.yaml @@ -23,7 +23,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: sum(etcd_server_has_leader{job=~".*etcd.*", job="$cluster"}) + expr: sum(etcd_server_has_leader{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}) legendFormat: '{{`{{`}}cluster{{`}}`}} - {{`{{`}}namespace{{`}}`}} ' @@ -51,12 +51,12 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: sum(rate(grpc_server_started_total{job=~".*etcd.*", job="$cluster",grpc_type="unary"}[$__rate_interval])) + expr: sum(rate(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_type="unary"}[$__rate_interval])) legendFormat: RPC rate - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: sum(rate(grpc_server_handled_total{job=~".*etcd.*", job="$cluster",grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[$__rate_interval])) + expr: sum(rate(grpc_server_handled_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[$__rate_interval])) legendFormat: RPC failed rate title: RPC rate type: timeseries @@ -81,12 +81,12 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: sum(grpc_server_started_total{job=~".*etcd.*",job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) + expr: sum(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{ {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) legendFormat: Watch streams - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: sum(grpc_server_started_total{job=~".*etcd.*",job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) + expr: sum(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{ {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) legendFormat: Lease streams title: Active streams type: timeseries @@ -112,7 +112,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*", job="$cluster"} + expr: etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"} legendFormat: '{{`{{`}}instance{{`}}`}} DB size' title: DB size type: timeseries @@ -138,12 +138,12 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance, le)) + expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance, le)) legendFormat: '{{`{{`}}instance{{`}}`}} WAL fsync' - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance, le)) + expr: histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance, le)) legendFormat: '{{`{{`}}instance{{`}}`}} DB fsync' title: Disk sync duration type: timeseries @@ -169,7 +169,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: process_resident_memory_bytes{job=~".*etcd.*", job="$cluster"} + expr: process_resident_memory_bytes{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"} legendFormat: '{{`{{`}}instance{{`}}`}} resident memory' title: Memory type: timeseries @@ -195,7 +195,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: rate(etcd_network_client_grpc_received_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval]) + expr: rate(etcd_network_client_grpc_received_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval]) legendFormat: '{{`{{`}}instance{{`}}`}} client traffic in' title: Client traffic in type: timeseries @@ -221,7 +221,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: rate(etcd_network_client_grpc_sent_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval]) + expr: rate(etcd_network_client_grpc_sent_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval]) legendFormat: '{{`{{`}}instance{{`}}`}} client traffic out' title: Client traffic out type: timeseries @@ -247,7 +247,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: sum(rate(etcd_network_peer_received_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance) + expr: sum(rate(etcd_network_peer_received_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance) legendFormat: '{{`{{`}}instance{{`}}`}} peer traffic in' title: Peer traffic in type: timeseries @@ -273,7 +273,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: sum(rate(etcd_network_peer_sent_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance) + expr: sum(rate(etcd_network_peer_sent_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance) legendFormat: '{{`{{`}}instance{{`}}`}} peer traffic out' title: Peer traffic out type: timeseries @@ -298,7 +298,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", job="$cluster"}[1d]) + expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[1d]) legendFormat: '{{`{{`}}instance{{`}}`}} total leader elections per day' title: Raft proposals type: timeseries @@ -323,7 +323,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", job="$cluster"}[1d]) + expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[1d]) legendFormat: '{{`{{`}}instance{{`}}`}} total leader elections per day' title: Total leader elections per day type: timeseries @@ -349,7 +349,7 @@ panels: - datasource: type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }} uid: $datasource - expr: histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval]))) + expr: histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval]))) legendFormat: '{{`{{`}}instance{{`}}`}} peer round trip time' title: Peer round trip time type: timeseries diff --git a/charts/victoria-metrics-k8s-stack/hack/sync_dashboards.py b/charts/victoria-metrics-k8s-stack/hack/sync_dashboards.py index b2cf50bfc..0c7452915 100644 --- a/charts/victoria-metrics-k8s-stack/hack/sync_dashboards.py +++ b/charts/victoria-metrics-k8s-stack/hack/sync_dashboards.py @@ -108,8 +108,8 @@ def init_yaml_styles(): def fix_query(query): query = re.sub( - '[\\s]*cluster[\\s]*=[~]*[\\s]*\\"', - ' [[ $.Values.global.clusterLabel ]]=~"', + '[\\s]*[\\w-]+[\\s]*=[~]*[\\s]*\\"\\$cluster\\"', + ' [[ $.Values.global.clusterLabel ]]=~"$cluster"', query.rstrip(), ) if "\n" in query: