Skip to content

Commit

Permalink
fixed ETCD dashboard cluster label (#1493)
Browse files Browse the repository at this point in the history
  • Loading branch information
AndrewChubatiuk committed Sep 19, 2024
1 parent d42ea05 commit d20968c
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 18 deletions.
1 change: 1 addition & 0 deletions charts/victoria-metrics-k8s-stack/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Next release

- Moved crds to a shared chart and import them as a dependency
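- Fixed ETCD dashboard: queries now filter on the configured `global.clusterLabel` instead of matching the `job` label against `$cluster`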

## 0.25.16

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(etcd_server_has_leader{job=~".*etcd.*", job="$cluster"})
expr: sum(etcd_server_has_leader{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"})
legendFormat: '{{`{{`}}cluster{{`}}`}} - {{`{{`}}namespace{{`}}`}}
'
Expand Down Expand Up @@ -51,12 +51,12 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(rate(grpc_server_started_total{job=~".*etcd.*", job="$cluster",grpc_type="unary"}[$__rate_interval]))
expr: sum(rate(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_type="unary"}[$__rate_interval]))
legendFormat: RPC rate
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(rate(grpc_server_handled_total{job=~".*etcd.*", job="$cluster",grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[$__rate_interval]))
expr: sum(rate(grpc_server_handled_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[$__rate_interval]))
legendFormat: RPC failed rate
title: RPC rate
type: timeseries
Expand All @@ -81,12 +81,12 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(grpc_server_started_total{job=~".*etcd.*",job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
expr: sum(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{ {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
legendFormat: Watch streams
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(grpc_server_started_total{job=~".*etcd.*",job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
expr: sum(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{ {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
legendFormat: Lease streams
title: Active streams
type: timeseries
Expand All @@ -112,7 +112,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*", job="$cluster"}
expr: etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}
legendFormat: '{{`{{`}}instance{{`}}`}} DB size'
title: DB size
type: timeseries
Expand All @@ -138,12 +138,12 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance, le))
expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance, le))
legendFormat: '{{`{{`}}instance{{`}}`}} WAL fsync'
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance, le))
expr: histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance, le))
legendFormat: '{{`{{`}}instance{{`}}`}} DB fsync'
title: Disk sync duration
type: timeseries
Expand All @@ -169,7 +169,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: process_resident_memory_bytes{job=~".*etcd.*", job="$cluster"}
expr: process_resident_memory_bytes{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}
legendFormat: '{{`{{`}}instance{{`}}`}} resident memory'
title: Memory
type: timeseries
Expand All @@ -195,7 +195,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: rate(etcd_network_client_grpc_received_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])
expr: rate(etcd_network_client_grpc_received_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])
legendFormat: '{{`{{`}}instance{{`}}`}} client traffic in'
title: Client traffic in
type: timeseries
Expand All @@ -221,7 +221,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: rate(etcd_network_client_grpc_sent_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])
expr: rate(etcd_network_client_grpc_sent_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])
legendFormat: '{{`{{`}}instance{{`}}`}} client traffic out'
title: Client traffic out
type: timeseries
Expand All @@ -247,7 +247,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(rate(etcd_network_peer_received_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance)
expr: sum(rate(etcd_network_peer_received_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance)
legendFormat: '{{`{{`}}instance{{`}}`}} peer traffic in'
title: Peer traffic in
type: timeseries
Expand All @@ -273,7 +273,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(rate(etcd_network_peer_sent_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance)
expr: sum(rate(etcd_network_peer_sent_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance)
legendFormat: '{{`{{`}}instance{{`}}`}} peer traffic out'
title: Peer traffic out
type: timeseries
Expand All @@ -298,7 +298,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", job="$cluster"}[1d])
expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[1d])
legendFormat: '{{`{{`}}instance{{`}}`}} total leader elections per day'
title: Raft proposals
type: timeseries
Expand All @@ -323,7 +323,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", job="$cluster"}[1d])
expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[1d])
legendFormat: '{{`{{`}}instance{{`}}`}} total leader elections per day'
title: Total leader elections per day
type: timeseries
Expand All @@ -349,7 +349,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval])))
expr: histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])))
legendFormat: '{{`{{`}}instance{{`}}`}} peer round trip time'
title: Peer round trip time
type: timeseries
Expand Down
4 changes: 2 additions & 2 deletions charts/victoria-metrics-k8s-stack/hack/sync_dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def init_yaml_styles():

def fix_query(query):
query = re.sub(
'[\\s]*cluster[\\s]*=[~]*[\\s]*\\"',
' [[ $.Values.global.clusterLabel ]]=~"',
'[\\s]*[\\w-]+[\\s]*=[~]*[\\s]*\\"\\$cluster\\"',
' [[ $.Values.global.clusterLabel ]]=~"$cluster"',
query.rstrip(),
)
if "\n" in query:
Expand Down

0 comments on commit d20968c

Please sign in to comment.