Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed ETCD dashboard cluster label #1493

Merged
merged 1 commit into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/victoria-metrics-k8s-stack/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Next release

- TODO
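- Fixed ETCD dashboard: panel queries now match `$cluster` against the configured `global.clusterLabel` instead of the `job` label.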

## 0.25.16

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(etcd_server_has_leader{job=~".*etcd.*", job="$cluster"})
expr: sum(etcd_server_has_leader{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"})
legendFormat: '{{`{{`}}cluster{{`}}`}} - {{`{{`}}namespace{{`}}`}}

'
Expand Down Expand Up @@ -51,12 +51,12 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(rate(grpc_server_started_total{job=~".*etcd.*", job="$cluster",grpc_type="unary"}[$__rate_interval]))
expr: sum(rate(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_type="unary"}[$__rate_interval]))
legendFormat: RPC rate
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(rate(grpc_server_handled_total{job=~".*etcd.*", job="$cluster",grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[$__rate_interval]))
expr: sum(rate(grpc_server_handled_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[$__rate_interval]))
legendFormat: RPC failed rate
title: RPC rate
type: timeseries
Expand All @@ -81,12 +81,12 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(grpc_server_started_total{job=~".*etcd.*",job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
expr: sum(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{ {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
legendFormat: Watch streams
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(grpc_server_started_total{job=~".*etcd.*",job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{job="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
expr: sum(grpc_server_started_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{ {{ $.Values.global.clusterLabel }}=~"$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
legendFormat: Lease streams
title: Active streams
type: timeseries
Expand All @@ -112,7 +112,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*", job="$cluster"}
expr: etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}
legendFormat: '{{`{{`}}instance{{`}}`}} DB size'
title: DB size
type: timeseries
Expand All @@ -138,12 +138,12 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance, le))
expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance, le))
legendFormat: '{{`{{`}}instance{{`}}`}} WAL fsync'
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance, le))
expr: histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance, le))
legendFormat: '{{`{{`}}instance{{`}}`}} DB fsync'
title: Disk sync duration
type: timeseries
Expand All @@ -169,7 +169,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: process_resident_memory_bytes{job=~".*etcd.*", job="$cluster"}
expr: process_resident_memory_bytes{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}
legendFormat: '{{`{{`}}instance{{`}}`}} resident memory'
title: Memory
type: timeseries
Expand All @@ -195,7 +195,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: rate(etcd_network_client_grpc_received_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])
expr: rate(etcd_network_client_grpc_received_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])
legendFormat: '{{`{{`}}instance{{`}}`}} client traffic in'
title: Client traffic in
type: timeseries
Expand All @@ -221,7 +221,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: rate(etcd_network_client_grpc_sent_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])
expr: rate(etcd_network_client_grpc_sent_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])
legendFormat: '{{`{{`}}instance{{`}}`}} client traffic out'
title: Client traffic out
type: timeseries
Expand All @@ -247,7 +247,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(rate(etcd_network_peer_received_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance)
expr: sum(rate(etcd_network_peer_received_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance)
legendFormat: '{{`{{`}}instance{{`}}`}} peer traffic in'
title: Peer traffic in
type: timeseries
Expand All @@ -273,7 +273,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: sum(rate(etcd_network_peer_sent_bytes_total{job=~".*etcd.*", job="$cluster"}[$__rate_interval])) by (instance)
expr: sum(rate(etcd_network_peer_sent_bytes_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])) by (instance)
legendFormat: '{{`{{`}}instance{{`}}`}} peer traffic out'
title: Peer traffic out
type: timeseries
Expand All @@ -298,7 +298,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", job="$cluster"}[1d])
expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[1d])
legendFormat: '{{`{{`}}instance{{`}}`}} total leader elections per day'
title: Raft proposals
type: timeseries
Expand All @@ -323,7 +323,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", job="$cluster"}[1d])
expr: changes(etcd_server_leader_changes_seen_total{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[1d])
legendFormat: '{{`{{`}}instance{{`}}`}} total leader elections per day'
title: Total leader elections per day
type: timeseries
Expand All @@ -349,7 +349,7 @@ panels:
- datasource:
type: {{ default "prometheus" .Values.grafana.defaultDatasourceType }}
uid: $datasource
expr: histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*", job="$cluster"}[$__rate_interval])))
expr: histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*", {{ $.Values.global.clusterLabel }}=~"$cluster"}[$__rate_interval])))
legendFormat: '{{`{{`}}instance{{`}}`}} peer round trip time'
title: Peer round trip time
type: timeseries
Expand Down
4 changes: 2 additions & 2 deletions charts/victoria-metrics-k8s-stack/hack/sync_dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def init_yaml_styles():

def fix_query(query):
query = re.sub(
'[\\s]*cluster[\\s]*=[~]*[\\s]*\\"',
' [[ $.Values.global.clusterLabel ]]=~"',
'[\\s]*[\\w-]+[\\s]*=[~]*[\\s]*\\"\\$cluster\\"',
' [[ $.Values.global.clusterLabel ]]=~"$cluster"',
query.rstrip(),
)
if "\n" in query:
Expand Down