diff --git a/monitoring/mongodb/alerts.test.yaml b/monitoring/mongodb/alerts.test.yaml index d59db119e..c6d3bf337 100644 --- a/monitoring/mongodb/alerts.test.yaml +++ b/monitoring/mongodb/alerts.test.yaml @@ -333,19 +333,32 @@ tests: - name: MongoDbRSNotSynced interval: 1m input_series: - - series: mongodb_rs_members_state{namespace="zenko", rs_nm="rs0", pod="data-db-mongodb-sharded-mongos-2", member_state="SECONDARY"} + - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} values: 1x10 - + - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} + values: 2x10 + - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} + values: 2x10 + - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} + values: 1x10 + - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} + values: 2x10 + - series: mongodb_rs_members_state{namespace="zenko", 
pod="data-db-mongodb-sharded-shard0-data-1", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} + values: 2x10 + - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="PRIMARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-0.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} + values: 1x10 + - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="SECONDARY", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-1.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} + values: 2x10 + - series: mongodb_rs_members_state{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-2", member_state="(not reachable/healthy)", rs_nm="data-db-mongodb-sharded-shard-0", member_idx="data-db-mongodb-sharded-shard0-data-2.data-db-mongodb-sharded-headless.zenko.svc.cluster.local:27017"} + values: 8 _ _ _ _ _ _ _ _ _ + alert_rule_test: - alertname: MongoDbRSNotSynced eval_time: 10m exp_alerts: - exp_labels: severity: warning - rs_nm: rs0 + rs_nm: data-db-mongodb-sharded-shard-0 exp_annotations: - description: "MongoDB replica set `rs0` is not in the expected state. It currently has `1` SECONDARY members instead of the expected number. Please ensure that all instance are running properly." + description: "MongoDB replica set `data-db-mongodb-sharded-shard-0` is not in the expected state. It currently has `1` SECONDARY members instead of the expected number. Please ensure that all instances are running properly." 
summary: MongoDB replica set out of sync - - - diff --git a/monitoring/mongodb/alerts.yaml b/monitoring/mongodb/alerts.yaml index 9ad78cb13..f5acde22f 100644 --- a/monitoring/mongodb/alerts.yaml +++ b/monitoring/mongodb/alerts.yaml @@ -183,10 +183,10 @@ groups: - alert: MongoDbRSNotSynced expr: | - count by (rs_nm, instance) (mongodb_rs_members_state{namespace="${namespace}", pod=~"${service}.*", member_state="SECONDARY"}) != (${replicas} - 1) + floor(avg by(rs_nm)(count by (rs_nm, pod)(mongodb_rs_members_state{namespace="${namespace}", pod=~"${service}.*", member_state="SECONDARY"}))) != (${replicas} - 1) for: 10m labels: severity: warning annotations: - description: "MongoDB replica set `{{ $labels.rs_nm }}` is not in the expected state. It currently has `{{ $value }}` SECONDARY members instead of the expected number. Please ensure that all instance are running properly." + description: "MongoDB replica set `{{ $labels.rs_nm }}` is not in the expected state. It currently has `{{ $value }}` SECONDARY members instead of the expected number. Please ensure that all instances are running properly." summary: MongoDB replica set out of sync