Skip to content

Commit

Permalink
refactor: unique name for workbenches alerts
Browse files (browse the repository at this point in the history)
  • Loading branch information
jackdelahunt committed Nov 27, 2024
1 parent b2a21a2 commit afe96e7
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 21 deletions.
16 changes: 8 additions & 8 deletions config/monitoring/prometheus/apps/prometheus-configs.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -1462,11 +1462,11 @@ data:
- name: SLOs-probe_success_workbench
rules:
-- alert: RHODS Jupyter Probe Success Burn Rate
+- alert: RHODS Jupyter Probe Success 5m and 1h Burn Rate high
annotations:
message: 'High error budget burn for {{ $labels.instance }} (current value: {{ $value }}).'
triage: "https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Jupyter/rhods-jupyter-probe-success-burn-rate.md"
-summary: RHODS Jupyter Probe Success Burn Rate
+summary: RHODS Jupyter Probe Success 5m and 1h Burn Rate high
expr: |
sum(probe_success:burnrate5m{instance=~"notebook-spawner"}) by (instance) > (14.40 * (1-0.98000))
and
Expand All @@ -1475,11 +1475,11 @@ data:
labels:
severity: critical
instance: notebook-spawner
-- alert: RHODS Jupyter Probe Success Burn Rate
+- alert: RHODS Jupyter Probe Success 30m and 6h Burn Rate high
annotations:
message: 'High error budget burn for {{ $labels.instance }} (current value: {{ $value }}).'
triage: "https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Jupyter/rhods-jupyter-probe-success-burn-rate.md"
-summary: RHODS Jupyter Probe Success Burn Rate
+summary: RHODS Jupyter Probe Success 30m and 6h Burn Rate high
expr: |
sum(probe_success:burnrate30m{instance=~"notebook-spawner"}) by (instance) > (6.00 * (1-0.98000))
and
Expand All @@ -1488,11 +1488,11 @@ data:
labels:
severity: critical
instance: notebook-spawner
-- alert: RHODS Jupyter Probe Success Burn Rate
+- alert: RHODS Jupyter Probe Success 2h and 1d Burn Rate high
annotations:
message: 'High error budget burn for {{ $labels.instance }} (current value: {{ $value }}).'
triage: "https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Jupyter/rhods-jupyter-probe-success-burn-rate.md"
-summary: RHODS Jupyter Probe Success Burn Rate
+summary: RHODS Jupyter Probe Success 2h and 1d Burn Rate high
expr: |
sum(probe_success:burnrate2h{instance=~"notebook-spawner"}) by (instance) > (3.00 * (1-0.98000))
and
Expand All @@ -1501,11 +1501,11 @@ data:
labels:
severity: warning
instance: notebook-spawner
-- alert: RHODS Jupyter Probe Success Burn Rate
+- alert: RHODS Jupyter Probe Success 6h and 3d Burn Rate high
annotations:
message: 'High error budget burn for {{ $labels.instance }} (current value: {{ $value }}).'
triage: "https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Jupyter/rhods-jupyter-probe-success-burn-rate.md"
-summary: RHODS Jupyter Probe Success Burn Rate
+summary: RHODS Jupyter Probe Success 6h and 3d Burn Rate high
expr: |
sum(probe_success:burnrate6h{instance=~"notebook-spawner"}) by (instance) > (1.00 * (1-0.98000))
and
Expand Down
35 changes: 22 additions & 13 deletions tests/prometheus_unit_tests/workbenches-alerting.unit-tests.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -73,7 +73,16 @@ tests:
values: "0x60"
alert_rule_test:
- eval_time: 1h
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 5m and 1h Burn Rate high
exp_alerts: []
+- eval_time: 1h
+alertname: RHODS Jupyter Probe Success 30m and 6h Burn Rate high
+exp_alerts: []
+- eval_time: 1h
+alertname: RHODS Jupyter Probe Success 2h and 1d Burn Rate high
+exp_alerts: []
+- eval_time: 1h
+alertname: RHODS Jupyter Probe Success 6h and 3d Burn Rate high
+exp_alerts: []

- interval: 1m
Expand All @@ -84,14 +93,14 @@ tests:
values: "1+1x60"
alert_rule_test:
- eval_time: 2m
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 5m and 1h Burn Rate high
exp_alerts:
- exp_labels:
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 5m and 1h Burn Rate high
instance: "notebook-spawner"
severity: critical
exp_annotations:
-summary: "RHODS Jupyter Probe Success Burn Rate"
+summary: "RHODS Jupyter Probe Success 5m and 1h Burn Rate high"
message: "High error budget burn for notebook-spawner (current value: 3)."
triage: 'https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Jupyter/rhods-jupyter-probe-success-burn-rate.md'

Expand All @@ -103,14 +112,14 @@ tests:
values: "1+1x60"
alert_rule_test:
- eval_time: 15m
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 30m and 6h Burn Rate high
exp_alerts:
- exp_labels:
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 30m and 6h Burn Rate high
instance: "notebook-spawner"
severity: critical
exp_annotations:
-summary: "RHODS Jupyter Probe Success Burn Rate"
+summary: "RHODS Jupyter Probe Success 30m and 6h Burn Rate high"
message: "High error budget burn for notebook-spawner (current value: 16)."
triage: 'https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Jupyter/rhods-jupyter-probe-success-burn-rate.md'

Expand All @@ -122,14 +131,14 @@ tests:
values: "1+1x60"
alert_rule_test:
- eval_time: 1h
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 2h and 1d Burn Rate high
exp_alerts:
- exp_labels:
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 2h and 1d Burn Rate high
instance: "notebook-spawner"
severity: warning
exp_annotations:
-summary: "RHODS Jupyter Probe Success Burn Rate"
+summary: "RHODS Jupyter Probe Success 2h and 1d Burn Rate high"
message: "High error budget burn for notebook-spawner (current value: 61)."
triage: 'https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Jupyter/rhods-jupyter-probe-success-burn-rate.md'

Expand All @@ -141,14 +150,14 @@ tests:
values: "1+1x200"
alert_rule_test:
- eval_time: 3h
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 6h and 3d Burn Rate high
exp_alerts:
- exp_labels:
-alertname: RHODS Jupyter Probe Success Burn Rate
+alertname: RHODS Jupyter Probe Success 6h and 3d Burn Rate high
instance: "notebook-spawner"
severity: warning
exp_annotations:
-summary: "RHODS Jupyter Probe Success Burn Rate"
+summary: "RHODS Jupyter Probe Success 6h and 3d Burn Rate high"
message: "High error budget burn for notebook-spawner (current value: 181)."
triage: 'https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Jupyter/rhods-jupyter-probe-success-burn-rate.md'

Expand Down

0 comments on commit afe96e7

Please sign in to comment.