Skip to content

Commit

Permalink
feat: add alarm and log metric for queues not being active (#1123)
Browse files Browse the repository at this point in the history
* feat: add alarm and log metric for queues not being active

* chore: fix alarms
  • Loading branch information
andrewleith authored Jan 17, 2024
1 parent 5ff8d0c commit b3a506d
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
31 changes: 31 additions & 0 deletions aws/eks/cloudwatch_alarms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -907,3 +907,34 @@ resource "aws_cloudwatch_metric_alarm" "karpenter-replicas-unavailable" {
}
}
}

# Warning alarm: raised when the "queues-are-active" log metric sums to less
# than 1 over a single 60-second period. Notifies the warning SNS topic.
resource "aws_cloudwatch_metric_alarm" "queues-not-active-1-minute-warning" {
  # Only created when CloudWatch resources are enabled.
  count = var.cloudwatch_enabled ? 1 : 0

  alarm_name        = "queues-not-active-1-minute-warning"
  alarm_description = "Queues have not been active for one minute"

  # Metric identity is read from the log metric filter's transformation so the
  # alarm and the filter cannot drift apart.
  namespace   = aws_cloudwatch_log_metric_filter.queues-are-active[0].metric_transformation[0].namespace
  metric_name = aws_cloudwatch_log_metric_filter.queues-are-active[0].metric_transformation[0].name

  # Evaluation: Sum over one 60-second period must be at least 1.
  statistic           = "Sum"
  period              = "60"
  evaluation_periods  = "1"
  comparison_operator = "LessThanThreshold"
  threshold           = 1

  # A period with no datapoints at all is treated as breaching the threshold.
  treat_missing_data = "breaching"

  alarm_actions = [var.sns_alert_warning_arn]
}

# Critical alarm: raised when the "queues-are-active" log metric sums to less
# than 1 over a single 300-second period. Notifies the critical SNS topic both
# when the alarm fires and when it returns to OK.
resource "aws_cloudwatch_metric_alarm" "queues-not-active-5-minutes-critical" {
  # Only created when CloudWatch resources are enabled.
  count = var.cloudwatch_enabled ? 1 : 0

  alarm_name        = "queues-not-active-5-minutes-critical"
  alarm_description = "Queues have not been active for 5 minutes"

  # Metric identity is read from the log metric filter's transformation so the
  # alarm and the filter cannot drift apart.
  namespace   = aws_cloudwatch_log_metric_filter.queues-are-active[0].metric_transformation[0].namespace
  metric_name = aws_cloudwatch_log_metric_filter.queues-are-active[0].metric_transformation[0].name

  # Evaluation: Sum over one 300-second period must be at least 1.
  statistic           = "Sum"
  period              = "300"
  evaluation_periods  = "1"
  comparison_operator = "LessThanThreshold"
  threshold           = 1

  # A period with no datapoints at all is treated as breaching the threshold.
  treat_missing_data = "breaching"

  # The same topic receives both the alarm and the recovery notification.
  alarm_actions = [var.sns_alert_critical_arn]
  ok_actions    = [var.sns_alert_critical_arn]
}
13 changes: 13 additions & 0 deletions aws/eks/cloudwatch_log.tf
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,16 @@ resource "aws_cloudwatch_log_metric_filter" "documentation-evicted-pods" {
value = "1"
}
}

# Log metric filter that emits a datapoint of 1 to the "queues-are-active"
# metric (namespace "LogMetrics") for every application log event containing
# the phrase "Batch saving with". The queues-not-active alarms read their
# metric name and namespace from this resource.
resource "aws_cloudwatch_log_metric_filter" "queues-are-active" {
  # Only created when CloudWatch resources are enabled.
  count = var.cloudwatch_enabled ? 1 : 0

  name = "queues-are-active"

  # The inner double quotes make CloudWatch match the exact phrase. Without
  # them the pattern is three separate terms ("Batch", "saving", "with") that
  # may appear anywhere in the event in any order, so unrelated log lines
  # could be counted as queue activity.
  pattern        = "\"Batch saving with\""
  log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name

  metric_transformation {
    name      = "queues-are-active"
    namespace = "LogMetrics"
    value     = "1"
  }
}

0 comments on commit b3a506d

Please sign in to comment.