From c624c56b08c018a1e6f4777dfc5f0048eab3af9c Mon Sep 17 00:00:00 2001 From: Andrew Leith Date: Tue, 23 Jan 2024 11:52:48 -0400 Subject: [PATCH 1/3] chore: update alarm names/descriptions/metric filters to be more accurate --- aws/eks/cloudwatch_alarms.tf | 20 ++++++++++---------- aws/eks/cloudwatch_log.tf | 6 +++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/aws/eks/cloudwatch_alarms.tf b/aws/eks/cloudwatch_alarms.tf index 4cc8cb901..e7e9d064d 100644 --- a/aws/eks/cloudwatch_alarms.tf +++ b/aws/eks/cloudwatch_alarms.tf @@ -908,14 +908,14 @@ resource "aws_cloudwatch_metric_alarm" "karpenter-replicas-unavailable" { } } -resource "aws_cloudwatch_metric_alarm" "queues-not-active-1-minute-warning" { +resource "aws_cloudwatch_metric_alarm" "sqs-beat-inbox-tasks-not-active-1-minute-warning" { count = var.cloudwatch_enabled ? 1 : 0 - alarm_name = "queues-not-active-1-minute-warning" - alarm_description = "Queues have not been active for one minute" + alarm_name = "sqs-beat-inbox-tasks-not-active-1-minute-warning" + alarm_description = "Beat inbox tasks have not been active for one minute" comparison_operator = "LessThanThreshold" evaluation_periods = "1" - metric_name = aws_cloudwatch_log_metric_filter.queues-are-active[0].metric_transformation[0].name - namespace = aws_cloudwatch_log_metric_filter.queues-are-active[0].metric_transformation[0].namespace + metric_name = aws_cloudwatch_log_metric_filter.beat-inbox-tasks-are-active[0].metric_transformation[0].name + namespace = aws_cloudwatch_log_metric_filter.beat-inbox-tasks-are-active[0].metric_transformation[0].namespace period = "60" statistic = "Sum" threshold = 1 @@ -923,14 +923,14 @@ resource "aws_cloudwatch_metric_alarm" "queues-not-active-1-minute-warning" { alarm_actions = [var.sns_alert_warning_arn] } -resource "aws_cloudwatch_metric_alarm" "queues-not-active-5-minutes-critical" { +resource "aws_cloudwatch_metric_alarm" "sqs-beat-inbox-tasks-not-active-5-minutes-critical" { count = 
var.cloudwatch_enabled ? 1 : 0 - alarm_name = "queues-not-active-5-minutes-critical" - alarm_description = "Queues have not been active for 5 minutes" + alarm_name = "sqs-beat-inbox-tasks-not-active-5-minutes-critical" + alarm_description = "Beat inbox tasks have not been active for 5 minutes" comparison_operator = "LessThanThreshold" evaluation_periods = "1" - metric_name = aws_cloudwatch_log_metric_filter.queues-are-active[0].metric_transformation[0].name - namespace = aws_cloudwatch_log_metric_filter.queues-are-active[0].metric_transformation[0].namespace + metric_name = aws_cloudwatch_log_metric_filter.beat-inbox-tasks-are-active[0].metric_transformation[0].name + namespace = aws_cloudwatch_log_metric_filter.beat-inbox-tasks-are-active[0].metric_transformation[0].namespace period = "300" statistic = "Sum" threshold = 1 diff --git a/aws/eks/cloudwatch_log.tf b/aws/eks/cloudwatch_log.tf index 746cb829b..ba189f437 100644 --- a/aws/eks/cloudwatch_log.tf +++ b/aws/eks/cloudwatch_log.tf @@ -154,14 +154,14 @@ resource "aws_cloudwatch_log_metric_filter" "documentation-evicted-pods" { } } -resource "aws_cloudwatch_log_metric_filter" "queues-are-active" { +resource "aws_cloudwatch_log_metric_filter" "beat-inbox-tasks-are-active" { count = var.cloudwatch_enabled ? 
1 : 0 - name = "queues-are-active" + name = "beat-inbox-tasks-are-active" pattern = "Batch saving with" log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name metric_transformation { - name = "queues-are-active" + name = "beat-inbox-tasks-are-active" namespace = "LogMetrics" value = "1" } From 815cf9755d10b7b42ebd6b145c9b257b9ea91fd6 Mon Sep 17 00:00:00 2001 From: Andrew Leith Date: Tue, 23 Jan 2024 11:58:44 -0400 Subject: [PATCH 2/3] chore: get rid of the sqs prefix as it doesn't really make any sense --- aws/eks/cloudwatch_alarms.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aws/eks/cloudwatch_alarms.tf b/aws/eks/cloudwatch_alarms.tf index e7e9d064d..f703e8b54 100644 --- a/aws/eks/cloudwatch_alarms.tf +++ b/aws/eks/cloudwatch_alarms.tf @@ -908,9 +908,9 @@ resource "aws_cloudwatch_metric_alarm" "karpenter-replicas-unavailable" { } } -resource "aws_cloudwatch_metric_alarm" "sqs-beat-inbox-tasks-not-active-1-minute-warning" { +resource "aws_cloudwatch_metric_alarm" "beat-inbox-tasks-not-active-1-minute-warning" { count = var.cloudwatch_enabled ? 1 : 0 - alarm_name = "sqs-beat-inbox-tasks-not-active-1-minute-warning" + alarm_name = "beat-inbox-tasks-not-active-1-minute-warning" alarm_description = "Beat inbox tasks have not been active for one minute" comparison_operator = "LessThanThreshold" evaluation_periods = "1" @@ -923,9 +923,9 @@ resource "aws_cloudwatch_metric_alarm" "sqs-beat-inbox-tasks-not-active-1-minute alarm_actions = [var.sns_alert_warning_arn] } -resource "aws_cloudwatch_metric_alarm" "sqs-beat-inbox-tasks-not-active-5-minutes-critical" { +resource "aws_cloudwatch_metric_alarm" "beat-inbox-tasks-not-active-5-minutes-critical" { count = var.cloudwatch_enabled ? 
1 : 0 - alarm_name = "sqs-beat-inbox-tasks-not-active-5-minutes-critical" + alarm_name = "beat-inbox-tasks-not-active-5-minutes-critical" alarm_description = "Beat inbox tasks have not been active for 5 minutes" comparison_operator = "LessThanThreshold" evaluation_periods = "1" From a1bbc78d30ef12a96a5e3e6a6e12c3db68e4ea44 Mon Sep 17 00:00:00 2001 From: Andrew Leith Date: Tue, 23 Jan 2024 13:24:28 -0400 Subject: [PATCH 3/3] chore: renamed things one more time! --- aws/eks/cloudwatch_alarms.tf | 16 ++++++++-------- aws/eks/cloudwatch_log.tf | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/aws/eks/cloudwatch_alarms.tf b/aws/eks/cloudwatch_alarms.tf index f703e8b54..2009c6c66 100644 --- a/aws/eks/cloudwatch_alarms.tf +++ b/aws/eks/cloudwatch_alarms.tf @@ -908,14 +908,14 @@ resource "aws_cloudwatch_metric_alarm" "karpenter-replicas-unavailable" { } } -resource "aws_cloudwatch_metric_alarm" "beat-inbox-tasks-not-active-1-minute-warning" { +resource "aws_cloudwatch_metric_alarm" "aggregating-queues-not-active-1-minute-warning" { count = var.cloudwatch_enabled ? 
1 : 0 - alarm_name = "beat-inbox-tasks-not-active-1-minute-warning" + alarm_name = "aggregating-queues-not-active-1-minute-warning" alarm_description = "Beat inbox tasks have not been active for one minute" comparison_operator = "LessThanThreshold" evaluation_periods = "1" - metric_name = aws_cloudwatch_log_metric_filter.beat-inbox-tasks-are-active[0].metric_transformation[0].name - namespace = aws_cloudwatch_log_metric_filter.beat-inbox-tasks-are-active[0].metric_transformation[0].namespace + metric_name = aws_cloudwatch_log_metric_filter.aggregating-queues-are-active[0].metric_transformation[0].name + namespace = aws_cloudwatch_log_metric_filter.aggregating-queues-are-active[0].metric_transformation[0].namespace period = "60" statistic = "Sum" threshold = 1 @@ -923,14 +923,14 @@ resource "aws_cloudwatch_metric_alarm" "beat-inbox-tasks-not-active-1-minute-war alarm_actions = [var.sns_alert_warning_arn] } -resource "aws_cloudwatch_metric_alarm" "beat-inbox-tasks-not-active-5-minutes-critical" { +resource "aws_cloudwatch_metric_alarm" "aggregating-queues-not-active-5-minutes-critical" { count = var.cloudwatch_enabled ? 
1 : 0 - alarm_name = "beat-inbox-tasks-not-active-5-minutes-critical" + alarm_name = "aggregating-queues-not-active-5-minutes-critical" alarm_description = "Beat inbox tasks have not been active for 5 minutes" comparison_operator = "LessThanThreshold" evaluation_periods = "1" - metric_name = aws_cloudwatch_log_metric_filter.beat-inbox-tasks-are-active[0].metric_transformation[0].name - namespace = aws_cloudwatch_log_metric_filter.beat-inbox-tasks-are-active[0].metric_transformation[0].namespace + metric_name = aws_cloudwatch_log_metric_filter.aggregating-queues-are-active[0].metric_transformation[0].name + namespace = aws_cloudwatch_log_metric_filter.aggregating-queues-are-active[0].metric_transformation[0].namespace period = "300" statistic = "Sum" threshold = 1 diff --git a/aws/eks/cloudwatch_log.tf b/aws/eks/cloudwatch_log.tf index ba189f437..1397e9767 100644 --- a/aws/eks/cloudwatch_log.tf +++ b/aws/eks/cloudwatch_log.tf @@ -154,14 +154,14 @@ resource "aws_cloudwatch_log_metric_filter" "documentation-evicted-pods" { } } -resource "aws_cloudwatch_log_metric_filter" "beat-inbox-tasks-are-active" { +resource "aws_cloudwatch_log_metric_filter" "aggregating-queues-are-active" { count = var.cloudwatch_enabled ? 1 : 0 - name = "beat-inbox-tasks-are-active" + name = "aggregating-queues-are-active" pattern = "Batch saving with" log_group_name = aws_cloudwatch_log_group.notification-canada-ca-eks-application-logs[0].name metric_transformation { - name = "beat-inbox-tasks-are-active" + name = "aggregating-queues-are-active" namespace = "LogMetrics" value = "1" }