Skip to content

Commit

Permalink
Oldest Replication Slot Lag RDS alarm
Browse files Browse the repository at this point in the history
  • Loading branch information
mgeist authored and dazza-codes committed Mar 2, 2023
1 parent d575e86 commit ff38fe1
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 7 deletions.
34 changes: 27 additions & 7 deletions alarms.tf
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
locals {
thresholds = {
BurstBalanceThreshold = min(max(var.burst_balance_threshold, 0), 100)
CPUUtilizationThreshold = min(max(var.cpu_utilization_threshold, 0), 100)
CPUCreditBalanceThreshold = max(var.cpu_credit_balance_threshold, 0)
DiskQueueDepthThreshold = max(var.disk_queue_depth_threshold, 0)
FreeableMemoryThreshold = max(var.freeable_memory_threshold, 0)
FreeStorageSpaceThreshold = max(var.free_storage_space_threshold, 0)
SwapUsageThreshold = max(var.swap_usage_threshold, 0)
BurstBalanceThreshold = min(max(var.burst_balance_threshold, 0), 100)
CPUUtilizationThreshold = min(max(var.cpu_utilization_threshold, 0), 100)
CPUCreditBalanceThreshold = max(var.cpu_credit_balance_threshold, 0)
DiskQueueDepthThreshold = max(var.disk_queue_depth_threshold, 0)
FreeableMemoryThreshold = max(var.freeable_memory_threshold, 0)
FreeStorageSpaceThreshold = max(var.free_storage_space_threshold, 0)
OldestReplicationThreshold = max(var.oldest_replication_threshold, 0)
SwapUsageThreshold = max(var.swap_usage_threshold, 0)
}

alarm_names = toset([
Expand Down Expand Up @@ -139,6 +140,25 @@ resource "aws_cloudwatch_metric_alarm" "free_storage_space_too_low" {
}
}

# Raises an alarm when the RDS OldestReplicationSlotLag metric, averaged over
# one 10-minute period, exceeds the configured threshold. Lag retained for a
# replication slot consumes storage, so sustained lag may fill the disk.
# Both ALARM and OK transitions are published to the default SNS topic.
resource "aws_cloudwatch_metric_alarm" "oldest_replication_too_high" {
  # NOTE(review): the neighbouring swap_usage_too_high alarm derives its name
  # from module.label[...]; this literal name is not namespaced, so two
  # instances of this module in one account/region would collide. Confirm
  # whether it should go through module.label as well.
  alarm_name          = "oldest_replication_too_high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = "1"
  metric_name         = "OldestReplicationSlotLag"
  namespace           = "AWS/RDS"
  period              = "600"
  statistic           = "Average"

  # var.oldest_replication_threshold is expressed in megabytes, but CloudWatch
  # reports OldestReplicationSlotLag in bytes. Convert MB -> bytes here so the
  # default of 1000 really means ~1 GB rather than ~1 KB.
  threshold = local.thresholds["OldestReplicationThreshold"] * 1000000

  alarm_description = "Average database replication lag over last 10 minutes too high, disk may fill"
  alarm_actions     = [aws_sns_topic.default.arn]
  ok_actions        = [aws_sns_topic.default.arn]

  dimensions = {
    DBInstanceIdentifier = var.db_instance_id
  }
}


resource "aws_cloudwatch_metric_alarm" "swap_usage_too_high" {
alarm_name = module.label["swap_usage_too_high"].id
comparison_operator = "GreaterThanThreshold"
Expand Down
7 changes: 7 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ variable "free_storage_space_threshold" {
# 2 Gigabyte in Byte
}

# Threshold for the oldest-replication-slot-lag alarm, in megabytes.
# Declared as a number (not a string) because the default is numeric and the
# value is clamped with max(var.oldest_replication_threshold, 0) in locals;
# this also matches the other threshold variables. Numeric strings supplied
# by existing callers are still accepted via Terraform's automatic conversion.
variable "oldest_replication_threshold" {
  description = "The maximum amount of replication lag space in Megabyte."
  type        = number
  default     = 1000
  # 1 Gigabyte in Megabyte
}

variable "swap_usage_threshold" {
description = "The maximum amount of swap space used on the DB instance in Byte."
type = number
Expand Down

0 comments on commit ff38fe1

Please sign in to comment.