Skip to content

Commit

Permalink
feat: ECS Airflow
Browse files Browse the repository at this point in the history
This is a basic ECS Airflow setup, for comparison with the MWAA Airflow setup.

GOV.UK PaaS is shutting down, so we need to move off it. MWAA Airflow doesn't
give us the ability to run DAGs without access to all other DAGs' credentials,
so we are investigating how straightforward it is to do that with our own
Airflow on ECS.

Co-authored-by: Tash Boyse <[email protected]>
Co-authored-by: Michal Charemza <[email protected]>
Co-authored-by: Mohizur Khan <[email protected]>
Co-authored-by: Josh Wong <[email protected]>
  • Loading branch information
4 people committed Jun 10, 2024
1 parent bf61b73 commit 8bd7e1f
Show file tree
Hide file tree
Showing 7 changed files with 593 additions and 5 deletions.
5 changes: 5 additions & 0 deletions infra/ecr.tf
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ resource "aws_ecr_repository" "superset" {
name = "${var.prefix}-superset"
}

# ECR repository named "<prefix>-airflow"; holds the Airflow container image.
resource "aws_ecr_repository" "airflow" {
  name = "${var.prefix}-airflow"
}

# ECR repository named "<prefix>-flower".
resource "aws_ecr_repository" "flower" {
  name = "${var.prefix}-flower"
}
Expand Down Expand Up @@ -269,6 +273,7 @@ data "aws_iam_policy_document" "aws_vpc_endpoint_ecr" {
"${aws_ecr_repository.mirrors_sync.arn}",
"${aws_ecr_repository.mirrors_sync_cran_binary.arn}",
"${aws_ecr_repository.superset.arn}",
"${aws_ecr_repository.airflow.arn}",
"${aws_ecr_repository.flower.arn}",
"${aws_ecr_repository.mlflow.arn}",
]
Expand Down
301 changes: 301 additions & 0 deletions infra/ecs_main_airflow.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
# Fargate service that keeps one Airflow task running and registers its
# "airflow" container (port 8080) with the airflow_8080 target group.
# Created only when var.airflow_on is true.
resource "aws_ecs_service" "airflow" {
  count = var.airflow_on ? 1 : 0

  name             = "${var.prefix}-airflow"
  cluster          = aws_ecs_cluster.main_cluster.id
  task_definition  = aws_ecs_task_definition.airflow_service[count.index].arn
  launch_type      = "FARGATE"
  platform_version = "1.4.0"

  desired_count                     = 1
  deployment_maximum_percent        = 200
  health_check_grace_period_seconds = 10

  network_configuration {
    # Only the first private-with-egress subnet is used for task placement.
    subnets         = [aws_subnet.private_with_egress[0].id]
    security_groups = [aws_security_group.airflow_service.id]
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.airflow_8080[count.index].arn
    container_name   = "airflow"
    container_port   = 8080
  }

  # Explicit ordering: the HTTPS listener is created before this service.
  depends_on = [
    aws_lb_listener.airflow_443,
  ]
}

# Fargate task definition for the Airflow container.
#
# The container definitions are rendered from
# ecs_main_airflow_container_definitions.json; every key in the map below is
# substituted into that template (mostly as container environment variables).
# Created only when var.airflow_on is true.
resource "aws_ecs_task_definition" "airflow_service" {
  count  = var.airflow_on ? 1 : 0
  family = "${var.prefix}-airflow"

  container_definitions = templatefile(
    "${path.module}/ecs_main_airflow_container_definitions.json", {
      # Image, naming, logging and sizing
      container_image = "${aws_ecr_repository.airflow.repository_url}:master"
      container_name  = "airflow"
      log_group       = aws_cloudwatch_log_group.airflow[count.index].name
      log_region      = data.aws_region.aws_region.name
      cpu             = local.airflow_container_cpu
      memory          = local.airflow_container_memory

      # Connection details for the airflow RDS cluster, plus the app secret key
      db_host     = aws_rds_cluster.airflow[count.index].endpoint
      db_name     = aws_rds_cluster.airflow[count.index].database_name
      db_password = random_string.aws_db_instance_airflow_password.result
      db_port     = aws_rds_cluster.airflow[count.index].port
      db_user     = aws_rds_cluster.airflow[count.index].master_username
      secret_key  = random_string.airflow_secret_key.result

      # Connection details for the datasets RDS cluster
      datasets_db_host     = aws_rds_cluster.datasets.endpoint
      datasets_db_name     = aws_rds_cluster.datasets.database_name
      datasets_db_password = random_string.aws_rds_cluster_instance_datasets_password.result
      datasets_db_port     = aws_rds_cluster.datasets.port
      datasets_db_user     = var.datasets_rds_cluster_master_username

      # Sentry (NOTE(review): reuses the notebooks DSN variable - confirm intended)
      sentry_dsn         = var.sentry_notebooks_dsn
      sentry_environment = var.sentry_environment

      # Authbroker client settings
      authbroker_url           = var.airflow_authbroker_url
      authbroker_client_id     = var.airflow_authbroker_client_id
      authbroker_client_secret = var.airflow_authbroker_client_secret
    }
  )

  execution_role_arn       = aws_iam_role.airflow_task_execution[count.index].arn
  task_role_arn            = aws_iam_role.airflow_task[count.index].arn
  network_mode             = "awsvpc"
  cpu                      = local.airflow_container_cpu
  memory                   = local.airflow_container_memory
  requires_compatibilities = ["FARGATE"]

  lifecycle {
    # Attribute references in ignore_changes are written unquoted; the quoted
    # form is deprecated since Terraform 0.12.
    ignore_changes = [
      revision,
    ]
  }
}

# CloudWatch log group for the Airflow container; logs are kept for 3653 days.
resource "aws_cloudwatch_log_group" "airflow" {
  count = var.airflow_on ? 1 : 0

  name              = "${var.prefix}-airflow"
  retention_in_days = 3653
}

# Forwards every event (empty filter pattern) from the Airflow log group to
# var.cloudwatch_destination_arn. Created only when both
# var.cloudwatch_subscription_filter and var.airflow_on are true.
resource "aws_cloudwatch_log_subscription_filter" "airflow" {
  count = (var.cloudwatch_subscription_filter && var.airflow_on) ? 1 : 0

  name            = "${var.prefix}-airflow"
  log_group_name  = aws_cloudwatch_log_group.airflow[count.index].name
  destination_arn = var.cloudwatch_destination_arn
  filter_pattern  = ""
}

# Execution role for the Airflow task definition; assumable by ECS tasks.
resource "aws_iam_role" "airflow_task_execution" {
  count = var.airflow_on ? 1 : 0

  name = "${var.prefix}-airflow-task-execution"
  path = "/"

  assume_role_policy = data.aws_iam_policy_document.airflow_task_execution_ecs_tasks_assume_role[count.index].json
}

# Trust policy for the task execution role: lets ecs-tasks.amazonaws.com
# call sts:AssumeRole.
data "aws_iam_policy_document" "airflow_task_execution_ecs_tasks_assume_role" {
  count = var.airflow_on ? 1 : 0

  statement {
    actions = [
      "sts:AssumeRole",
    ]

    principals {
      type = "Service"
      identifiers = [
        "ecs-tasks.amazonaws.com",
      ]
    }
  }
}

# Attaches the airflow_task_execution policy to the role of the same name.
resource "aws_iam_role_policy_attachment" "airflow_task_execution" {
  count = var.airflow_on ? 1 : 0

  role       = aws_iam_role.airflow_task_execution[count.index].name
  policy_arn = aws_iam_policy.airflow_task_execution[count.index].arn
}

# Managed policy wrapping the airflow_task_execution policy document.
resource "aws_iam_policy" "airflow_task_execution" {
  count = var.airflow_on ? 1 : 0

  name   = "${var.prefix}-airflow-task-execution"
  path   = "/"
  policy = data.aws_iam_policy_document.airflow_task_execution[count.index].json
}

# Permissions for the task execution role: write to the Airflow log group,
# pull from the Airflow ECR repository, and obtain an ECR auth token.
data "aws_iam_policy_document" "airflow_task_execution" {
  count = var.airflow_on ? 1 : 0

  # Create log streams and put events in the Airflow log group.
  statement {
    actions = [
      "logs:CreateLogStream",
      "logs:PutLogEvents",
    ]
    resources = [
      "${aws_cloudwatch_log_group.airflow[count.index].arn}:*",
    ]
  }

  # Pull image layers from the Airflow repository only.
  statement {
    actions = [
      "ecr:BatchGetImage",
      "ecr:GetDownloadUrlForLayer",
    ]
    resources = [
      aws_ecr_repository.airflow.arn,
    ]
  }

  # The authorization token action is granted on all resources.
  statement {
    actions = [
      "ecr:GetAuthorizationToken",
    ]
    resources = [
      "*",
    ]
  }
}

# Task role for the Airflow task definition; assumable by ECS tasks.
# No permission policies are attached to it in this part of the file.
resource "aws_iam_role" "airflow_task" {
  count = var.airflow_on ? 1 : 0

  name = "${var.prefix}-airflow-task"
  path = "/"

  assume_role_policy = data.aws_iam_policy_document.airflow_task_ecs_tasks_assume_role[count.index].json
}

# Trust policy for the task role: lets ecs-tasks.amazonaws.com call
# sts:AssumeRole.
data "aws_iam_policy_document" "airflow_task_ecs_tasks_assume_role" {
  count = var.airflow_on ? 1 : 0

  statement {
    actions = [
      "sts:AssumeRole",
    ]

    principals {
      type = "Service"
      identifiers = [
        "ecs-tasks.amazonaws.com",
      ]
    }
  }
}

# Non-internal application load balancer for Airflow, placed in all public
# subnets, with deletion protection switched on.
resource "aws_lb" "airflow" {
  count = var.airflow_on ? 1 : 0

  name               = "${var.prefix}-airflow"
  load_balancer_type = "application"
  internal           = false

  subnets         = aws_subnet.public[*].id
  security_groups = [aws_security_group.airflow_lb.id]

  enable_deletion_protection = true
}

# HTTPS listener on port 443 of the Airflow load balancer; forwards all
# traffic to the airflow_8080 target group.
resource "aws_lb_listener" "airflow_443" {
  count = var.airflow_on ? 1 : 0

  load_balancer_arn = aws_lb.airflow[count.index].arn
  protocol          = "HTTPS"
  port              = 443

  # Uses the ARN exposed by the certificate validation resource.
  certificate_arn = aws_acm_certificate_validation.airflow[count.index].certificate_arn
  ssl_policy      = "ELBSecurityPolicy-TLS-1-2-2017-01"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.airflow_8080[count.index].arn
  }
}

# IP-type target group that sends HTTP traffic to port 8080 and health-checks
# targets on /health.
resource "aws_lb_target_group" "airflow_8080" {
  count = var.airflow_on ? 1 : 0

  name_prefix = "s8080-"
  vpc_id      = aws_vpc.main.id
  target_type = "ip"
  protocol    = "HTTP"
  port        = 8080

  health_check {
    path     = "/health"
    protocol = "HTTP"

    interval = 20
    timeout  = 15

    healthy_threshold   = 2
    unhealthy_threshold = 5
  }

  lifecycle {
    # A replacement target group is created before the old one is destroyed.
    create_before_destroy = true
  }
}

# Aurora PostgreSQL cluster for Airflow. The master password comes from
# random_string.aws_db_instance_airflow_password.
resource "aws_rds_cluster" "airflow" {
  count = var.airflow_on ? 1 : 0

  cluster_identifier = "${var.prefix}-airflow"
  engine             = "aurora-postgresql"
  availability_zones = var.aws_availability_zones

  # Database and master credentials
  database_name   = "${var.prefix_underscore}_airflow"
  master_username = "${var.prefix_underscore}_airflow_master"
  master_password = random_string.aws_db_instance_airflow_password.result

  # Networking
  vpc_security_group_ids = [aws_security_group.airflow_db.id]
  db_subnet_group_name   = aws_db_subnet_group.airflow[count.index].name

  # Backups and snapshots
  backup_retention_period   = 31
  preferred_backup_window   = "03:29-03:59"
  final_snapshot_identifier = "${var.prefix}-airflow"
  copy_tags_to_snapshot     = true

  apply_immediately              = true
  enable_global_write_forwarding = false
}

# Single instance in the Airflow RDS cluster; engine and engine version are
# taken from the cluster itself.
resource "aws_rds_cluster_instance" "airflow" {
  count = var.airflow_on ? 1 : 0

  identifier         = "${var.prefix}-airflow"
  cluster_identifier = aws_rds_cluster.airflow[count.index].id
  instance_class     = var.airflow_db_instance_class
  promotion_tier     = 1

  engine         = aws_rds_cluster.airflow[count.index].engine
  engine_version = aws_rds_cluster.airflow[count.index].engine_version
}

# DB subnet group for the Airflow cluster, covering every private-with-egress
# subnet.
resource "aws_db_subnet_group" "airflow" {
  count = var.airflow_on ? 1 : 0

  name       = "${var.prefix}-airflow"
  subnet_ids = aws_subnet.private_with_egress[*].id

  tags = {
    Name = "${var.prefix}-airflow"
  }

  lifecycle {
    # A replacement subnet group is created before the old one is destroyed.
    create_before_destroy = true
  }
}

# 99-character random string without special characters, used as the master
# password of the Airflow RDS cluster. Not gated on var.airflow_on.
resource "random_string" "aws_db_instance_airflow_password" {
  special = false
  length  = 99
}

# Role assumable by the ECS service principal (ecs.amazonaws.com).
resource "aws_iam_role" "airflow_ecs" {
  count = var.airflow_on ? 1 : 0

  name = "${var.prefix}-airflow-ecs"
  path = "/"

  assume_role_policy = data.aws_iam_policy_document.airflow_ecs_assume_role[count.index].json
}

# Attaches the AWS-managed AmazonEC2ContainerServiceRole policy to the
# airflow_ecs role.
resource "aws_iam_role_policy_attachment" "airflow_ecs" {
  count = var.airflow_on ? 1 : 0

  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceRole"
  role       = aws_iam_role.airflow_ecs[count.index].name
}

# Trust policy for the airflow_ecs role: lets ecs.amazonaws.com call
# sts:AssumeRole.
data "aws_iam_policy_document" "airflow_ecs_assume_role" {
  count = var.airflow_on ? 1 : 0

  statement {
    actions = [
      "sts:AssumeRole",
    ]

    principals {
      type = "Service"
      identifiers = [
        "ecs.amazonaws.com",
      ]
    }
  }
}

# 64-character random string without special characters, passed to the
# Airflow container as its secret key. Not gated on var.airflow_on.
resource "random_string" "airflow_secret_key" {
  special = false
  length  = 64
}
74 changes: 74 additions & 0 deletions infra/ecs_main_airflow_container_definitions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
[
  {
    "name": "${container_name}",
    "image": "${container_image}",
    "essential": true,
    "cpu": ${cpu},
    "memoryReservation": ${memory},
    "environment": [
      {
        "name": "DB_HOST",
        "value": "${db_host}"
      },
      {
        "name": "DB_NAME",
        "value": "${db_name}"
      },
      {
        "name": "DB_PASSWORD",
        "value": "${db_password}"
      },
      {
        "name": "DB_PORT",
        "value": "${db_port}"
      },
      {
        "name": "DB_USER",
        "value": "${db_user}"
      },
      {
        "name": "SECRET_KEY",
        "value": "${secret_key}"
      },
      {
        "name": "SENTRY_DSN",
        "value": "${sentry_dsn}"
      },
      {
        "name": "SENTRY_ENVIRONMENT",
        "value": "${sentry_environment}"
      },
      {
        "name": "AUTHBROKER_URL",
        "value": "${authbroker_url}"
      },
      {
        "name": "AUTHBROKER_CLIENT_ID",
        "value": "${authbroker_client_id}"
      },
      {
        "name": "AUTHBROKER_CLIENT_SECRET",
        "value": "${authbroker_client_secret}"
      },
      {
        "name": "DATASETS_DB_HOST",
        "value": "${datasets_db_host}"
      },
      {
        "name": "DATASETS_DB_NAME",
        "value": "${datasets_db_name}"
      },
      {
        "name": "DATASETS_DB_PASSWORD",
        "value": "${datasets_db_password}"
      },
      {
        "name": "DATASETS_DB_PORT",
        "value": "${datasets_db_port}"
      },
      {
        "name": "DATASETS_DB_USER",
        "value": "${datasets_db_user}"
      }
    ],
    "logConfiguration": {
      "logDriver": "awslogs",
      "options": {
        "awslogs-group": "${log_group}",
        "awslogs-region": "${log_region}",
        "awslogs-stream-prefix": "${container_name}"
      }
    },
    "mountPoints": [],
    "portMappings": [
      {
        "containerPort": 8080,
        "hostPort": 8080,
        "protocol": "tcp"
      }
    ]
  }
]
Loading

0 comments on commit 8bd7e1f

Please sign in to comment.