Skip to content

Commit

Permalink
Merge pull request #86 from uktrade/ecs-airflow
Browse files Browse the repository at this point in the history
feat: ECS Airflow
  • Loading branch information
Mohizurkhan authored Jun 10, 2024
2 parents bf61b73 + 8bd7e1f commit eb4723e
Show file tree
Hide file tree
Showing 7 changed files with 593 additions and 5 deletions.
5 changes: 5 additions & 0 deletions infra/ecr.tf
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ resource "aws_ecr_repository" "superset" {
name = "${var.prefix}-superset"
}

# ECR repository named "<prefix>-airflow".
resource "aws_ecr_repository" "airflow" {
  name = "${var.prefix}-airflow"
}

# ECR repository named "<prefix>-flower".
resource "aws_ecr_repository" "flower" {
  name = "${var.prefix}-flower"
}
Expand Down Expand Up @@ -269,6 +273,7 @@ data "aws_iam_policy_document" "aws_vpc_endpoint_ecr" {
"${aws_ecr_repository.mirrors_sync.arn}",
"${aws_ecr_repository.mirrors_sync_cran_binary.arn}",
"${aws_ecr_repository.superset.arn}",
"${aws_ecr_repository.airflow.arn}",
"${aws_ecr_repository.flower.arn}",
"${aws_ecr_repository.mlflow.arn}",
]
Expand Down
301 changes: 301 additions & 0 deletions infra/ecs_main_airflow.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
# ECS Fargate service that runs one Airflow task and registers it with the
# Airflow target group on port 8080. Only created when var.airflow_on is true.
resource "aws_ecs_service" "airflow" {
  count = var.airflow_on ? 1 : 0

  name                              = "${var.prefix}-airflow"
  cluster                           = aws_ecs_cluster.main_cluster.id
  task_definition                   = aws_ecs_task_definition.airflow_service[count.index].arn
  desired_count                     = 1
  launch_type                       = "FARGATE"
  deployment_maximum_percent        = 200
  platform_version                  = "1.4.0"
  health_check_grace_period_seconds = 10

  network_configuration {
    # Tasks are placed in the first private-with-egress subnet only.
    subnets         = [aws_subnet.private_with_egress[0].id]
    security_groups = [aws_security_group.airflow_service.id]
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.airflow_8080[count.index].arn
    container_port   = 8080
    # Must match the container name rendered into the task definition.
    container_name = "airflow"
  }

  # Create the HTTPS listener before the service so the target group is
  # already attached to the load balancer.
  depends_on = [
    aws_lb_listener.airflow_443,
  ]
}

# Fargate task definition for the Airflow container. The container definition
# is rendered from ecs_main_airflow_container_definitions.json with the
# variables below. Only created when var.airflow_on is true.
resource "aws_ecs_task_definition" "airflow_service" {
  count  = var.airflow_on ? 1 : 0
  family = "${var.prefix}-airflow"

  container_definitions = templatefile(
    "${path.module}/ecs_main_airflow_container_definitions.json", {
      container_image = "${aws_ecr_repository.airflow.repository_url}:master"
      container_name  = "airflow"
      log_group       = aws_cloudwatch_log_group.airflow[count.index].name
      log_region      = data.aws_region.aws_region.name
      cpu             = local.airflow_container_cpu
      memory          = local.airflow_container_memory

      # Connection details for the Airflow database cluster.
      # NOTE(review): the password and secret key are handed to the template
      # as plain values; confirm that exposing them as container environment
      # variables (rather than ECS "secrets") is acceptable.
      db_host     = aws_rds_cluster.airflow[count.index].endpoint
      db_name     = aws_rds_cluster.airflow[count.index].database_name
      db_password = random_string.aws_db_instance_airflow_password.result
      db_port     = aws_rds_cluster.airflow[count.index].port
      db_user     = aws_rds_cluster.airflow[count.index].master_username
      secret_key  = random_string.airflow_secret_key.result

      # Connection details for the datasets database cluster.
      datasets_db_host     = aws_rds_cluster.datasets.endpoint
      datasets_db_name     = aws_rds_cluster.datasets.database_name
      datasets_db_password = random_string.aws_rds_cluster_instance_datasets_password.result
      datasets_db_port     = aws_rds_cluster.datasets.port
      datasets_db_user     = var.datasets_rds_cluster_master_username

      # NOTE(review): this reuses the *notebooks* Sentry DSN variable; confirm
      # that is intended for Airflow.
      sentry_dsn         = var.sentry_notebooks_dsn
      sentry_environment = var.sentry_environment

      authbroker_url           = var.airflow_authbroker_url
      authbroker_client_id     = var.airflow_authbroker_client_id
      authbroker_client_secret = var.airflow_authbroker_client_secret
    }
  )

  execution_role_arn       = aws_iam_role.airflow_task_execution[count.index].arn
  task_role_arn            = aws_iam_role.airflow_task[count.index].arn
  network_mode             = "awsvpc"
  cpu                      = local.airflow_container_cpu
  memory                   = local.airflow_container_memory
  requires_compatibilities = ["FARGATE"]

  lifecycle {
    # Unquoted reference: quoted strings in ignore_changes are deprecated
    # since Terraform 0.12.
    # NOTE(review): "revision" looks like a provider-computed attribute, in
    # which case this entry may be redundant; confirm before removing.
    ignore_changes = [
      revision,
    ]
  }
}

# CloudWatch log group for the Airflow container logs; events are kept for
# 3653 days. Only created when var.airflow_on is true.
resource "aws_cloudwatch_log_group" "airflow" {
  count = var.airflow_on ? 1 : 0

  name              = "${var.prefix}-airflow"
  retention_in_days = "3653"
}

# Forwards every event (empty filter pattern) from the Airflow log group to
# var.cloudwatch_destination_arn. Created only when both the subscription
# filter feature and Airflow are switched on.
resource "aws_cloudwatch_log_subscription_filter" "airflow" {
  count = (var.cloudwatch_subscription_filter && var.airflow_on) ? 1 : 0

  name            = "${var.prefix}-airflow"
  log_group_name  = aws_cloudwatch_log_group.airflow[count.index].name
  destination_arn = var.cloudwatch_destination_arn
  filter_pattern  = ""
}

# IAM role used as the execution role of the Airflow task definition.
# Only created when var.airflow_on is true.
resource "aws_iam_role" "airflow_task_execution" {
  count = var.airflow_on ? 1 : 0

  name = "${var.prefix}-airflow-task-execution"
  path = "/"

  assume_role_policy = data.aws_iam_policy_document.airflow_task_execution_ecs_tasks_assume_role[count.index].json
}

# Trust policy allowing the ECS tasks service principal to assume the
# Airflow task execution role.
data "aws_iam_policy_document" "airflow_task_execution_ecs_tasks_assume_role" {
  count = var.airflow_on ? 1 : 0

  statement {
    actions = [
      "sts:AssumeRole",
    ]

    principals {
      type = "Service"
      identifiers = [
        "ecs-tasks.amazonaws.com",
      ]
    }
  }
}

# Attaches the Airflow task execution policy to the task execution role.
resource "aws_iam_role_policy_attachment" "airflow_task_execution" {
  count = var.airflow_on ? 1 : 0

  policy_arn = aws_iam_policy.airflow_task_execution[count.index].arn
  role       = aws_iam_role.airflow_task_execution[count.index].name
}

# Managed policy carrying the permissions defined in the
# airflow_task_execution policy document.
resource "aws_iam_policy" "airflow_task_execution" {
  count = var.airflow_on ? 1 : 0

  name   = "${var.prefix}-airflow-task-execution"
  path   = "/"
  policy = data.aws_iam_policy_document.airflow_task_execution[count.index].json
}

# Permissions for the Airflow task execution role: write to the Airflow log
# group, pull the image from the Airflow ECR repository, and obtain an ECR
# authorization token.
data "aws_iam_policy_document" "airflow_task_execution" {
  count = var.airflow_on ? 1 : 0

  # Write container logs to streams inside the Airflow log group.
  statement {
    actions = [
      "logs:CreateLogStream",
      "logs:PutLogEvents",
    ]

    resources = [
      "${aws_cloudwatch_log_group.airflow[count.index].arn}:*",
    ]
  }

  # Pull image layers, restricted to the Airflow repository.
  statement {
    actions = [
      "ecr:BatchGetImage",
      "ecr:GetDownloadUrlForLayer",
    ]

    resources = [
      aws_ecr_repository.airflow.arn,
    ]
  }

  # Granted on all resources ("*") rather than on a single repository.
  statement {
    actions = [
      "ecr:GetAuthorizationToken",
    ]

    resources = [
      "*",
    ]
  }
}

# IAM role used as the task role of the Airflow task definition.
# NOTE(review): no policies are attached to this role in the visible part of
# this file; confirm whether they are defined elsewhere.
resource "aws_iam_role" "airflow_task" {
  count = var.airflow_on ? 1 : 0

  name = "${var.prefix}-airflow-task"
  path = "/"

  assume_role_policy = data.aws_iam_policy_document.airflow_task_ecs_tasks_assume_role[count.index].json
}

# Trust policy allowing the ECS tasks service principal to assume the
# Airflow task role.
data "aws_iam_policy_document" "airflow_task_ecs_tasks_assume_role" {
  count = var.airflow_on ? 1 : 0

  statement {
    actions = [
      "sts:AssumeRole",
    ]

    principals {
      type = "Service"
      identifiers = [
        "ecs-tasks.amazonaws.com",
      ]
    }
  }
}

# Internet-facing (internal = false) application load balancer for Airflow,
# placed in the public subnets. Only created when var.airflow_on is true.
resource "aws_lb" "airflow" {
  count = var.airflow_on ? 1 : 0

  name               = "${var.prefix}-airflow"
  load_balancer_type = "application"
  internal           = false
  security_groups    = [aws_security_group.airflow_lb.id]
  subnets            = aws_subnet.public.*.id

  # Deletion protection is on, so it has to be disabled before Terraform can
  # destroy this load balancer (for example when var.airflow_on is set back
  # to false).
  enable_deletion_protection = true
}

# HTTPS listener on port 443 that forwards all traffic to the Airflow
# target group. Only created when var.airflow_on is true.
resource "aws_lb_listener" "airflow_443" {
  count = var.airflow_on ? 1 : 0

  load_balancer_arn = aws_lb.airflow[count.index].arn
  protocol          = "HTTPS"
  port              = "443"

  # Uses the certificate ARN from the validation resource rather than from
  # the certificate itself.
  certificate_arn = aws_acm_certificate_validation.airflow[count.index].certificate_arn
  # NOTE(review): confirm this TLS policy is still the desired baseline.
  ssl_policy = "ELBSecurityPolicy-TLS-1-2-2017-01"

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.airflow_8080[count.index].arn
  }
}

# IP-type target group for the Airflow container on port 8080, health
# checked over HTTP at /health. Only created when var.airflow_on is true.
resource "aws_lb_target_group" "airflow_8080" {
  count = var.airflow_on ? 1 : 0

  # NOTE(review): the "s8080-" prefix may have been carried over from another
  # service; confirm it is the intended prefix for Airflow.
  name_prefix = "s8080-"
  vpc_id      = aws_vpc.main.id
  target_type = "ip"
  protocol    = "HTTP"
  port        = "8080"

  health_check {
    path     = "/health"
    protocol = "HTTP"

    interval = 20
    timeout  = 15

    healthy_threshold   = 2
    unhealthy_threshold = 5
  }

  # Create the replacement target group before destroying the old one.
  lifecycle {
    create_before_destroy = true
  }
}

# Aurora PostgreSQL cluster backing Airflow. Only created when
# var.airflow_on is true.
resource "aws_rds_cluster" "airflow" {
  count = var.airflow_on ? 1 : 0

  cluster_identifier = "${var.prefix}-airflow"
  engine             = "aurora-postgresql"

  # NOTE(review): the AWS provider documentation warns that supplying fewer
  # than three availability zones produces a recurring diff that forces
  # replacement; confirm var.aws_availability_zones has three entries.
  availability_zones = var.aws_availability_zones

  database_name   = "${var.prefix_underscore}_airflow"
  master_username = "${var.prefix_underscore}_airflow_master"
  # NOTE(review): the password comes from a random_string resource; consider
  # whether random_password would be more appropriate.
  master_password = random_string.aws_db_instance_airflow_password.result

  backup_retention_period = 31
  preferred_backup_window = "03:29-03:59"
  apply_immediately       = true

  vpc_security_group_ids = [aws_security_group.airflow_db.id]
  db_subnet_group_name   = aws_db_subnet_group.airflow[count.index].name

  # Fixed snapshot name taken when the cluster is destroyed.
  # NOTE(review): a second destroy would reuse the same name; confirm this
  # cannot collide with an existing snapshot.
  final_snapshot_identifier = "${var.prefix}-airflow"

  copy_tags_to_snapshot          = true
  enable_global_write_forwarding = false
}

# Single instance in the Airflow Aurora cluster; engine and engine version
# are taken from the cluster. Only created when var.airflow_on is true.
resource "aws_rds_cluster_instance" "airflow" {
  count = var.airflow_on ? 1 : 0

  identifier         = "${var.prefix}-airflow"
  cluster_identifier = aws_rds_cluster.airflow[count.index].id
  instance_class     = var.airflow_db_instance_class
  promotion_tier     = 1

  engine         = aws_rds_cluster.airflow[count.index].engine
  engine_version = aws_rds_cluster.airflow[count.index].engine_version
}

# DB subnet group for the Airflow cluster, spanning all private-with-egress
# subnets. Only created when var.airflow_on is true.
resource "aws_db_subnet_group" "airflow" {
  count = var.airflow_on ? 1 : 0

  name       = "${var.prefix}-airflow"
  subnet_ids = aws_subnet.private_with_egress.*.id

  tags = {
    Name = "${var.prefix}-airflow"
  }

  # Create the replacement subnet group before destroying the old one.
  lifecycle {
    create_before_destroy = true
  }
}

# 99-character random string without special characters, used as the master
# password of the Airflow database cluster. It has no count, so it is
# created regardless of var.airflow_on.
resource "random_string" "aws_db_instance_airflow_password" {
  special = false
  length  = 99
}

# IAM role assumable by the ECS service principal.
# NOTE(review): nothing in the visible part of this file references this
# role apart from its policy attachment; confirm it is still needed.
resource "aws_iam_role" "airflow_ecs" {
  count = var.airflow_on ? 1 : 0

  name = "${var.prefix}-airflow-ecs"
  path = "/"

  assume_role_policy = data.aws_iam_policy_document.airflow_ecs_assume_role[count.index].json
}

# Attaches the AWS-managed AmazonEC2ContainerServiceRole policy to the
# airflow_ecs role.
resource "aws_iam_role_policy_attachment" "airflow_ecs" {
  count = var.airflow_on ? 1 : 0

  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceRole"
  role       = aws_iam_role.airflow_ecs[count.index].name
}

# Trust policy allowing the ECS service principal to assume the airflow_ecs
# role.
data "aws_iam_policy_document" "airflow_ecs_assume_role" {
  count = var.airflow_on ? 1 : 0

  statement {
    actions = [
      "sts:AssumeRole",
    ]

    principals {
      type = "Service"
      identifiers = [
        "ecs.amazonaws.com",
      ]
    }
  }
}

# 64-character random string without special characters, passed to the
# Airflow container definition template as secret_key. It has no count, so
# it is created regardless of var.airflow_on.
resource "random_string" "airflow_secret_key" {
  special = false
  length  = 64
}
74 changes: 74 additions & 0 deletions infra/ecs_main_airflow_container_definitions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
[
  {
    "name": "${container_name}",
    "image": "${container_image}",
    "essential": true,
    "networkMode": "awsvpc",
    "cpu": ${cpu},
    "memoryReservation": ${memory},
    "mountPoints": [],
    "portMappings": [
      {
        "containerPort": 8080,
        "hostPort": 8080,
        "protocol": "tcp"
      }
    ],
    "logConfiguration": {
      "logDriver": "awslogs",
      "options": {
        "awslogs-group": "${log_group}",
        "awslogs-region": "${log_region}",
        "awslogs-stream-prefix": "${container_name}"
      }
    },
    "environment": [
      {
        "name": "DB_HOST",
        "value": "${db_host}"
      },
      {
        "name": "DB_NAME",
        "value": "${db_name}"
      },
      {
        "name": "DB_PASSWORD",
        "value": "${db_password}"
      },
      {
        "name": "DB_PORT",
        "value": "${db_port}"
      },
      {
        "name": "DB_USER",
        "value": "${db_user}"
      },
      {
        "name": "SECRET_KEY",
        "value": "${secret_key}"
      },
      {
        "name": "SENTRY_DSN",
        "value": "${sentry_dsn}"
      },
      {
        "name": "SENTRY_ENVIRONMENT",
        "value": "${sentry_environment}"
      },
      {
        "name": "AUTHBROKER_URL",
        "value": "${authbroker_url}"
      },
      {
        "name": "AUTHBROKER_CLIENT_ID",
        "value": "${authbroker_client_id}"
      },
      {
        "name": "AUTHBROKER_CLIENT_SECRET",
        "value": "${authbroker_client_secret}"
      },
      {
        "name": "DATASETS_DB_HOST",
        "value": "${datasets_db_host}"
      },
      {
        "name": "DATASETS_DB_NAME",
        "value": "${datasets_db_name}"
      },
      {
        "name": "DATASETS_DB_PASSWORD",
        "value": "${datasets_db_password}"
      },
      {
        "name": "DATASETS_DB_PORT",
        "value": "${datasets_db_port}"
      },
      {
        "name": "DATASETS_DB_USER",
        "value": "${datasets_db_user}"
      }
    ]
  }
]
Loading

0 comments on commit eb4723e

Please sign in to comment.