-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature 1137 Create Terraform scripts for ECS backend (#1141)
* move backend infra into terraform * django on fargate, training on ec2
- Loading branch information
1 parent
453d6e9
commit 83d8827
Showing
9 changed files
with
624 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
# This workflow will build and push a new container image to Amazon ECR, | ||
# and then will deploy a new task definition to Amazon ECS, when it is run manually via "Run workflow" (workflow_dispatch). | ||
# | ||
# To use this workflow, you will need to complete the following set-up steps: | ||
# | ||
# 1. Create an ECR repository to store your images. | ||
# For example: `aws ecr create-repository --repository-name my-ecr-repo --region us-east-2`. | ||
# Replace the value of the `ECR_REPOSITORY` environment variable in the workflow below with your repository's name. | ||
# Replace the value of the `AWS_REGION` environment variable in the workflow below with your repository's region. | ||
# | ||
# 2. Create an ECS task definition, an ECS cluster, and an ECS service. | ||
# For example, follow the Getting Started guide on the ECS console: | ||
# https://us-east-2.console.aws.amazon.com/ecs/home?region=us-east-2#/firstRun | ||
# Replace the value of the `ECS_SERVICE` environment variable in the workflow below with the name you set for the Amazon ECS service. | ||
# Replace the value of the `ECS_CLUSTER` environment variable in the workflow below with the name you set for the cluster. | ||
# | ||
# 3. Register an ECS task definition. This workflow downloads the `django` task definition with | ||
# `aws ecs describe-task-definition` instead of reading a JSON file from the repository. | ||
# Replace the `--task-definition` value in the "Download task definition" step below with your task definition family. | ||
# Replace the value of the `CONTAINER_NAME` environment variable in the workflow below with the name of the container | ||
# in the `containerDefinitions` section of the task definition. | ||
# | ||
# 4. Store an IAM user access key in GitHub Actions secrets named `AWS_DEPLOY_ACCESS_KEY_ID` and `AWS_DEPLOY_SECRET_ACCESS_KEY`. | ||
# See the documentation for each action used below for the recommended IAM policies for this IAM user, | ||
# and best practices on handling the access key credentials. | ||
|
||
name: ECS Django Container Deployment | ||
|
||
# Only trigger when user clicks "run workflow" | ||
on: | ||
workflow_dispatch: | ||
|
||
env: | ||
AWS_REGION: "us-east-1" # set this to your preferred AWS region, e.g. us-west-1 | ||
ECR_REPOSITORY: "django" # set this to your Amazon ECR repository name | ||
ECS_SERVICE: "django" # set this to your Amazon ECS service name | ||
ECS_CLUSTER: "backend" # set this to your Amazon ECS cluster name | ||
CONTAINER_NAME: "django" # set this to the name of the container in the containerDefinitions section of your task definition | ||
|
||
permissions: | ||
contents: read | ||
actions: write | ||
|
||
jobs: | ||
deploy: | ||
name: Deploy | ||
runs-on: ubuntu-latest | ||
environment: production | ||
steps: | ||
- name: Get current branch | ||
# GITHUB_REF_NAME is the short ref name; unlike ${GITHUB_REF##*/} it keeps branch names that contain "/" (e.g. feature/foo) intact. | ||
run: echo "running on branch ${GITHUB_REF_NAME}" | ||
|
||
- name: Checkout | ||
uses: actions/checkout@v3 | ||
|
||
- name: Configure AWS credentials | ||
uses: aws-actions/configure-aws-credentials@v1 | ||
with: | ||
aws-access-key-id: ${{ secrets.AWS_DEPLOY_ACCESS_KEY_ID }} | ||
aws-secret-access-key: ${{ secrets.AWS_DEPLOY_SECRET_ACCESS_KEY }} | ||
aws-region: ${{ env.AWS_REGION }} | ||
|
||
- name: Login to Amazon ECR | ||
id: login-ecr | ||
uses: aws-actions/amazon-ecr-login@v1 | ||
|
||
# Builds the image from the `training` directory using training/Dockerfile.prod, tags it with the commit SHA, | ||
# pushes it to the ECR registry returned by the login step, and exposes the full image URI as the step output `image`. | ||
- name: Build, tag, and push image to Amazon ECR | ||
id: build-image | ||
env: | ||
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} | ||
IMAGE_TAG: ${{ github.sha }} | ||
run: | ||
# Build a docker container and | ||
# push it to ECR so that it can | ||
# be deployed to ECS. | ||
docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG training -f training/Dockerfile.prod | ||
docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG | ||
echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT | ||
# Fetch the registered `django` task definition as JSON into temp-task-definition.json; | ||
# the render step reads that file and swaps in the newly built image. | ||
- name: Download task definition | ||
run: | ||
aws ecs describe-task-definition --task-definition django --query taskDefinition > temp-task-definition.json | ||
- name: Fill in the new image ID in the Amazon ECS task definition | ||
id: task-def | ||
uses: aws-actions/amazon-ecs-render-task-definition@v1 | ||
with: | ||
task-definition: temp-task-definition.json | ||
container-name: ${{ env.CONTAINER_NAME }} | ||
image: ${{ steps.build-image.outputs.image }} | ||
|
||
- name: Deploy Amazon ECS task definition | ||
uses: aws-actions/amazon-ecs-deploy-task-definition@v1 | ||
with: | ||
task-definition: ${{ steps.task-def.outputs.task-definition }} | ||
service: ${{ env.ECS_SERVICE }} | ||
cluster: ${{ env.ECS_CLUSTER }} | ||
wait-for-service-stability: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# --- ALB --- | ||
# Security group attached to the ALB: accepts TCP 80 and 443 from any IPv4 address. | ||
resource "aws_security_group" "http" { | ||
name_prefix = "http-sg-" | ||
description = "Allow all HTTP/HTTPS traffic from public" | ||
vpc_id = aws_vpc.main.id | ||
|
||
# One ingress rule is generated for each port in the for_each list. | ||
dynamic "ingress" { | ||
for_each = [80, 443] | ||
content { | ||
protocol = "tcp" | ||
from_port = ingress.value | ||
to_port = ingress.value | ||
cidr_blocks = ["0.0.0.0/0"] | ||
} | ||
} | ||
|
||
# Outbound rule to 0.0.0.0/0; protocol "-1" with ports 0-0 is the AWS provider's notation for all protocols and ports. | ||
egress { | ||
protocol = "-1" | ||
from_port = 0 | ||
to_port = 0 | ||
cidr_blocks = ["0.0.0.0/0"] | ||
} | ||
} | ||
|
||
# Application load balancer placed in every public subnet and guarded by the http security group. | ||
resource "aws_lb" "main" { | ||
name = "alb" | ||
load_balancer_type = "application" | ||
subnets = aws_subnet.public[*].id | ||
security_groups = [aws_security_group.http.id] | ||
} | ||
|
||
# Target group for the app: targets are registered by IP and receive HTTP traffic on port 8000. | ||
resource "aws_lb_target_group" "app" { | ||
name_prefix = "app-" | ||
vpc_id = aws_vpc.main.id | ||
protocol = "HTTP" | ||
port = 8000 | ||
target_type = "ip" | ||
|
||
# GET /health every 30s with a 5s timeout; healthy after 5 consecutive 200s, unhealthy after 2 failures. | ||
health_check { | ||
enabled = true | ||
path = "/health" | ||
matcher = 200 | ||
interval = 30 | ||
timeout = 5 | ||
healthy_threshold = 5 | ||
unhealthy_threshold = 2 | ||
} | ||
|
||
# name_prefix gives each replacement a unique name, so the new group can be created before the old one is destroyed. | ||
# Without this, a forced replacement fails because the listener still references the old group. | ||
lifecycle { | ||
create_before_destroy = true | ||
} | ||
} | ||
|
||
# HTTP listener on port 80 that forwards every request to the app target group. | ||
resource "aws_lb_listener" "http" { | ||
# Use the explicit arn attributes for the *_arn arguments rather than relying on id holding the ARN. | ||
load_balancer_arn = aws_lb.main.arn | ||
port = 80 | ||
protocol = "HTTP" | ||
|
||
default_action { | ||
type = "forward" | ||
target_group_arn = aws_lb_target_group.app.arn | ||
} | ||
} | ||
|
||
# DNS name of the ALB. Despite the output's name this is a hostname only; no scheme is included. | ||
output "alb_url" { | ||
value = aws_lb.main.dns_name | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# ECR repository named "training": tags are mutable and images are scanned on push. | ||
resource "aws_ecr_repository" "training" { | ||
name = "training" | ||
image_tag_mutability = "MUTABLE" | ||
# NOTE(review): force_delete lets Terraform delete the repository even when it still contains images - confirm this is intended. | ||
force_delete = true | ||
|
||
image_scanning_configuration { | ||
scan_on_push = true | ||
} | ||
} | ||
|
||
# ECR repository named "django": tags are mutable and images are scanned on push. | ||
resource "aws_ecr_repository" "django" { | ||
name = "django" | ||
image_tag_mutability = "MUTABLE" | ||
# NOTE(review): force_delete lets Terraform delete the repository even when it still contains images - confirm this is intended. | ||
force_delete = true | ||
|
||
image_scanning_configuration { | ||
scan_on_push = true | ||
} | ||
} | ||
|
||
# Repository URL of the "training" ECR repository. | ||
output "training_repo_url" { | ||
value = aws_ecr_repository.training.repository_url | ||
} | ||
|
||
# Repository URL of the "django" ECR repository. | ||
output "django_repo_url" { | ||
value = aws_ecr_repository.django.repository_url | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,89 +1,81 @@ | ||
terraform { | ||
required_providers { | ||
aws = { | ||
source = "hashicorp/aws" | ||
version = "~> 4.16" | ||
resource "aws_ecs_cluster" "main" { | ||
name = "backend" | ||
} | ||
|
||
# --- ECS Node Role --- | ||
data "aws_iam_policy_document" "ecs_node_doc" { | ||
statement { | ||
actions = ["sts:AssumeRole"] | ||
effect = "Allow" | ||
|
||
principals { | ||
type = "Service" | ||
identifiers = ["ec2.amazonaws.com"] | ||
} | ||
} | ||
|
||
required_version = ">= 1.2.0" | ||
} | ||
|
||
provider "aws" { | ||
region = "us-west-2" | ||
resource "aws_iam_role" "ecs_node_role" { | ||
name_prefix = "backend-ecs-node-role-" | ||
assume_role_policy = data.aws_iam_policy_document.ecs_node_doc.json | ||
} | ||
|
||
resource "aws_ecs_cluster" "deep-learning-playground-kernels" { | ||
name = "deep-learning-playground-kernels-test" | ||
setting { | ||
name = "containerInsights" | ||
value = "enabled" | ||
} | ||
resource "aws_iam_role_policy_attachment" "ecs_node_role_policy" { | ||
role = aws_iam_role.ecs_node_role.name | ||
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role" | ||
} | ||
resource "aws_ecs_service" "dlp-training-service" { | ||
name = "dlp-training-service-test" | ||
cluster = aws_ecs_cluster.deep-learning-playground-kernels.id | ||
task_definition = "arn:aws:ecs:us-west-2:521654603461:task-definition/dlp-training-task:9" | ||
desired_count = 1 | ||
|
||
launch_type = "FARGATE" | ||
resource "aws_iam_instance_profile" "ecs_node" { | ||
name_prefix = "backend-ecs-node-profile-" | ||
path = "/ecs/instance/" | ||
role = aws_iam_role.ecs_node_role.name | ||
} | ||
|
||
deployment_maximum_percent = "200" | ||
deployment_minimum_healthy_percent = "100" | ||
scheduling_strategy = "REPLICA" | ||
# --- ECS Task Role --- | ||
data "aws_iam_policy_document" "ecs_task_doc" { | ||
statement { | ||
actions = ["sts:AssumeRole"] | ||
effect = "Allow" | ||
|
||
network_configuration { | ||
security_groups = ["sg-09291eb84a19daeed"] | ||
subnets = ["subnet-0bebe768ad78b896c", "subnet-0f3e41ad21cfe6ff5"] | ||
assign_public_ip = true | ||
principals { | ||
type = "Service" | ||
identifiers = ["ecs-tasks.amazonaws.com"] | ||
} | ||
} | ||
} | ||
resource "aws_appautoscaling_target" "dev_to_target" { | ||
max_capacity = 1 | ||
min_capacity = 1 | ||
resource_id = "service/${aws_ecs_cluster.deep-learning-playground-kernels.name}/${aws_ecs_service.dlp-training-service.name}" | ||
scalable_dimension = "ecs:service:DesiredCount" | ||
service_namespace = "ecs" | ||
|
||
resource "aws_iam_role" "ecs_task_role" { | ||
name_prefix = "backend-ecs-task-role" | ||
assume_role_policy = data.aws_iam_policy_document.ecs_task_doc.json | ||
} | ||
resource "aws_appautoscaling_policy" "training_service_auto_scaling_policy" { | ||
name = "TrainingServiceAutoScalingPolicy" | ||
policy_type = "StepScaling" | ||
resource_id = "service/${aws_ecs_cluster.deep-learning-playground-kernels.name}/${aws_ecs_service.dlp-training-service.name}" | ||
scalable_dimension = "ecs:service:DesiredCount" | ||
service_namespace = "ecs" | ||
|
||
step_scaling_policy_configuration { | ||
adjustment_type = "ChangeInCapacity" | ||
cooldown = 30 | ||
metric_aggregation_type = "Average" | ||
resource "aws_iam_role_policy_attachment" "ecs_task_role_policy" { | ||
for_each = toset([ | ||
"arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess", | ||
"arn:aws:iam::aws:policy/SecretsManagerReadWrite" | ||
]) | ||
|
||
step_adjustment { | ||
metric_interval_lower_bound = 0 | ||
scaling_adjustment = 3 | ||
} | ||
} | ||
|
||
depends_on = [ | ||
aws_appautoscaling_target.dev_to_target | ||
] | ||
role = aws_iam_role.ecs_task_role.name | ||
policy_arn = each.value | ||
} | ||
resource "aws_appautoscaling_policy" "dlp-queue-size-too-small-policy" { | ||
name = "DLPQueueSizeTooSmallPolicy" | ||
policy_type = "StepScaling" | ||
resource_id = "service/${aws_ecs_cluster.deep-learning-playground-kernels.name}/${aws_ecs_service.dlp-training-service.name}" | ||
scalable_dimension = "ecs:service:DesiredCount" | ||
service_namespace = "ecs" | ||
|
||
step_scaling_policy_configuration { | ||
adjustment_type = "ExactCapacity" | ||
cooldown = 30 | ||
metric_aggregation_type = "Average" | ||
|
||
step_adjustment { | ||
resource "aws_iam_role" "ecs_exec_role" { | ||
name_prefix = "backend-ecs-exec-role" | ||
assume_role_policy = data.aws_iam_policy_document.ecs_task_doc.json | ||
} | ||
|
||
metric_interval_upper_bound = 0 | ||
scaling_adjustment = 1 | ||
} | ||
} | ||
depends_on = [aws_appautoscaling_target.dev_to_target] | ||
resource "aws_iam_role_policy_attachment" "ecs_exec_role_policy" { | ||
role = aws_iam_role.ecs_exec_role.name | ||
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" | ||
} | ||
|
||
# CloudWatch log group /ecs/training; log events are retained for 14 days. | ||
resource "aws_cloudwatch_log_group" "training" { | ||
name = "/ecs/training" | ||
retention_in_days = 14 | ||
} | ||
|
||
# CloudWatch log group /ecs/django; log events are retained for 14 days. | ||
resource "aws_cloudwatch_log_group" "django" { | ||
name = "/ecs/django" | ||
retention_in_days = 14 | ||
} |
Oops, something went wrong.