diff --git a/docs/quick-start.md b/docs/quick-start.md index d1630e9d2..7fb0a7fc5 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -60,7 +60,7 @@ Below are the stand-alone CloudFormation templates for each of the sub-stacks. T | Name | Description | Source | Launch Stack | | -- | -- | :--: | :--: | -{{ cfn_stack_row("AWS Step Functions Example", "SfnExample", "step-functions/sfn-example.template.yaml", "Create a Step Functions State Machine, Batch Job Definitions, and container images to run an example genomics workflow") }} +{{ cfn_stack_row("AWS Step Functions Example", "SfnExample", "step-functions/sfn-workflow.template.yaml", "Create a Step Functions State Machine, Batch Job Definitions, and container images to run an example genomics workflow") }} {{ cfn_stack_row("Cromwell Server", "CromwellServer", "cromwell/cromwell-server.template.yaml", "Create an EC2 instance and an IAM instance profile to run Cromwell") }} {{ cfn_stack_row("Nextflow Resources", "NextflowResources", "nextflow/nextflow-resources.template.yaml", "Create Nextflow specific resources needed to run on AWS: an S3 Bucket for nextflow config and workflows, AWS Batch Job Definition for a Nextflow head node, and an IAM role for the nextflow head node job") }} diff --git a/src/containers/buildspec-nextflow.yml b/src/containers/buildspec-nextflow.yml new file mode 100644 index 000000000..d9636ce80 --- /dev/null +++ b/src/containers/buildspec-nextflow.yml @@ -0,0 +1,25 @@ +# CodeBuild buildspec file for creating container image for nextflow +# assumes the following environment variables: +# - PROJECT_BRANCH: git branch / tag / commit-id to build +# - PROJECT_PATH: path in the source to navigate to prior to build +# - REGISTRY: docker image registry (e.g. ECR) to push the container image to +# - CONTAINER_NAME: name of the container +# - AWS_REGION: (Provided by CodeBuild) region to use for ECR +version: 0.2 +phases: + pre_build: + commands: + - git checkout $PROJECT_BRANCH + - cd $PROJECT_PATH + build: + commands: + - echo "Building container" + - docker build -t ${CONTAINER_NAME} . + post_build: + commands: + - echo "Tagging container image for ECR" + - docker tag ${CONTAINER_NAME} ${REGISTRY}/${CONTAINER_NAME} + - echo "Docker Login to ECR" + - $(aws ecr get-login --no-include-email --region ${AWS_REGION}) + - echo "Pushing container images to ECR" + - docker push ${REGISTRY}/${CONTAINER_NAME} \ No newline at end of file diff --git a/src/containers/buildspec-workflow-tool.yml b/src/containers/buildspec-workflow-tool.yml new file mode 100644 index 000000000..eb227940e --- /dev/null +++ b/src/containers/buildspec-workflow-tool.yml @@ -0,0 +1,27 @@ +# CodeBuild buildspec file for creating container images for a workflow tool +# assumes the following environment variables: +# - PROJECT_BRANCH: git branch / tag / commit-id to build +# - PROJECT_PATH: path in the source to navigate to prior to build +# - REGISTRY: docker image registry (e.g. ECR) to push the container image to +# - CONTAINER_NAME: name of the container +# - AWS_REGION: (Provided by CodeBuild) region to use for ECR +version: 0.2 +phases: + pre_build: + commands: + - git checkout $PROJECT_BRANCH + - cd $PROJECT_PATH + - cp -R ../_common . 
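+      # _common provides the shared build.sh entrypoint invoked in the build
+      # phase below; copying it into the tool directory makes it available
+      # during the container build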
+ build: + commands: + - echo "Building container" + - chmod +x _common/build.sh + - _common/build.sh ${CONTAINER_NAME} + post_build: + commands: + - echo "Tagging container image for ECR" + - docker tag ${CONTAINER_NAME} ${REGISTRY}/${CONTAINER_NAME} + - echo "Docker Login to ECR" + - $(aws ecr get-login --no-include-email --region ${AWS_REGION}) + - echo "Pushing container images to ECR" + - docker push ${REGISTRY}/${CONTAINER_NAME} \ No newline at end of file diff --git a/src/ebs-autoscale/.gitignore b/src/ebs-autoscale/.gitignore deleted file mode 100644 index 1f1b815f5..000000000 --- a/src/ebs-autoscale/.gitignore +++ /dev/null @@ -1,53 +0,0 @@ -*tar.gz - -#********** osx template********** - -.DS_Store - -# Thumbnails -._* - -# Files that might appear on external disk -.Spotlight-V100 -.Trashes - - -#********** windows template********** - -# Windows image file caches -Thumbs.db - -# Folder config file -Desktop.ini - -# Recycle Bin used on file shares -$RECYCLE.BIN/ - - -#********** linux template********** - -.* -!.gitignore -*~ - -# KDE -.directory - - -#********** emacs template********** - -*~ -\#*\# -/.emacs.desktop -/.emacs.desktop.lock -.elc -auto-save-list -tramp -.\#* - -# Org-mode -.org-id-locations -*_archive - -Makefile -scratch diff --git a/src/ebs-autoscale/README.md b/src/ebs-autoscale/README.md index 2bfea4a39..a84f05ded 100644 --- a/src/ebs-autoscale/README.md +++ b/src/ebs-autoscale/README.md @@ -1,58 +1,6 @@ # Amazon Elastic Block Store Autoscale -This is an example of a small daemon process that monitors a BTRFS filesystem mountpoint and automatically expands it when free space falls below a configured threshold. New [Amazon EBS](https://aws.amazon.com/ebs/) volumes are added to the instance as necessary and the underlying [BTRFS filesystem](http://btrfs.wiki.kernel.org) expands while still mounted. As new devices are added, the BTRFS metadata blocks are rebalanced to mitigate the risk that space for metadata will not run out. +## RELOCATION NOTICE -## Assumptions: - -1. That this code is running on a AWS EC2 instance -2. The instance has a IAM Instance Profile with appropriate permissions to create and attache new EBS volumes. Ssee the [IAM Instance Profile](#iam_instance_profile) section below for more details -3. That prerequisites are installed on the instance. - -Provided in this repo are: - -1. A python [script](bin/create-ebs-volume.py) that creates and attaches new EBS volumes to the current instance -2. The daemon [script](bin/ebs-autoscale) that monitors disk space and expands the BTRFS filesystem by leveraging the above script to add EBS volumes, expand the filesystem, and rebalance the metadata blocks -2. A template for an [upstart configuration file](templates/ebs-autoscale.conf.template) -2. A [logrotate configuration file](templates/ebs-autoscale.logrotate) which should not be needed but may as well be in place for long-running instances. -5. A [initialization script](bin/init-ebs-autoscale.sh) to configure and install all of the above -6. A [cloud-init](templates/cloud-init-userdata.yaml) file for user-data that installs required packages and runs the initialization script. By default this creates a mount point of `/scratch` on a encrypted 20GB EBS volume. To change the mount point, edit the file. - -## Installation - -The easiest way to set up an instance is to provide a launch call with the userdata [cloud-init script](templates/cloud-init-userdata.yaml). 
Here is an example of launching the [Amazon ECS-Optimized AMI](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-optimized_AMI.html) in us-east-1 using this file:
-
-```bash
-aws ec2 run-instances --image-id ami-5253c32d \
-  --key-name MyKeyPair \
-  --user-data file://./templates/cloud-init-userdata.yaml \
-  --count 1 \
-  --security-group-ids sg-123abc123 \
-  --instance-type t2.micro \
-  --iam-instance-profile Name=MyInstanceProfileWithProperPermissions
-```
-
-
-## A note on IAM Instance Profile
-
-In the above, we assume that the `MyInstanceProfileWithProperPermissions` EC2 Instance Profile exists and has the following permissions:
-
-```json
-{
-    "Version": "2012-10-17",
-    "Statement": [
-        {
-            "Effect": "Allow",
-            "Action": [
-                "ec2:AttachVolume",
-                "ec2:DescribeVolumeStatus",
-                "ec2:DescribeVolumes",
-                "ec2:ModifyInstanceAttribute",
-                "ec2:DescribeVolumeAttribute",
-                "ec2:CreateVolume",
-                "ec2:DeleteVolume"
-            ],
-            "Resource": "*"
-        }
-    ]
-}
-```
+The code for this daemon has been moved to the following repository:
+[awslabs/amazon-ebs-autoscale](https://github.com/awslabs/amazon-ebs-autoscale)
diff --git a/src/ebs-autoscale/bin/create-ebs-volume.py b/src/ebs-autoscale/bin/create-ebs-volume.py
deleted file mode 100755
index 7f217a467..000000000
--- a/src/ebs-autoscale/bin/create-ebs-volume.py
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2018 Amazon.com, Inc. or its affiliates.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-#    this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-#    contributors may be used to endorse or promote products derived from
-#    this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
-# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
-# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
-# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
- -from __future__ import print_function -import glob, re, os, sys, time -import urllib -import argparse - -import boto3 -from botocore.exceptions import ClientError - -## TODO: CLI arguments -parameters = argparse.ArgumentParser(description="Create a new EBS Volume and attach it to the current instance") -parameters.add_argument("-s","--size", type=int, required=True) -parameters.add_argument("-t","--type", type=str, default="gp2") -parameters.add_argument("-e","--encrypted", type=bool, default=True) - -def device_exists(path): - try: - return os.path.stat.S_ISBLK(os.stat(path).st_mode) - except: - return False - -alphabet = [] -# Use letters b..z -for letter in range(98,123): - alphabet.append(chr(letter)) - -def detect_devices(): - devices = [] - for device in glob.glob('/dev/sd*'): - devices.append(device) - return devices - -def get_next_logical_device(): - devices = detect_devices() - for letter in alphabet: - d = "/dev/sd{0}".format(letter) - if d not in devices: - return d - return None - -def get_metadata(key): - return urllib.urlopen(("/").join(['http://169.254.169.254/latest/meta-data', key])).read() - - -# create a EBS volume -def create_and_attach_volume(size=10, vol_type="gp2", encrypted=True, max_attached_volumes=16, max_created_volumes=256): - instance_id = get_metadata("instance-id") - availability_zone = get_metadata("placement/availability-zone") - region = availability_zone[0:-1] - session = boto3.Session(region_name=region) - - ec2 = session.resource("ec2") - client = session.client("ec2") - instance = ec2.Instance(instance_id) - - # TODO: put a limit on the number of created volumes from this instance - # use tagging by instance-id for filtering - - # limit the number of volumes that can be attached to the instance - attached_volumes = [v.id for v in instance.volumes.all()] - if len(attached_volumes) > max_attached_volumes: - raise RuntimeError( - "maximum number of attached volumes reached ({})".format(max_attached_volumes) - ) - - device = get_next_logical_device() - if not device: - raise RuntimeError( - "could not find unused device" - ) - - # Attempt to create the volume - # A ClientError is thrown if there are insufficient permissions or if - # service limits are reached (e.g. hitting the limit for a storage class in a region) - # It's ok for this error to be uncaught. - volume = ec2.create_volume( - AvailabilityZone=availability_zone, - Encrypted=encrypted, - VolumeType=vol_type, - Size=size - ) - while True: - volume.reload() - if volume.state == "available": - break - else: - time.sleep(1) - - # Need to assure that the created volume is successfully attached to be - # cost efficient. If attachment fails, delete the volume. 
- try: - instance.attach_volume( - VolumeId=volume.volume_id, - Device=device - ) - except ClientError as e: - client.delete_volume(VolumeId=volume.volume_id) - raise e - - # wait until device exists - while True: - if device_exists(device): - break - else: - time.sleep(1) - - instance.modify_attribute( - Attribute="blockDeviceMapping", - BlockDeviceMappings=[{"DeviceName": device, - "Ebs": {"DeleteOnTermination":True,"VolumeId":volume.volume_id} - }] - ) - return device - -if __name__ == '__main__': - args = parameters.parse_args() - print(create_and_attach_volume(args.size), end='') - sys.stdout.flush() diff --git a/src/ebs-autoscale/bin/ebs-autoscale b/src/ebs-autoscale/bin/ebs-autoscale deleted file mode 100755 index d473e7f79..000000000 --- a/src/ebs-autoscale/bin/ebs-autoscale +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/sh -# Copyright 2018 Amazon.com, Inc. or its affiliates. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, -# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - -if [ "$#" -ne "1" ]; then - echo "USAGE: $0 " - exit 1 -fi - -logthis () { - echo "[`date`] $1" -} - -MP=$1 -BASEDIR=$(dirname $0) -AZ=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone/) - -logthis "EBS Autoscaling mountpoint: ${MP}" - -while [ -z "${AZ}" ]; do - logthis "Metadata service did not return AZ. Trying again." - sleep 1 - AZ=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone/) -done -RG=$(echo ${AZ} | sed -e 's/[a-z]$//') -logthis "Region = $RG." -IN=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) -DRIVE_LETTERS=({a..z}) - -# make sure that this device is mounted. 
-until [ -d "${MP}" ]; do - sleep 1 -done -calc_threshold() { - local num_devices=$(ls /dev/sd* | grep -v -E '[0-9]$' | wc -l) - local threshold=50 - if [ "$num_devices" -gt "4" ] && [ "$num_devices" -le "6" ]; then - threshold=80 - elif [ "$num_devices" -gt "6" ] && [ "$num_devices" -le "10" ]; then - threshold=90 - else - threshold=50 - fi - echo ${threshold} -} -calc_new_size() { - local num_devices=$1 - local new_size=150 - - if [ "$num_devices" -ge "4" ] && [ "$num_devices" -le "6" ]; then - new_size=300 - elif [ "$num_devices" -gt "6" ] && [ "$num_devices" -le "10" ]; then - new_size=1000 - elif [ "$num_devices" -gt "10" ]; then - new_size=1500 - else - new_size=150 - fi - echo ${new_size} -} - -add_space () { - local num_devices=$(ls /dev/sd* | grep -v -E '[0-9]$' | wc -l) - if [ "${num_devices}" -ge "16" ]; then - logthis "No more volumes can be safely added." - return 0 - fi - local curr_size=$(df -BG ${MP} | grep ${MP} | awk '{print $2} ' | cut -d'G' -f1) - if [ "${curr_size}" -lt "16384" ]; then - local vol_size=$(calc_new_size ${num_devices}) - logthis "Extending LV ${MP} by ${vol_size}GB" - - DV=$(python ${BASEDIR}/create-ebs-volume.py -s ${vol_size}) - - exit_status=$? - if [ $exit_status -eq 0 ]; then - logthis "adding volume to filesystem" - btrfs device add ${DV} ${MP} - btrfs balance start -m ${MP} - logthis "Finished extending device." - - else - logthis "Error creating or attaching volume" - fi - - fi -} - -COUNT=300 -THRESHOLD=$(calc_threshold) -while true; do - F=$(df -BG ${MP} | grep -v Filesystem | awk '{print $5}' | cut -d"%" -f1 -) - if [ $F -ge "${THRESHOLD}" ]; then - logthis "LOW DISK ($F): Adding more." - add_space - fi - if [ "${COUNT}" -ge "300" ]; then - logthis "Threshold -> ${THRESHOLD} :: Used% -> ${F}%" - COUNT=0 - fi - THRESHOLD=$(calc_threshold) - COUNT=$(expr $COUNT + 1 ) - sleep 1 -done diff --git a/src/ebs-autoscale/bin/init-ebs-autoscale.sh b/src/ebs-autoscale/bin/init-ebs-autoscale.sh deleted file mode 100755 index d64c3ef1a..000000000 --- a/src/ebs-autoscale/bin/init-ebs-autoscale.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/sh -# Copyright 2018 Amazon.com, Inc. or its affiliates. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, -# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL -# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - -function printUsage() { - #statements - echo "USAGE: $0 []" -} - -if [ "$#" -lt "1" ]; then - printUsage - exit 1 -fi - - -MP=$1 -DV=$2 - -AZ=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone/) -RG=$(echo ${AZ} | sed -e 's/[a-z]$//') -IN=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) -BASEDIR=$(dirname $0) - -# copy the binaries to /usr/local/bin -cp ${BASEDIR}/{create-ebs-volume.py,ebs-autoscale} /usr/local/bin/ - -# If a device is not given, or if the device is not valid -# create a new 20GB volume -if [ -z "${DV}" ] || [ ! -b "${DV}" ]; then - DV=$(create-ebs-volume.py --size 20) -fi - -# create the BTRFS filesystem -mkfs.btrfs -f -d single $DV - -if [ -e $MP ] && ! [ -d $MP ]; then - echo "ERR: $MP exists but is not a directory." - exit 1 -elif ! [ -e $MP ]; then - mkdir -p $MP -fi -mount $DV $MP - -echo -e "${DV}\t${MP}\tbtrfs\tdefaults\t0\t0" | tee -a /etc/fstab - -# go to the template directory -cd ${BASEDIR}/../templates - -# install the upstart config -sed -e "s#YOUR_MOUNTPOINT#${MP}#" ebs-autoscale.conf.template > /etc/init/ebs-autoscale.conf - -# install the logrotate config -cp ebs-autoscale.logrotate /etc/logrotate.d/ebs-autoscale - -# Register the ebs-autoscale upstart conf and start the service -initctl reload-configuration -initctl start ebs-autoscale diff --git a/src/ebs-autoscale/templates/cloud-init-userdata.yaml b/src/ebs-autoscale/templates/cloud-init-userdata.yaml deleted file mode 100644 index 9164b5553..000000000 --- a/src/ebs-autoscale/templates/cloud-init-userdata.yaml +++ /dev/null @@ -1,16 +0,0 @@ -#cloud-config -repo_update: true -repo_upgrade: all - -packages: - - jq - - btrfs-progs - - python27-pip - - sed - - wget - - -runcmd: - - pip install -U awscli boto3 - - cd /opt && wget https://aws-genomics-workflows.s3.amazonaws.com/artifacts/aws-ebs-autoscale.tgz && tar -xzf aws-ebs-autoscale.tgz - - sh /opt/ebs-autoscale/bin/init-ebs-autoscale.sh /scratch 2>&1 > /var/log/init-ebs-autoscale.log diff --git a/src/ebs-autoscale/templates/ebs-autoscale.conf.template b/src/ebs-autoscale/templates/ebs-autoscale.conf.template deleted file mode 100644 index dcc0b05bb..000000000 --- a/src/ebs-autoscale/templates/ebs-autoscale.conf.template +++ /dev/null @@ -1,23 +0,0 @@ -# upstart configuration -description "A process to monitor disk usage by the docker and add EBS volumes to it as necessary" -author "angel pizarro" - -env MOUNTPOINT=YOUR_MOUNTPOINT - -start on filesystem or runlevel [2345] - -stop on shutdown - -script - echo $$ >> /var/run/ebs-autoscale.pid - exec /usr/local/bin/ebs-autoscale ${MOUNTPOINT} >> /var/log/ebs-autoscale.log 2>&1 -end script - -pre-start script - echo "[`date`] Starting Docker EBS autoscaling" >> /var/log/ebs-autoscale.log -end script - -pre-stop script - rm -f /var/run/ebs-autoscale.pid - echo "[`date`] Stopping Docker EBS autoscaling" >> /var/log/ebs-autoscale.log -end script diff --git a/src/ebs-autoscale/templates/ebs-autoscale.logrotate 
b/src/ebs-autoscale/templates/ebs-autoscale.logrotate
deleted file mode 100644
index 0893adf56..000000000
--- a/src/ebs-autoscale/templates/ebs-autoscale.logrotate
+++ /dev/null
@@ -1,11 +0,0 @@
-/var/log/ebs-autoscale.log {
-    daily
-    missingok
-    rotate 2
-    compress
-    notifempty
-    create 0644 root root
-    postrotate
-        service ebs-autoscale restart
-    endscript
-}
diff --git a/src/templates/_common/container-build.template.yaml b/src/templates/_common/container-build.template.yaml
new file mode 100644
index 000000000..8490f05b6
--- /dev/null
+++ b/src/templates/_common/container-build.template.yaml
@@ -0,0 +1,461 @@
+---
+AWSTemplateFormatVersion: "2010-09-09"
+Description: >-
+  (WWPS-GLS-WF-CONTAINER-BUILD) Creates resources for building a Docker container
+  image using CodeBuild, storing the image in ECR, and optionally creating a
+  corresponding Batch Job Definition.
+  It is recommended to name this stack "container-{ContainerName}".
+
+Mappings:
+  TagMap:
+    default:
+      architecture: "genomics-workflows"
+      tags:
+        - Key: "architecture"
+          Value: "genomics-workflows"
+
+Parameters:
+  ContainerName:
+    Description: Name of the container (does not include tag)
+    Type: String
+
+  GitRepoType:
+    Description: >
+      Git repository hosting provider.
+    Type: String
+    Default: GITHUB
+    AllowedValues:
+      - CODECOMMIT
+      - GITHUB
+      - BITBUCKET
+
+  GitCloneUrlHttp:
+    Description: >
+      The HTTP clone url for the GitHub repository that has container source code.
+      For example - http://github.com/user/repo.git
+    Type: String
+
+  ProjectBranch:
+    Description: branch, tag, or commit to use
+    Type: String
+    Default: master
+
+  ProjectPath:
+    Description: >
+      Relative path in the repository to enter for the build.
+      For example - ./path/to/container
+    Type: String
+    Default: "."
+
+  ProjectBuildSpecFile:
+    Description: >
+      Relative path to the project buildspec.yml file or equivalent.
+      If left blank, CodeBuild defaults to a buildspec.yml at the root of the source.
+ Default: "buildspec.yml" + Type: String + + CreateBatchJobDefinition: + Description: > + Create an AWS Batch Job Definition for the container + Type: String + Default: "No" + AllowedValues: + - "Yes" + - "No" + +Conditions: + CreateBatchJobDefinitionTrue: + Fn::Equals: + - !Ref CreateBatchJobDefinition + - "Yes" + +Resources: + IAMCodeBuildRole: + Type: AWS::IAM::Role + Properties: + Description: !Sub codebuild-service-role-${AWS::StackName}-${AWS::Region} + Path: /service-role/ + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: codebuild.amazonaws.com + Action: sts:AssumeRole + Policies: + - PolicyName: !Sub codebuild-basepolicy-${AWS::StackName}-${AWS::Region} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Resource: + - !Sub "arn:aws:codebuild:${AWS::Region}:${AWS::AccountId}:report-group/*" + Action: + - codebuild:CreateReportGroup + - codebuild:CreateReport + - codebuild:UpdateReport + - codebuild:BatchPutTestCases + + - Effect: Allow + Resource: + - !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/codebuild/*" + - !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/codebuild/*:*" + Action: + - logs:CreateLogGroup + - logs:CreateLogStream + - logs:PutLogEvents + + - Effect: Allow + Resource: + # Constructing the ARN manually here since we are using a custom + # resource to create the repository + - !Sub "arn:aws:ecr:${AWS::Region}:${AWS::AccountId}:repository/${ContainerName}" + Action: + - ecr:BatchCheckLayerAvailability + - ecr:CompleteLayerUpload + - ecr:InitiateLayerUpload + - ecr:PutImage + - ecr:UploadLayerPart + + - Effect: Allow + Resource: "*" + Action: + - ecr:GetAuthorizationToken + + CodeBuildProject: + Type: AWS::CodeBuild::Project + Properties: + Description: !Sub >- + Builds the container image ${ContainerName} + Artifacts: + Type: NO_ARTIFACTS + Environment: + Type: LINUX_CONTAINER + Image: aws/codebuild/standard:1.0 + ComputeType: BUILD_GENERAL1_LARGE + PrivilegedMode: True + EnvironmentVariables: + - Name: AWS_ACCOUNT_ID + Value: !Ref AWS::AccountId + + - Name: REGISTRY + Value: !Sub "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com" + + - Name: CONTAINER_NAME + Value: !Ref ContainerName + + - Name: PROJECT_BRANCH + Value: !Ref ProjectBranch + + - Name: PROJECT_PATH + Value: !Ref ProjectPath + + ServiceRole: !GetAtt IAMCodeBuildRole.Arn + Source: + Type: !Ref GitRepoType + Location: !Ref GitCloneUrlHttp + BuildSpec: !Ref ProjectBuildSpecFile + + Tags: !FindInMap ["TagMap", "default", "tags"] + + IAMLambdaExecutionRole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: + Service: lambda.amazonaws.com + Action: "sts:AssumeRole" + Path: / + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole + - arn:aws:iam::aws:policy/service-role/AWSLambdaRole + Policies: + - PolicyName: !Sub codebuild-access-${AWS::Region} + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - "codebuild:StartBuild" + - "codebuild:BatchGetBuilds" + Resource: "*" + + - PolicyName: !Sub ecr-access-${AWS::Region} + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Resource: "*" + Action: + - "ecr:GetAuthorizationToken" + - Effect: Allow + Resource: !Sub "arn:aws:ecr:${AWS::Region}:${AWS::AccountId}:repository/*" + Action: + - "ecr:DescribeRepositories" + - "ecr:CreateRepository" + - Effect: Allow + Resource: !Sub 
"arn:aws:ecr:${AWS::Region}:${AWS::AccountId}:repository/${ContainerName}" + Action: + - "ecr:*LifecyclePolicy" + - "ecr:DeleteRepository" + - "ecr:BatchDeleteImage" + + CodeBuildInvocation: + Type: Custom::CodeBuildInvocation + Properties: + ServiceToken: !GetAtt CodeBuildInvocationFunction.Arn + BuildProject: !Ref CodeBuildProject + + # Need to explicitly define dependency on the ECR Custom Resource + DependsOn: ECRRepositoryHandler + + CodeBuildInvocationFunction: + Type: AWS::Lambda::Function + Properties: + Handler: index.handler + Role: !GetAtt IAMLambdaExecutionRole.Arn + Runtime: python3.7 + Timeout: 900 # if containers take more than 15m to build, consider using a WaitCondition + Code: + ZipFile: | + from time import sleep + + import boto3 + import cfnresponse + + def handler(event, context): + if event['RequestType'] in ("Create", "Update"): + codebuild = boto3.client('codebuild') + build = codebuild.start_build( + projectName=event["ResourceProperties"]["BuildProject"] + )['build'] + + id = build['id'] + status = build['buildStatus'] + while status == 'IN_PROGRESS': + sleep(10) + build = codebuild.batch_get_builds(ids=[id])['builds'][0] + status = build['buildStatus'] + + if status != "SUCCEEDED": + cfnresponse.send(event, context, cfnresponse.FAILED, None) + + cfnresponse.send(event, context, cfnresponse.SUCCESS, None) + + # ECR Repositories defined by CloudFormation currently do not support Deletion + # or UpdateReplace Policies. This will cause failures when this stack or a + # parent stack (if this template is nested) are deleted or updated. + # + # To work around this, we can use a custom resource that handles: + # * pre-existing repositories on create and update events + # * retaining repositories on delete events + + # # Preferred way to create an ECR Image Repository + # # Leaving this here for reference + # ECRRepository: + # Type: "AWS::ECR::Repository" + # Properties: + # RepositoryName: !Ref ContainerName + # LifecyclePolicy: + # LifecyclePolicyText: |- + # { + # "rules": [ + # { + # "rulePriority": 1, + # "description": "Keep only one untagged image, expire all others", + # "selection": { + # "tagStatus": "untagged", + # "countType": "imageCountMoreThan", + # "countNumber": 1 + # }, + # "action": { + # "type": "expire" + # } + # } + # ] + # } + + ECRRepositoryHandler: + Type: Custom::ECRRepositoryHandler + Properties: + ServiceToken: !GetAtt ECRRepositoryHandlerFunction.Arn + RepositoryName: !Ref ContainerName + DeletePolicy: Retain + UpdateReplacePolicy: Retain + LifecyclePolicy: + LifecyclePolicyText: |- + { + "rules": [ + { + "rulePriority": 1, + "description": "Keep only one untagged image, expire all others", + "selection": { + "tagStatus": "untagged", + "countType": "imageCountMoreThan", + "countNumber": 1 + }, + "action": { + "type": "expire" + } + } + ] + } + + ECRRepositoryHandlerFunction: + Type: AWS::Lambda::Function + Properties: + Handler: index.handler + Role: !GetAtt IAMLambdaExecutionRole.Arn + Runtime: python3.7 + Timeout: 10 + Code: + ZipFile: | + from time import sleep + + import boto3 + import cfnresponse + + send, SUCCESS, FAILED = ( + cfnresponse.send, + cfnresponse.SUCCESS, + cfnresponse.FAILED + ) + + ecr = boto3.client('ecr') + + def wait(repo, until): + until = until.lower() + if until == "deleted": + while True: + try: + sleep(1) + ecr.describe_repositories(repositoryNames=[repo]) + except ecr.exceptions.RepositoryNotFoundException: + break + + if until == "exists": + exists = False + while not exists: + try: + sleep(1) + exists = 
ecr.describe_repositories(repositoryNames=[repo])["repositories"] + break + except ecr.exceptions.RepositoryNotFoundException: + exists = False + + def put_lifecycle_policy(repo, props): + if props.get("LifecyclePolicy"): + ecr.put_lifecycle_policy( + repositoryName=repo, + lifecyclePolicyText=props["LifecyclePolicy"]["LifecyclePolicyText"] + ) + + def create(repo, props, event, context): + # use existing repository if available, otherwise create + try: + ecr.create_repository(repositoryName=repo) + wait(repo, "exists") + put_lifecycle_policy(repo, props) + + except ecr.exceptions.RepositoryAlreadyExistsException: + print(f"Repository '{repo}' already exists - CREATE ECR repository ignored") + put_lifecycle_policy(repo, props) + + except Exception as e: + send(event, context, FAILED, None) + raise(e) + + def update(repo, props, event, context): + # use existing repository if available + update_policy = props.get("UpdateReplacePolicy") + try: + if update_policy and update_policy.lower() == "retain": + put_lifecycle_policy(repo, props) + + else: + # replace the repo + delete(repo, props, event, context) + create(repo, props, event, context) + + except Exception as e: + send(event, context, FAILED, None) + raise(e) + + def delete(repo, props, event, context): + # retain repository if specified + # otherwise force delete + delete_policy = props.get("DetetePolicy") + try: + if delete_policy and not delete_policy.lower() == "retain": + ecr.delete_repository(repositoryName=repo, force=True) + wait(repo, "deleted") + + except Exception as e: + send(event, context, FAILED, None) + raise(e) + + def handler(event, context): + props = event["ResourceProperties"] + repo = props.get("RepositoryName") + + if event["RequestType"] in ("Create", "Update", "Delete"): + action = globals()[event["RequestType"].lower()] + action(repo, props, event, context) + send(event, context, SUCCESS, None) + + else: + # unhandled request type + send(event, context, FAILED, None) + + + BatchJobDef: + Type: AWS::Batch::JobDefinition + Condition: CreateBatchJobDefinitionTrue + Properties: + JobDefinitionName: !Ref ContainerName + Type: container + ContainerProperties: + Image: !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${ContainerName} + Vcpus: 8 + Memory: 16000 + Volumes: + - Host: + SourcePath: /opt/miniconda + Name: awscli + MountPoints: + - ContainerPath: /opt/miniconda + SourceVolume: awscli + +Outputs: + CodeBuildProject: + Value: !GetAtt CodeBuildProject.Arn + Export: + Name: !Sub ${AWS::StackName}-CodeBuildProject + + CodeBuildServiceRole: + Value: !GetAtt IAMCodeBuildRole.Arn + Export: + Name: !Sub ${AWS::StackName}-CodeBuildServiceRole + + ContainerImage: + Value: !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/${ContainerName} + Export: + Name: !Sub ${AWS::StackName}-ECRImageRepository + + JobDefinition: + Value: + Fn::If: + - CreateBatchJobDefinitionTrue + - !Ref BatchJobDef + - "-" + Export: + Name: !Sub ${AWS::StackName}-BatchJobDefinition + +... 
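As a reference for the `CodeBuildInvocation` custom resource above, the same start-and-poll loop can be exercised outside CloudFormation to smoke-test a build project before wiring it into a stack. A minimal sketch, assuming default AWS credentials; the project name passed on the command line is a placeholder for whatever CodeBuild project the stack creates:

```python
# Smoke-test a CodeBuild project using the same start/poll pattern as the
# CodeBuildInvocation Lambda above (the project name is a placeholder).
import sys
from time import sleep

import boto3

def run_build(project_name):
    codebuild = boto3.client("codebuild")
    build = codebuild.start_build(projectName=project_name)["build"]
    build_id, status = build["id"], build["buildStatus"]
    while status == "IN_PROGRESS":
        sleep(10)
        build = codebuild.batch_get_builds(ids=[build_id])["builds"][0]
        status = build["buildStatus"]
    return status

if __name__ == "__main__":
    # e.g.: python poll_build.py container-nextflow-project
    status = run_build(sys.argv[1])
    print("build finished:", status)
    sys.exit(0 if status == "SUCCEEDED" else 1)
```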
diff --git a/src/templates/aws-genomics-launch-template.template.yaml b/src/templates/aws-genomics-launch-template.template.yaml
index 4789ca0b1..1f233f722 100644
--- a/src/templates/aws-genomics-launch-template.template.yaml
+++ b/src/templates/aws-genomics-launch-template.template.yaml
@@ -27,8 +27,10 @@ Mappings:
         - service docker stop
         - cp -au /var/lib/docker /var/lib/docker.bk
         - rm -rf /var/lib/docker/*
-        - cd /opt && wget $artifactRootUrl/aws-ebs-autoscale.tgz && tar -xzf aws-ebs-autoscale.tgz
-        - sh /opt/ebs-autoscale/bin/init-ebs-autoscale.sh $scratchPath /dev/sdc 2>&1 > /var/log/init-ebs-autoscale.log
+        - EBS_AUTOSCALE_VERSION=$(curl --silent "https://api.github.com/repos/awslabs/amazon-ebs-autoscale/releases/latest" | jq -r .tag_name)
+        - cd /opt && git clone https://github.com/awslabs/amazon-ebs-autoscale.git
+        - cd /opt/amazon-ebs-autoscale && git checkout $EBS_AUTOSCALE_VERSION
+        - sh /opt/amazon-ebs-autoscale/install.sh $scratchPath /dev/sdc > /var/log/ebs-autoscale-install.log 2>&1
         - sed -i 's+OPTIONS=.*+OPTIONS="--storage-driver btrfs"+g' /etc/sysconfig/docker-storage
         - cp -au /var/lib/docker.bk/* /var/lib/docker
         - cd /opt && wget $artifactRootUrl/aws-ecs-additions.tgz && tar -xzf aws-ecs-additions.tgz
@@ -38,8 +40,10 @@ Mappings:
 
       cromwell:
         additions: |-
-          - cd /opt && wget $artifactRootUrl/aws-ebs-autoscale.tgz && tar -xzf aws-ebs-autoscale.tgz
-          - sh /opt/ebs-autoscale/bin/init-ebs-autoscale.sh $scratchPath /dev/sdc 2>&1 > /var/log/init-ebs-autoscale.log
+          - EBS_AUTOSCALE_VERSION=$(curl --silent "https://api.github.com/repos/awslabs/amazon-ebs-autoscale/releases/latest" | jq -r .tag_name)
+          - cd /opt && git clone https://github.com/awslabs/amazon-ebs-autoscale.git
+          - cd /opt/amazon-ebs-autoscale && git checkout $EBS_AUTOSCALE_VERSION
+          - sh /opt/amazon-ebs-autoscale/install.sh $scratchPath /dev/sdc > /var/log/ebs-autoscale-install.log 2>&1
           - cd /opt && wget $artifactRootUrl/aws-ecs-additions.tgz && tar -xzf aws-ecs-additions.tgz
           - sh /opt/ecs-additions/ecs-additions-cromwell.sh
 
@@ -49,8 +53,10 @@ Mappings:
         - service docker stop
         - cp -au /var/lib/docker /var/lib/docker.bk
         - rm -rf /var/lib/docker/*
-        - cd /opt && wget $artifactRootUrl/aws-ebs-autoscale.tgz && tar -xzf aws-ebs-autoscale.tgz
-        - sh /opt/ebs-autoscale/bin/init-ebs-autoscale.sh $scratchPath /dev/sdc 2>&1 > /var/log/init-ebs-autoscale.log
+        - EBS_AUTOSCALE_VERSION=$(curl --silent "https://api.github.com/repos/awslabs/amazon-ebs-autoscale/releases/latest" | jq -r .tag_name)
+        - cd /opt && git clone https://github.com/awslabs/amazon-ebs-autoscale.git
+        - cd /opt/amazon-ebs-autoscale && git checkout $EBS_AUTOSCALE_VERSION
+        - sh /opt/amazon-ebs-autoscale/install.sh $scratchPath /dev/sdc > /var/log/ebs-autoscale-install.log 2>&1
         - sed -i 's+OPTIONS=.*+OPTIONS="--storage-driver btrfs"+g' /etc/sysconfig/docker-storage
         - cp -au /var/lib/docker.bk/* /var/lib/docker
         - cd /opt && wget $artifactRootUrl/aws-ecs-additions.tgz && tar -xzf aws-ecs-additions.tgz
@@ -145,6 +151,7 @@ Resources:
             - python27-pip
             - sed
             - wget
+            - git
             - amazon-ssm-agent
 
           runcmd:
diff --git a/src/templates/cromwell/cromwell-server.template.yaml b/src/templates/cromwell/cromwell-server.template.yaml
index 8081d84df..3421c75bb 100644
--- a/src/templates/cromwell/cromwell-server.template.yaml
+++ b/src/templates/cromwell/cromwell-server.template.yaml
@@ -90,6 +90,11 @@ Parameters:
   S3BucketName:
     Description: S3 bucket you are using for workflow inputs and outputs
     Type: String
+
+  S3OpenDataBucketARNs:
+    Description: Open datasets on AWS S3 for workflow
inputs + Type: String + Default: "arn:aws:s3:::gatk-test-data/*,arn:aws:s3:::broad-references/*" BatchQueue: Description: ARN of the AWS Batch Job Queue this server will use by default @@ -142,7 +147,10 @@ Resources: Action: - "s3:ListBucket" - "s3:ListAllMyBuckets" - + - Effect: Allow + Resource: !Split [",", !Ref S3OpenDataBucketARNs] + Action: + - "s3:GetObject" - PolicyName: !Sub CromwellServer-CloudWatch-Access-${AWS::Region} PolicyDocument: @@ -523,6 +531,7 @@ Metadata: - CromwellVersion - CromwellVersionSpecified - S3BucketName + - S3OpenDataBucketARNs - BatchQueue ParameterLabels: @@ -542,6 +551,8 @@ Metadata: default: "Key Pair Name" S3BucketName: default: "S3 Bucket Name" + S3OpenDataBucketARNs: + default: "S3 Open Data Bucket ARNs" BatchQueue: default: "Default Batch Queue" CromwellVersion: @@ -561,4 +572,3 @@ Outputs: HostName: Value: !GetAtt EC2Instance.PublicDnsName Description: Cromwell server public DNS name - diff --git a/src/templates/deprecated/aws-genomics-ami.template.yaml b/src/templates/deprecated/aws-genomics-ami.template.yaml deleted file mode 100644 index bcf6c2b8e..000000000 --- a/src/templates/deprecated/aws-genomics-ami.template.yaml +++ /dev/null @@ -1,304 +0,0 @@ -AWSTemplateFormatVersion: "2010-09-09" -Description: Creates a custom AMI to be used with AWS Batch - -Mappings: - TagMap: - default: - architecture: "genomics-workflows" - solution: "default" - tags: - - Key: "architecture" - Value: "genomics-workflows" - - Key: "solution" - Value: "default" - -# Parameters -Parameters: - VpcId: - Type: AWS::EC2::VPC::Id - Description: Recommended to use the Default VPC here - PublicSubnetID: - Type: AWS::EC2::Subnet::Id - Description: Select a public subnet to launch into - ScratchMountPoint: - Type: String - Default: /scratch - Description: Path for the scratch mount point in the instance - ScratchVolumeSize: - Type: Number - Default: 20 - Description: The initial size of the scratch volume - UseEncryption: - Type: String - Default: Yes - Description: Whether or not to encrypt the addtional volumes on the EC2 instance - AllowedValues: - - Yes - - No - LatestECSAMI: - Description: The latest Linux ECS Optimized AMI - Type: AWS::SSM::Parameter::Value - Default: /aws/service/ecs/optimized-ami/amazon-linux/recommended/image_id - AllowedValues: - - /aws/service/ecs/optimized-ami/amazon-linux/recommended/image_id - KeyName: - Type: AWS::EC2::KeyPair::KeyName - Default: GENERAL.NONE - AMIType: - Type: String - Description: The type of AMI you want to create - Default: default - AllowedValues: - - default - - cromwell - AMIName: - Type: String - Default: genomics-ami - Description: The name of the AMI that is created - -# Conditions -Conditions: - UseEncryptedVolume: !Equals [!Ref UseEncryption, Yes] - UseCromwell: !Equals [!Ref AMIType, cromwell] - -# Resources -Resources: - EC2SecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - GroupDescription: Security group for Custom AMI instance - VpcId: !Ref VpcId - Tags: - - Key: Name - Value: genomics-base-sg - EC2Instance: - Type: AWS::EC2::Instance - Metadata: - AWS::CloudFormation::Init: - config: - packages: - rpm: - ssm-agent: "https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm" - yum: - jq: [] - btrfs-progs: [] - python27-pip: [] - sed: [] - wget: [] - commands: - 00InstallBoto3: - command: pip install -U awscli boto3 - 01InstallAutoScaling: - command: wget https://aws-genomics-workflows.s3.amazonaws.com/artifacts/aws-ebs-autoscale.tgz && tar -xzf 
aws-ebs-autoscale.tgz - cwd: /opt - 02MountAutoScaling: - command: !Sub | - sh /opt/ebs-autoscale/bin/init-ebs-autoscale.sh '${ScratchMountPoint}' /dev/sdc 2>&1 > /var/log/init-ebs-autoscale.log - env: - PATH: "/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin" - 03GetECSAdditions: - command: wget https://aws-genomics-workflows.s3.amazonaws.com/artifacts/aws-ecs-additions.tgz && tar -xzf aws-ecs-additions.tgz - cwd: /opt - 04InstallECSAdditions: - command: - Fn::If: - - UseCromwell - - !Join [" ", ["sh", "/opt/ecs-additions/ecs-additions-cromwell.sh"]] - - echo "OK" - env: - PATH: "/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin" - Properties: - ImageId: !Ref LatestECSAMI - InstanceType: t2.large - KeyName: !Ref KeyName - Tags: - - Key: Name - Value: genomics-base-ami - - Key: architecture - Value: !FindInMap ["TagMap", "default", "architecture"] - - Key: solution - Value: !Ref AMIType - BlockDeviceMappings: - - DeviceName: /dev/sdc - Ebs: - VolumeSize: !Ref ScratchVolumeSize - Encrypted: - Fn::If: [UseEncryptedVolume, true, false] - - DeviceName: /dev/xvdcz - Ebs: - Encrypted: - Fn::If: [UseEncryptedVolume, true, false] - UserData: - Fn::Base64: !Sub | - #!/bin/bash -x - yum -y update - yum install -y aws-cfn-bootstrap - /opt/aws/bin/cfn-init --verbose --stack ${AWS::StackName} --resource EC2Instance --region ${AWS::Region} - /opt/aws/bin/cfn-signal -e $? --stack ${AWS::StackName} --resource EC2Instance --region ${AWS::Region} - shutdown -h now - NetworkInterfaces: - - AssociatePublicIpAddress: true - DeviceIndex: "0" - GroupSet: - - !Ref EC2SecurityGroup - SubnetId: !Ref PublicSubnetID - CreationPolicy: - ResourceSignal: - Timeout: PT15M - AMI: - Type: Custom::AMI - Properties: - ServiceToken: !GetAtt AMIFunction.Arn - InstanceId: !Ref EC2Instance - Tags: - - Key: architecture - Value: !FindInMap ["TagMap", "default", "architecture"] - - Key: solution - Value: !Ref AMIType - AMIFunction: - Type: AWS::Lambda::Function - Properties: - Handler: index.handler - Role: !GetAtt LambdaExecutionRole.Arn - Code: - ZipFile: !Sub | - var response = require('cfn-response'); - var AWS = require('aws-sdk'); - exports.handler = function(event, context) { - console.log("Request received:\n", JSON.stringify(event)); - var physicalId = event.PhysicalResourceId; - function success(data) { - return response.send(event, context, response.SUCCESS, data, physicalId); - } - function failed(e) { - return response.send(event, context, response.FAILED, e, physicalId); - } - // Call ec2.waitFor, continuing if not finished before Lambda function timeout. - function wait(waiter) { - console.log("Waiting: ", JSON.stringify(waiter)); - event.waiter = waiter; - event.PhysicalResourceId = physicalId; - var request = ec2.waitFor(waiter.state, waiter.params); - setTimeout(()=>{ - request.abort(); - console.log("Timeout reached, continuing function. Params:\n", JSON.stringify(event)); - var lambda = new AWS.Lambda(); - lambda.invoke({ - FunctionName: context.invokedFunctionArn, - InvocationType: 'Event', - Payload: JSON.stringify(event) - }).promise().then((data)=>context.done()).catch((err)=>context.fail(err)); - }, context.getRemainingTimeInMillis() - 5000); - return request.promise().catch((err)=> - (err.code == 'RequestAbortedError') ? 
- new Promise(()=>context.done()) : - Promise.reject(err) - ); - } - var ec2 = new AWS.EC2(), - instanceId = event.ResourceProperties.InstanceId; - if (event.waiter) { - wait(event.waiter).then((data)=>success({})).catch((err)=>failed(err)); - } else if (event.RequestType == 'Create' || event.RequestType == 'Update') { - if (!instanceId) { failed('InstanceID required'); } - ec2.waitFor('instanceStopped', {InstanceIds: [instanceId]}).promise() - .then((data)=> - ec2.createImage({ - InstanceId: instanceId, - Name: '${AMIName}' - }).promise() - ).then((data)=> - wait({ - state: 'imageAvailable', - params: {ImageIds: [physicalId = data.ImageId]} - }) - ).then((data)=>success({})).catch((err)=>failed(err)); - } else if (event.RequestType == 'Delete') { - if (physicalId.indexOf('ami-') !== 0) { return success({});} - ec2.describeImages({ImageIds: [physicalId]}).promise() - .then((data)=> - (data.Images.length == 0) ? success({}) : - ec2.deregisterImage({ImageId: physicalId}).promise() - ).then((data)=> - ec2.describeSnapshots({Filters: [{ - Name: 'description', - Values: ["*" + physicalId + "*"] - }]}).promise() - ).then((data)=> - (data.Snapshots.length === 0) ? success({}) : - ec2.deleteSnapshot({SnapshotId: data.Snapshots[0].SnapshotId}).promise() - ).then((data)=>success({})).catch((err)=>failed(err)); - } - }; - Runtime: nodejs8.10 - Timeout: 300 - LambdaExecutionRole: - Type: AWS::IAM::Role - Properties: - AssumeRolePolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: Allow - Principal: { Service: [lambda.amazonaws.com] } - Action: ["sts:AssumeRole"] - Path: / - ManagedPolicyArns: - - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole - - arn:aws:iam::aws:policy/service-role/AWSLambdaRole - Policies: - - PolicyName: EC2Policy - PolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: Allow - Action: - - "ec2:DescribeInstances" - - "ec2:DescribeImages" - - "ec2:CreateImage" - - "ec2:DeregisterImage" - - "ec2:DescribeSnapshots" - - "ec2:DeleteSnapshot" - Resource: ["*"] - -Metadata: - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: "Network Configuration" - Parameters: - - VpcId - - PublicSubnetID - - Label: - default: "Instance Configuration" - Parameters: - - AMIType - - ScratchMountPoint - - ScratchVolumeSize - - UseEncryption - - LatestECSAMI - - Label: - default: "Outputs" - Parameters: - - AMIName - ParameterLabels: - VpcId: - default: "VPC ID" - PublicSubnetID: - default: "Public Subnet ID" - ScratchMountPoint: - default: "Scratch Mount Point" - ScratchVolumeSize: - default: "Scratch Volume (GB)" - UseEncryption: - default: "Encrypt Volume" - AMIName: - default: "AMI Name" - LatestECSAMI: - default: "Latest ECS AMI" - AMIType: - default: "AMI Type" - -Outputs: - AMI: - Description: The AMI (Amazon Machine Image) ID of your instance - Value: !Ref AMI diff --git a/src/templates/nextflow/nextflow-aio.template.yaml b/src/templates/nextflow/nextflow-aio.template.yaml index d53fb77e4..23a133383 100644 --- a/src/templates/nextflow/nextflow-aio.template.yaml +++ b/src/templates/nextflow/nextflow-aio.template.yaml @@ -208,6 +208,8 @@ Resources: - !Ref ExistingNextflowBucket NextflowContainerImage: !Ref NextflowContainerImage BatchDefaultJobQueue: !GetAtt GenomicsWorkflowStack.Outputs.GenomicsEnvDefaultJobQueueArn + BatchHighPriorityJobQueue: !GetAtt GenomicsWorkflowStack.Outputs.GenomicsEnvHighPriorityJobQueueArn + TemplateRootUrl: !Ref TemplateRootUrl Tags: !FindInMap ["TagMap", "default", "tags"] diff --git 
a/src/templates/nextflow/nextflow-resources.template.yaml b/src/templates/nextflow/nextflow-resources.template.yaml index ad72314d6..be5e84c9c 100644 --- a/src/templates/nextflow/nextflow-resources.template.yaml +++ b/src/templates/nextflow/nextflow-resources.template.yaml @@ -88,7 +88,21 @@ Parameters: BatchDefaultJobQueue: Type: String Description: >- - Name or ARN of the Batch Job Queue to use by default for workflow tasks. + ARN of the Batch Job Queue to use by default for workflow tasks. + + BatchHighPriorityJobQueue: + Type: String + Description: >- + ARN of the Batch Job Queue to use for high priority workflow tasks. + + TemplateRootUrl: + Type: String + Description: >- + Root URL for where nested templates are stored + Default: https://s3.amazonaws.com/aws-genomics-workflows/templates + ConstraintDescription: >- + Must be a valid S3 URL + AllowedPattern: "https://s3(-[a-z0-9]+)*\\.amazonaws\\.com/[a-z0-9-./]{3,}" Conditions: DataBucketIsNextflowBucket: @@ -120,70 +134,20 @@ Resources: - ServerSideEncryptionByDefault: SSEAlgorithm: AES256 Tags: !FindInMap ["TagMap", "default", "tags"] - - IAMCodeBuildRole: - Type: AWS::IAM::Role - Condition: BuildNextflowContainer - Properties: - AssumeRolePolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: Allow - Principal: - Service: codebuild.amazonaws.com - Action: sts:AssumeRole - - Policies: - - PolicyName: !Sub codebuild-ecr-access-${AWS::Region} - PolicyDocument: - Version: 2012-10-17 - Statement: - Effect: Allow - Resource: "*" - Action: - - "ecr:CreateRepository" - - "ecr:BatchCheckLayerAvailability" - - "ecr:CompleteLayerUpload" - - "ecr:GetAuthorizationToken" - - "ecr:InitiateLayerUpload" - - "ecr:PutImage" - - "ecr:UploadLayerPart" - - PolicyName: !Sub codebuild-logs-access-${AWS::Region} - PolicyDocument: - Version: 2012-10-17 - Statement: - Effect: Allow - Resource: "*" - Action: - - logs:CreateLogGroup - - logs:CreateLogStream - - logs:PutLogEvents - - IAMLambdaExecutionRole: - Type: AWS::IAM::Role + + ContainerBuildNextflow: + Type: AWS::CloudFormation::Stack Condition: BuildNextflowContainer Properties: - AssumeRolePolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: Allow - Principal: - Service: lambda.amazonaws.com - Action: "sts:AssumeRole" - Path: / - ManagedPolicyArns: - - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole - - arn:aws:iam::aws:policy/service-role/AWSLambdaRole - Policies: - - PolicyName: !Sub codebuild-access-${AWS::Region} - PolicyDocument: - Version: "2012-10-17" - Statement: - - Effect: Allow - Action: - - "codebuild:StartBuild" - - "codebuild:BatchGetBuilds" - Resource: "*" + TemplateURL: !Sub ${TemplateRootUrl}/_common/container-build.template.yaml + Parameters: + ContainerName: nextflow + GitRepoType: GITHUB + GitCloneUrlHttp: https://github.com/aws-samples/aws-genomics-workflows.git + ProjectPath: ./src/containers/nextflow + ProjectBuildSpecFile: ./src/containers/buildspec-nextflow.yml + CreateBatchJobDefinition: "No" + Tags: !FindInMap ["TagMap", "default", "tags"] IAMNextflowJobRole: Type: AWS::IAM::Role @@ -193,10 +157,33 @@ Resources: PolicyDocument: Version: 2012-10-17 Statement: - - Effect: Allow + # Nextflow requires full read access to gather the state of jobs being executed + - Sid: "AWS Batch Read Access - All" + Effect: Allow Resource: "*" Action: - - "batch:*" + - "batch:List*" + - "batch:Describe*" + + # only permit access (job submission) to the queues and compute environments + # configured to run nextflow + - Sid: "AWS Batch Write Access - Job 
Submission" + Effect: Allow + Resource: + - !Ref BatchDefaultJobQueue + - !Ref BatchHighPriorityJobQueue + Action: + - "batch:*Job" + + # nextflow needs to be able to create job definitions + # these are prefixed with "nf-" + - Sid: "AWS Batch Write Access - Job Definitions" + Effect: Allow + Resource: + - arn:aws:batch:*:*:job-definition/nf-*:* + Action: + - "batch:*JobDefinition" + - PolicyName: !Sub Nextflow-S3Bucket-Access-${AWS::Region} PolicyDocument: Version: 2012-10-17 @@ -227,116 +214,18 @@ Resources: ManagedPolicyArns: - "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess" - CodeBuildProject: - Type: AWS::CodeBuild::Project - Condition: BuildNextflowContainer - Properties: - Name: !Sub - - nextflow-container-${GUID} - - GUID: !Select [ 2, !Split [ "/", !Ref "AWS::StackId" ]] - Description: >- - Builds a nextflow container for running genomics workflows - Artifacts: - Type: NO_ARTIFACTS - Environment: - Type: LINUX_CONTAINER - Image: aws/codebuild/standard:1.0 - ComputeType: BUILD_GENERAL1_SMALL - PrivilegedMode: True - - ServiceRole: !GetAtt IAMCodeBuildRole.Arn - Source: - Type: NO_SOURCE - BuildSpec: !Sub - - |- - version: 0.2 - phases: - pre_build: - commands: - - echo "Docker Login to ECR" - - $(aws ecr get-login --no-include-email --region ${AWS::Region}) - - echo "Creating ECR image repository" - - aws ecr create-repository --repository-name nextflow || true - - echo "Getting source code from Github" - - git clone https://github.com/aws-samples/aws-genomics-workflows.git - - cd aws-genomics-workflows - - cd src/containers/nextflow - build: - commands: - - echo "Building container" - - docker build -t nextflow . - post_build: - commands: - - echo "Tagging container image" - - docker tag nextflow:latest ${REGISTRY}/nextflow:latest - - echo "Pushing container image to ECR" - - docker push ${REGISTRY}/nextflow:latest - - REGISTRY: !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com - - Tags: !FindInMap ["TagMap", "default", "tags"] - - CodeBuildInvocation: - Type: Custom::CodeBuildInvocation - Condition: BuildNextflowContainer - Properties: - ServiceToken: !GetAtt CodeBuildInvocationFunction.Arn - BuildProject: !Ref CodeBuildProject - - CodeBuildInvocationFunction: - Type: AWS::Lambda::Function - Condition: BuildNextflowContainer - Properties: - Handler: index.handler - Role: !GetAtt IAMLambdaExecutionRole.Arn - Runtime: python3.7 - Timeout: 600 - Code: - ZipFile: | - from time import sleep - - import boto3 - import cfnresponse - - def handler(event, context): - if event['RequestType'] in ("Create", "Update"): - codebuild = boto3.client('codebuild') - build = codebuild.start_build( - projectName=event["ResourceProperties"]["BuildProject"] - )['build'] - - id = build['id'] - status = build['buildStatus'] - while status == 'IN_PROGRESS': - sleep(10) - build = codebuild.batch_get_builds(ids=[id])['builds'][0] - status = build['buildStatus'] - - if status != "SUCCEEDED": - cfnresponse.send(event, context, cfnresponse.FAILED, None) - - cfnresponse.send(event, context, cfnresponse.SUCCESS, None) - BatchNextflowJobDefinition: Type: AWS::Batch::JobDefinition Properties: Type: container ContainerProperties: - MountPoints: - - ContainerPath: /opt/work - SourceVolume: scratch - Volumes: - - Host: - SourcePath: /scratch - Name: scratch - Command: - - Ref::NextflowScript Memory: 1024 JobRoleArn: !GetAtt IAMNextflowJobRole.Arn Vcpus: 2 Image: Fn::If: - BuildNextflowContainer - - !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/nextflow:latest + - !Sub 
${ContainerBuildNextflow.Outputs.ContainerImage}
           - !Ref NextflowContainerImage
         Environment:
           - Name: "NF_JOB_QUEUE"
@@ -403,7 +292,7 @@ Outputs:
     Value:
       Fn::If:
         - BuildNextflowContainer
-        - !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/nextflow:latest
+        - !Sub ${ContainerBuildNextflow.Outputs.ContainerImage}
         - !Ref NextflowContainerImage
 
   NextflowJobDefinition:
diff --git a/src/templates/step-functions/lambda_function.py b/src/templates/step-functions/codebuild_invocation_function.py
similarity index 100%
rename from src/templates/step-functions/lambda_function.py
rename to src/templates/step-functions/codebuild_invocation_function.py
diff --git a/src/templates/step-functions/ecr_repository_handler_function.py b/src/templates/step-functions/ecr_repository_handler_function.py
new file mode 100644
index 000000000..b6b6c4751
--- /dev/null
+++ b/src/templates/step-functions/ecr_repository_handler_function.py
@@ -0,0 +1,97 @@
+from time import sleep
+
+import boto3
+import cfnresponse
+
+send, SUCCESS, FAILED = (
+    cfnresponse.send,
+    cfnresponse.SUCCESS,
+    cfnresponse.FAILED
+)
+
+ecr = boto3.client('ecr')
+
+def wait(repo, until):
+    until = until.lower()
+    if until == "deleted":
+        while True:
+            try:
+                sleep(1)
+                ecr.describe_repositories(repositoryNames=[repo])
+            except ecr.exceptions.RepositoryNotFoundException:
+                break
+
+    if until == "exists":
+        exists = False
+        while not exists:
+            try:
+                sleep(1)
+                exists = ecr.describe_repositories(repositoryNames=[repo])["repositories"]
+                break
+            except ecr.exceptions.RepositoryNotFoundException:
+                exists = False
+
+def put_lifecycle_policy(repo, props):
+    if props.get("LifecyclePolicy"):
+        ecr.put_lifecycle_policy(
+            repositoryName=repo,
+            lifecyclePolicyText=props["LifecyclePolicy"]["LifecyclePolicyText"]
+        )
+
+def create(repo, props, event, context):
+    # use existing repository if available, otherwise create
+    try:
+        ecr.create_repository(repositoryName=repo)
+        wait(repo, "exists")
+        put_lifecycle_policy(repo, props)
+
+    except ecr.exceptions.RepositoryAlreadyExistsException:
+        print(f"Repository '{repo}' already exists - CREATE ECR repository ignored")
+        put_lifecycle_policy(repo, props)
+
+    except Exception as e:
+        send(event, context, FAILED, None)
+        raise(e)
+
+def update(repo, props, event, context):
+    # use existing repository if available
+    update_policy = props.get("UpdateReplacePolicy")
+    try:
+        if update_policy and update_policy.lower() == "retain":
+            put_lifecycle_policy(repo, props)
+
+        else:
+            # replace the repo
+            delete(repo, props, event, context)
+            create(repo, props, event, context)
+
+    except Exception as e:
+        send(event, context, FAILED, None)
+        raise(e)
+
+def delete(repo, props, event, context):
+    # retain repository if specified
+    # otherwise force delete
+    delete_policy = props.get("DeletePolicy")
+    try:
+        if delete_policy and not delete_policy.lower() == "retain":
+            ecr.delete_repository(repositoryName=repo, force=True)
+            wait(repo, "deleted")
+
+    except Exception as e:
+        send(event, context, FAILED, None)
+        raise(e)
+
+def handler(event, context):
+    props = event["ResourceProperties"]
+    repo = props.get("RepositoryName")
+
+    if event["RequestType"] in ("Create", "Update", "Delete"):
+        action = globals()[event["RequestType"].lower()]
+        action(repo, props, event, context)
+        send(event, context, SUCCESS, None)
+
+    else:
+        # unhandled request type
+        send(event, context, FAILED, None)
+
\ No newline at end of file
diff --git a/src/templates/step-functions/sfn-workflow.template.yaml
b/src/templates/step-functions/sfn-workflow.template.yaml index d6e8dc0e2..d6decd859 100644 --- a/src/templates/step-functions/sfn-workflow.template.yaml +++ b/src/templates/step-functions/sfn-workflow.template.yaml @@ -43,31 +43,40 @@ Resources: ContainerBuildBwa: Type: AWS::CloudFormation::Stack Properties: - TemplateURL: !Sub ${TemplateRootUrl}/step-functions/container-build.template.yaml + TemplateURL: !Sub ${TemplateRootUrl}/_common/container-build.template.yaml Parameters: ContainerName: bwa - GithubHttpUrl: https://github.com/aws-samples/aws-genomics-workflows.git + GitRepoType: GITHUB + GitCloneUrlHttp: https://github.com/aws-samples/aws-genomics-workflows.git ProjectPath: ./src/containers/bwa + ProjectBuildSpecFile: ./src/containers/buildspec-workflow-tool.yml + CreateBatchJobDefinition: "Yes" Tags: !FindInMap ["TagMap", "default", "tags"] ContainerBuildSamtools: Type: AWS::CloudFormation::Stack Properties: - TemplateURL: !Sub ${TemplateRootUrl}/step-functions/container-build.template.yaml + TemplateURL: !Sub ${TemplateRootUrl}/_common/container-build.template.yaml Parameters: ContainerName: samtools - GithubHttpUrl: https://github.com/aws-samples/aws-genomics-workflows.git + GitRepoType: GITHUB + GitCloneUrlHttp: https://github.com/aws-samples/aws-genomics-workflows.git ProjectPath: ./src/containers/samtools + ProjectBuildSpecFile: ./src/containers/buildspec-workflow-tool.yml + CreateBatchJobDefinition: "Yes" Tags: !FindInMap ["TagMap", "default", "tags"] ContainerBuildBcftools: Type: AWS::CloudFormation::Stack Properties: - TemplateURL: !Sub ${TemplateRootUrl}/step-functions/container-build.template.yaml + TemplateURL: !Sub ${TemplateRootUrl}/_common/container-build.template.yaml Parameters: ContainerName: bcftools - GithubHttpUrl: https://github.com/aws-samples/aws-genomics-workflows.git + GitRepoType: GITHUB + GitCloneUrlHttp: https://github.com/aws-samples/aws-genomics-workflows.git ProjectPath: ./src/containers/bcftools + ProjectBuildSpecFile: ./src/containers/buildspec-workflow-tool.yml + CreateBatchJobDefinition: "Yes" Tags: !FindInMap ["TagMap", "default", "tags"] IAMStepFunctionsExecutionRole:
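Since the three container-build sub-stacks above set `CreateBatchJobDefinition` to "Yes", each tool image also gets a Batch job definition named after its container. A quick way to verify an image end-to-end is to submit a trivial job against it. A minimal sketch, assuming a deployed AWS Batch environment; the job queue name below is a placeholder for a queue from your own deployment:

```python
# Submit a smoke-test job against one of the job definitions these
# sub-stacks create (named after the container, e.g. "bwa").
import boto3

batch = boto3.client("batch")

response = batch.submit_job(
    jobName="bwa-smoke-test",
    jobQueue="my-default-job-queue",  # placeholder: use your queue name or ARN
    jobDefinition="bwa",              # JobDefinitionName defaults to ContainerName
    containerOverrides={"command": ["bwa"]},  # prints usage, confirming the image runs
)
print("submitted:", response["jobId"])
```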