handle multi-line run command appropriately (#152) #832

Workflow file for this run

.github/workflows/upload-and-deploy.yaml at 4317d9d

	# Workflow header: trigger rules and environment values shared by every job.
	name: upload-and-deploy

	# Runs on branch pushes only; tag pushes are ignored. In GitHub's filter
	# syntax "*" does not match "/", so branches whose names contain a slash
	# do not trigger this workflow.
	on:
	  push:
	    branches: "*"
	    tags-ignore: "*"

	env:
	  # Default namespace; jobs overwrite it with the branch name when the
	  # ref is not "main".
	  NAMESPACE: main
	  # Quoted so the version stays a string (an unquoted 3.10 would load as 3.1).
	  PYTHON_VERSION: "3.9"
	  DEV_INPUT_BUCKET: recover-dev-input-data
	  DEV_RAW_BUCKET: recover-dev-raw-data
	  DEV_INTERMEDIATE_BUCKET: recover-dev-intermediate-data
	  DEV_PROCESSED_BUCKET: recover-dev-processed-data
	  PROD_INPUT_BUCKET: recover-input-data
	  PROD_RAW_BUCKET: recover-raw-data
	  PROD_INTERMEDIATE_BUCKET: recover-intermediate-data
	  # Upper bound on the number of export keys the integration tests copy.
	  INTEGRATION_TEST_NUM_EXPORTS: "28"
	  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: "true"

	jobs:

	# Lint gate: several later jobs list this job in `needs`.
	pre-commit:
	  name: Run pre-commit hooks against all files
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v3
	    - uses: actions/setup-python@v5
	      with:
	        python-version: ${{ env.PYTHON_VERSION }}
	    # NOTE(review): this reference was obfuscated in the captured page
	    # (shown as an "[email protected]" placeholder). The action name and tag
	    # below are a best guess -- confirm against the repository.
	    - uses: pre-commit/action@v3.0.1

	# Writes a temporary Snowflake CLI configuration, verifies the connection,
	# and runs snowflake/objects/deploy.sql.
	deploy-snowflake:
	  name: Deploy Snowflake resources
	  needs: pre-commit
	  runs-on: ubuntu-latest
	  env:
	    PRIVATE_KEY_PASSPHRASE: ${{ secrets.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE }}
	    # "staging" on main, otherwise the branch name.
	    SNOWFLAKE_ENVIRONMENT: ${{ github.ref_name == 'main' && 'staging' || github.ref_name }}
	  steps:
	    - uses: actions/checkout@v3

	    - name: Configure Snowflake connection
	      env:
	        # Handed to the script through the environment instead of being
	        # interpolated into the script text.
	        PRIVATE_KEY_PEM: ${{ secrets.SNOWFLAKE_PRIVATE_KEY }}
	      run: |
	        # Create temporary files for config.toml and our private key
	        config_file=$(mktemp)
	        private_key_file=$(mktemp)

	        # Write to the private key file
	        echo "$PRIVATE_KEY_PEM" > "$private_key_file"

	        # Write to config.toml file
	        echo 'default_connection_name = "recover"' >> "$config_file"
	        echo '[connections.recover]' >> "$config_file"
	        echo "account = \"${{ vars.SNOWFLAKE_ACCOUNT }}\"" >> "$config_file"
	        echo "user = \"${{ vars.SNOWFLAKE_USER }}\"" >> "$config_file"
	        echo "role = \"${{ vars.SNOWFLAKE_ROLE }}\"" >> "$config_file"
	        echo 'warehouse = "RECOVER_XSMALL"' >> "$config_file"
	        echo 'authenticator = "SNOWFLAKE_JWT"' >> "$config_file"
	        echo "private_key_path = \"$private_key_file\"" >> "$config_file"

	        # Write config.toml path to global environment
	        echo "SNOWFLAKE_CONFIG_PATH=$config_file" >> "$GITHUB_ENV"

	    # The config file holds only the path to the key, not the key itself.
	    - name: Configuration file information
	      run: |
	        echo "Snowflake configuration is located at $SNOWFLAKE_CONFIG_PATH"
	        cat "$SNOWFLAKE_CONFIG_PATH"

	    # NOTE(review): this reference was obfuscated in the captured page
	    # (shown as an "[email protected]" placeholder). The action name and tag
	    # below are a best guess -- confirm against the repository.
	    - name: Install Snowflake CLI
	      uses: Snowflake-Labs/snowflake-cli-action@v1.5
	      with:
	        default-config-file-path: ${{ env.SNOWFLAKE_CONFIG_PATH }}

	    - name: Test Snowflake connection
	      run: |
	        snow --version
	        snow connection test

	    # The branch name and target environment are passed to deploy.sql
	    # with -D.
	    - name: Deploy Snowflake objects
	      run: |
	        snow sql \
	          -D "git_branch=$GITHUB_REF_NAME" \
	          -D "environment=$SNOWFLAKE_ENVIRONMENT" \
	          -f snowflake/objects/deploy.sql

	# Uploads artifacts for this namespace using the develop environment.
	upload-files:
	  name: Upload files to S3 bucket in development
	  runs-on: ubuntu-latest
	  needs: pre-commit
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    id-token: write
	    contents: read
	  environment: develop
	  steps:

	    - name: Setup code, pipenv, aws
	      uses: Sage-Bionetworks/action-pipenv-aws-setup@v3
	      with:
	        role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}
	        python_version: ${{ env.PYTHON_VERSION }}

	    - name: Setup sam
	      uses: aws-actions/setup-sam@v2

	    # Outside main, the namespace is the branch name.
	    - name: Set namespace for non-default branch
	      if: github.ref_name != 'main'
	      run: echo "NAMESPACE=$GITHUB_REF_NAME" >> "$GITHUB_ENV"

	    # Folded scalar: the lines below are joined with spaces into a single
	    # command, so they must all stay at the same indentation.
	    - name: Copy files to templates bucket, use dev cloudformation bucket
	      run: >
	        python src/scripts/manage_artifacts/artifacts.py
	        --upload
	        --namespace "$NAMESPACE"
	        --cfn_bucket ${{ vars.CFN_BUCKET }}
	        --shareable-artifacts-bucket ${{ vars.SHAREABLE_ARTIFACTS_BUCKET }}

	# Runs the pytest suites that do not need the AWS Glue container images.
	nonglue-unit-tests:
	  name: Runs unit tests that are not dependent on aws-glue package resources
	  runs-on: ubuntu-latest
	  needs: pre-commit
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    id-token: write
	    contents: read
	  environment: develop
	  steps:
	    - name: Setup code, pipenv, aws
	      uses: Sage-Bionetworks/action-pipenv-aws-setup@v3
	      with:
	        role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        role_session_name: ${{ github.event.repository.name }}-${{ github.run_id }}-nonglue-unit-tests
	        python_version: ${{ env.PYTHON_VERSION }}

	    - name: Install additional python dependency
	      run: |
	        pipenv install ecs_logging~=2.0
	        pipenv install pytest-datadir

	    - name: Test scripts with pytest (lambda, etc.)
	      run: |
	        pipenv run python -m pytest -v \
	          tests/test_s3_event_config_lambda.py \
	          tests/test_s3_to_glue_lambda.py \
	          tests/test_lambda_dispatch.py \
	          tests/test_consume_logs.py \
	          tests/test_lambda_raw.py \
	          tests/test_lambda_raw_sync.py

	    # Two separate pytest invocations: read-only access on the input
	    # bucket, then read-write access on the processed bucket. Written as a
	    # literal block with explicit continuations so the boundary between
	    # the two commands does not depend on scalar folding rules.
	    - name: Test dev synapse folders for STS access with pytest
	      run: |
	        pipenv run python -m pytest tests/test_setup_external_storage.py \
	          --test-bucket "$DEV_INPUT_BUCKET" \
	          --test-synapse-folder-id syn51758510 \
	          --namespace "$NAMESPACE" \
	          --test-sts-permission read_only \
	          -v

	        pipenv run python -m pytest tests/test_setup_external_storage.py \
	          --test-bucket "$DEV_PROCESSED_BUCKET" \
	          --test-synapse-folder-id syn51084525 \
	          --namespace "$NAMESPACE/parquet" \
	          --test-sts-permission read_write \
	          -v

	# Builds one test image per matrix entry, pushes it to the "pytest" ECR
	# repository, and exposes the registry host and credentials as job outputs.
	pytest-docker:
	  name: Build and push testing docker images to the pytest ECR repository.
	  needs: pre-commit
	  runs-on: ubuntu-latest
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    id-token: write
	    contents: read
	  strategy:
	    matrix:
	      include:
	        - tag_name: aws_glue_3
	          dockerfile: tests/Dockerfile.aws_glue_3
	        - tag_name: aws_glue_4
	          dockerfile: tests/Dockerfile.aws_glue_4
	  environment: develop
	  steps:
	    - name: Assume AWS role
	      uses: aws-actions/configure-aws-credentials@v2
	      with:
	        role-to-assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        aws-region: "us-east-1"
	        # unmasking of the AWS account ID allows the acct id to pass through outputs
	        mask-aws-account-id: "no"

	    - name: Login to Amazon ECR
	      id: login-ecr
	      uses: aws-actions/amazon-ecr-login@v1

	    # Builds the two output names that the job-level `outputs` section
	    # uses to index the login step's outputs: the registry host with
	    # "." and "-" replaced by "_", prefixed with docker_username_ or
	    # docker_password_.
	    - name: Get ECR secret names
	      id: ecr
	      run: |
	        registry_suffix=$(echo "${{ steps.login-ecr.outputs.registry }}" | tr '.-' _)
	        echo "username-key=docker_username_$registry_suffix" >> "$GITHUB_OUTPUT"
	        echo "password-key=docker_password_$registry_suffix" >> "$GITHUB_OUTPUT"

	    - uses: actions/checkout@v3

	    - name: Set up Docker Buildx
	      uses: docker/setup-buildx-action@v2

	    # The image tag combines the ref name and the matrix tag_name.
	    - name: Build and push to ECR
	      id: docker-build-push
	      uses: docker/build-push-action@v4
	      with:
	        push: true
	        tags: ${{ steps.login-ecr.outputs.registry }}/pytest:${{ github.ref_name }}_${{ matrix.tag_name }}
	        file: ${{ matrix.dockerfile }}
	        cache-from: type=local,src=/tmp/.buildx-cache
	        cache-to: type=local,dest=/tmp/.buildx-cache
	  # Job-level outputs, read by glue-unit-tests through
	  # needs.pytest-docker.outputs.
	  outputs:
	    ecr-registry: ${{ steps.login-ecr.outputs.registry }}
	    ecr-username: ${{ steps.login-ecr.outputs[steps.ecr.outputs.username-key] }}
	    ecr-password: ${{ steps.login-ecr.outputs[steps.ecr.outputs.password-key] }}

	# Runs the Glue-dependent pytest suites inside the images pushed by
	# pytest-docker, one job per Glue version.
	glue-unit-tests:
	  name: Run Pytest unit tests for AWS glue
	  needs: pytest-docker
	  environment: develop
	  runs-on: ubuntu-latest
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    id-token: write
	    contents: read
	  strategy:
	    matrix:
	      tag_name: ["aws_glue_3", "aws_glue_4"]
	  container:
	    image: ${{ needs.pytest-docker.outputs.ecr-registry }}/pytest:${{ github.ref_name }}_${{ matrix.tag_name }}
	    credentials:
	      username: ${{ needs.pytest-docker.outputs.ecr-username }}
	      password: ${{ needs.pytest-docker.outputs.ecr-password }}
	    env:
	      DISABLE_SSL: "true"
	    options: "--user root"
	  steps:
	    - name: Assume AWS role
	      uses: aws-actions/configure-aws-credentials@v2
	      with:
	        role-to-assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        aws-region: "us-east-1"
	    - uses: actions/checkout@v3
	    # Steps run as root (see container options); the tests run as glue_user,
	    # so the workspace is handed over and the AWS_* values from this shell's
	    # environment (presumably exported by the role-assumption step -- confirm)
	    # are written into glue_user's AWS CLI configuration.
	    - run: chown -R glue_user $GITHUB_WORKSPACE
	    - run: su - glue_user --command "aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID"
	    - run: su - glue_user --command "aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY"
	    - run: su - glue_user --command "aws configure set aws_session_token $AWS_SESSION_TOKEN"
	    - run: su - glue_user --command "aws configure set region $AWS_REGION"

	    - name: Set namespace for non-default branch or for tag
	      if: github.ref_name != 'main'
	      run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV

	    - name: Run Pytest unit tests under AWS Glue 3.0
	      if: matrix.tag_name == 'aws_glue_3'
	      run: |
	        su - glue_user --command "cd $GITHUB_WORKSPACE && python3 -m pytest \
	        tests/test_s3_to_json.py \
	        tests/test_compare_parquet_datasets.py -v"

	    # Folded scalars: both lines of each command are joined with a space,
	    # so they must stay at the same indentation.
	    - name: Run unit tests for JSON to Parquet under AWS Glue 4.0
	      if: matrix.tag_name == 'aws_glue_4'
	      run: >
	        su - glue_user --command "cd $GITHUB_WORKSPACE &&
	        python3 -m pytest tests/test_json_to_parquet.py --namespace $NAMESPACE -v"

	    - name: Run unit tests for Great Expectations on Parquet under AWS Glue 4.0
	      if: matrix.tag_name == 'aws_glue_4'
	      run: >
	        su - glue_user --command "cd $GITHUB_WORKSPACE &&
	        python3 -m pytest tests/test_run_great_expectations_on_parquet.py -v"

	# Launches the sceptre "develop" stacks for this namespace, then deletes
	# and re-creates the namespace's S3 event notification via the
	# <namespace>-S3EventConfig Lambda.
	sceptre-deploy-develop:
	  name: Deploys branch using sceptre
	  runs-on: ubuntu-latest
	  needs: [upload-files, nonglue-unit-tests, glue-unit-tests]
	  environment: develop
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    id-token: write
	    contents: read

	  steps:
	    - name: Setup code, pipenv, aws
	      uses: Sage-Bionetworks/action-pipenv-aws-setup@v3
	      with:
	        role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}
	        python_version: ${{ env.PYTHON_VERSION }}

	    - name: Create directory for remote sceptre templates
	      run: mkdir -p templates/remote/

	    - name: Set namespace for non-default branch
	      if: github.ref_name != 'main'
	      run: echo "NAMESPACE=$GITHUB_REF_NAME" >> "$GITHUB_ENV"

	    # NAMESPACE may hold a branch name, so it is read as a shell variable
	    # rather than interpolated into the script text.
	    - name: Deploy sceptre stacks to dev
	      run: pipenv run sceptre --debug --var "namespace=$NAMESPACE" launch develop --yes

	    - name: Delete preexisting S3 event notification for this namespace
	      uses: gagoar/invoke-aws-lambda@v3
	      with:
	        AWS_ACCESS_KEY_ID: ${{ env.AWS_ACCESS_KEY_ID }}
	        AWS_SECRET_ACCESS_KEY: ${{ env.AWS_SECRET_ACCESS_KEY }}
	        AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }}
	        REGION: ${{ env.AWS_REGION }}
	        FunctionName: ${{ env.NAMESPACE }}-S3EventConfig
	        Payload: '{"RequestType": "Delete"}'
	        LogType: Tail

	    - name: Create S3 event notification for this namespace
	      uses: gagoar/invoke-aws-lambda@v3
	      with:
	        AWS_ACCESS_KEY_ID: ${{ env.AWS_ACCESS_KEY_ID }}
	        AWS_SECRET_ACCESS_KEY: ${{ env.AWS_SECRET_ACCESS_KEY }}
	        AWS_SESSION_TOKEN: ${{ env.AWS_SESSION_TOKEN }}
	        REGION: ${{ env.AWS_REGION }}
	        FunctionName: ${{ env.NAMESPACE }}-S3EventConfig
	        Payload: '{"RequestType": "Create"}'
	        LogType: Tail

	# Runs clean_for_integration_test.py against this namespace's prefix in
	# the dev input, raw and intermediate buckets.
	integration-test-develop-cleanup:
	  name: Cleanup non-main branch data before integration tests
	  runs-on: ubuntu-latest
	  needs: sceptre-deploy-develop
	  environment: develop
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    id-token: write
	    contents: read
	  steps:
	    - name: Setup code, pipenv, aws
	      uses: Sage-Bionetworks/action-pipenv-aws-setup@v3
	      with:
	        role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}
	        python_version: ${{ env.PYTHON_VERSION }}

	    - name: Set namespace for non-default branch
	      if: github.ref_name != 'main'
	      run: echo "NAMESPACE=$GITHUB_REF_NAME" >> "$GITHUB_ENV"

	    # NAMESPACE may hold a branch name, so the steps below read it as a
	    # shell variable rather than interpolating it into the script text.
	    # Folded scalars: each command's lines are joined with spaces and must
	    # stay at the same indentation.
	    - name: Clean input data bucket
	      run: >
	        pipenv run python src/scripts/manage_artifacts/clean_for_integration_test.py
	        --bucket "$DEV_INPUT_BUCKET"
	        --bucket_prefix "$NAMESPACE/"

	    - name: Clean raw data bucket
	      run: >
	        pipenv run python src/scripts/manage_artifacts/clean_for_integration_test.py
	        --bucket "$DEV_RAW_BUCKET"
	        --bucket_prefix "$NAMESPACE/json/"

	    - name: Clean intermediate data bucket
	      run: >
	        pipenv run python src/scripts/manage_artifacts/clean_for_integration_test.py
	        --bucket "$DEV_INTERMEDIATE_BUCKET"
	        --bucket_prefix "$NAMESPACE/json/"

	# Copies the most recent exports under EXPORT_S3_KEY_PREFIX into this
	# namespace's prefix of the dev input bucket and records their S3 URIs in
	# the cloudformation bucket.
	integration-test-develop:
	  name: Triggers ETL workflow with S3 test files
	  runs-on: ubuntu-latest
	  needs: [sceptre-deploy-develop, integration-test-develop-cleanup]
	  environment: develop
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    id-token: write
	    contents: read
	  env:
	    EXPORT_S3_KEY_PREFIX: pilot-data
	  steps:
	    - name: Setup code, pipenv, aws
	      uses: Sage-Bionetworks/action-pipenv-aws-setup@v3
	      with:
	        role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        role_session_name: integration-test-${{ github.run_id }}
	        python_version: ${{ env.PYTHON_VERSION }}

	    - name: Set namespace for non-default branch or for tag
	      if: github.ref_name != 'main'
	      run: echo "NAMESPACE=$GITHUB_REF_NAME" >> "$GITHUB_ENV"

	    # Emits a compact JSON array of keys as the step output KEYS: newest
	    # first, capped at INTEGRATION_TEST_NUM_EXPORTS entries.
	    # NOTE(review): the prefix has no trailing "/" here, unlike the staging
	    # job -- confirm whether that difference is intended.
	    - name: Fetch the most recent exports.
	      id: recent-exports
	      run: |
	        # Retrieve the last ~2 weeks of exports from each cohort
	        # Ignore keys which end with "/" and which match "owner.txt"
	        echo "KEYS=$(
	          aws s3api list-objects-v2 \
	            --bucket "$DEV_INPUT_BUCKET" \
	            --prefix "$EXPORT_S3_KEY_PREFIX" \
	            --query '((sort_by(Contents[? !ends_with(Key, `/`) && !contains(Key, `owner.txt`)], &LastModified)[::-1])[:${{ env.INTEGRATION_TEST_NUM_EXPORTS }}])[*].Key' |
	          jq -c .
	        )" >> "$GITHUB_OUTPUT"

	    # The key list reaches the script through the environment, so a quote
	    # character inside a key name cannot break out of the shell string.
	    - name: Copy most recent exports to this namespace
	      env:
	        KEYS: ${{ steps.recent-exports.outputs.KEYS }}
	      run: |
	        printf '%s\n' "$KEYS" | jq -r '.[]' | while read -r key; do
	          aws s3 cp "s3://$DEV_INPUT_BUCKET/$key" "s3://$DEV_INPUT_BUCKET/$NAMESPACE/${key#"$EXPORT_S3_KEY_PREFIX"/}"
	        done

	    # Prefixes every key with the bucket URI and streams the resulting
	    # JSON array to S3 from stdin.
	    - name: Write most recent exports to S3 cloudformation bucket
	      env:
	        KEYS: ${{ steps.recent-exports.outputs.KEYS }}
	      run: |
	        printf '%s\n' "$KEYS" |
	          jq --arg bucket "s3://$DEV_INPUT_BUCKET/" '.[] |= $bucket + .' |
	          aws s3 cp - "s3://${{ vars.CFN_BUCKET }}/$NAMESPACE/integration_test_exports.json"

	# Main-only: uploads artifacts under the "staging" namespace and launches
	# the sceptre "prod" stacks with namespace=staging.
	sceptre-deploy-staging:
	  name: Deploys to staging of prod using sceptre
	  if: github.ref_name == 'main'
	  needs: integration-test-develop
	  runs-on: ubuntu-latest
	  environment: prod
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    contents: read
	    id-token: write
	  steps:
	    - name: Setup code, pipenv, aws
	      uses: Sage-Bionetworks/action-pipenv-aws-setup@v3
	      with:
	        python_version: ${{ env.PYTHON_VERSION }}
	        role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}

	    # Folded scalar: the lines are joined with spaces into one command and
	    # must stay at the same indentation.
	    - name: Copy files to templates bucket
	      run: >
	        python src/scripts/manage_artifacts/artifacts.py
	        --upload
	        --namespace staging
	        --cfn_bucket ${{ vars.CFN_BUCKET }}
	        --shareable-artifacts-bucket ${{ vars.SHAREABLE_ARTIFACTS_BUCKET }}

	    - name: Create directory for remote sceptre templates
	      run: mkdir -p templates/remote/

	    - name: Deploy sceptre stacks to staging on prod
	      run: pipenv run sceptre --var "namespace=staging" launch prod --yes

	# Main-only: runs clean_for_integration_test.py against the "staging/"
	# prefixes of the prod input, raw and intermediate buckets.
	integration-test-staging-cleanup:
	  name: Cleanup main branch staging data before integration tests
	  if: github.ref_name == 'main'
	  needs: sceptre-deploy-staging
	  runs-on: ubuntu-latest
	  environment: prod
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    contents: read
	    id-token: write
	  steps:
	    - name: Setup code, pipenv, aws
	      uses: Sage-Bionetworks/action-pipenv-aws-setup@v3
	      with:
	        python_version: ${{ env.PYTHON_VERSION }}
	        role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        role_session_name: GitHubActions-${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}

	    # Folded scalars: each command's lines are joined with spaces and must
	    # stay at the same indentation.
	    - name: Clean input data bucket
	      run: >
	        pipenv run python src/scripts/manage_artifacts/clean_for_integration_test.py
	        --bucket $PROD_INPUT_BUCKET
	        --bucket_prefix "staging/"

	    - name: Clean raw data bucket
	      run: >
	        pipenv run python src/scripts/manage_artifacts/clean_for_integration_test.py
	        --bucket $PROD_RAW_BUCKET
	        --bucket_prefix "staging/json/"

	    - name: Clean intermediate data bucket
	      run: >
	        pipenv run python src/scripts/manage_artifacts/clean_for_integration_test.py
	        --bucket $PROD_INTERMEDIATE_BUCKET
	        --bucket_prefix "staging/json/"

	# Copies the most recent exports under "$EXPORT_S3_KEY_PREFIX/" into the
	# "staging/" prefix of the prod input bucket and records their S3 URIs in
	# the cloudformation bucket.
	integration-test-staging:
	  name: Triggers staging workflow with production data
	  runs-on: ubuntu-latest
	  needs: [sceptre-deploy-staging, integration-test-staging-cleanup]
	  environment: prod
	  # These permissions are needed to interact with GitHub's OIDC Token endpoint.
	  permissions:
	    id-token: write
	    contents: read
	  env:
	    EXPORT_S3_KEY_PREFIX: main
	  steps:
	    - name: Setup code, pipenv, aws
	      uses: Sage-Bionetworks/action-pipenv-aws-setup@v3
	      with:
	        role_to_assume: ${{ vars.AWS_CREDENTIALS_IAM_ROLE }}
	        role_session_name: integration-test-${{ github.run_id }}
	        python_version: ${{ env.PYTHON_VERSION }}

	    # Emits a compact JSON array of keys as the step output KEYS: newest
	    # first, capped at INTEGRATION_TEST_NUM_EXPORTS entries.
	    - name: Fetch the most recent exports.
	      id: recent-exports
	      run: |
	        # Retrieve the last ~2 weeks of exports from each cohort
	        # Ignore keys which end with "/" and which match "owner.txt"
	        echo "KEYS=$(
	          aws s3api list-objects-v2 \
	            --bucket "$PROD_INPUT_BUCKET" \
	            --prefix "$EXPORT_S3_KEY_PREFIX/" \
	            --query '((sort_by(Contents[? !ends_with(Key, `/`) && !contains(Key, `owner.txt`)], &LastModified)[::-1])[:${{ env.INTEGRATION_TEST_NUM_EXPORTS }}])[*].Key' |
	          jq -c .
	        )" >> "$GITHUB_OUTPUT"

	    # The key list reaches the script through the environment, so a quote
	    # character inside a key name cannot break out of the shell string.
	    - name: Copy most recent exports to this namespace
	      env:
	        KEYS: ${{ steps.recent-exports.outputs.KEYS }}
	      run: |
	        printf '%s\n' "$KEYS" | jq -r '.[]' | while read -r key; do
	          aws s3 cp "s3://$PROD_INPUT_BUCKET/$key" "s3://$PROD_INPUT_BUCKET/staging/${key#"$EXPORT_S3_KEY_PREFIX"/}"
	        done

	    # Prefixes every key with the bucket URI and streams the resulting
	    # JSON array to S3 from stdin.
	    - name: Write most recent exports to S3 cloudformation bucket
	      env:
	        KEYS: ${{ steps.recent-exports.outputs.KEYS }}
	      run: |
	        printf '%s\n' "$KEYS" |
	          jq --arg bucket "s3://$PROD_INPUT_BUCKET/" '.[] |= $bucket + .' |
	          aws s3 cp - "s3://${{ vars.CFN_BUCKET }}/staging/integration_test_exports.json"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

handle multi-line run command appropriately (#152) #832

Workflow file

handle multi-line run command appropriately (#152) #832

Jobs

Run details

Workflow file for this run