# Workflow file: aws.yml
# Captured from the GitHub Actions run page "Create aws.yml #39".

# CI workflow: assumes an AWS role, verifies S3 access, starts a Spark Thrift
# server with docker-compose and runs the dbt integration tests against the
# `spark_iceberg` target.
name: List S3 Objects - AWS

on:
  pull_request:

env:
  AWS_REGION: eu-west-1
  AWS_ROLE_ARN: "arn:aws:iam::719197435995:role/DbtSparkTestingActions"
  S3_BUCKET: "dbt-spark-iceberg/github-integration-testing"
  DBT_PROFILES_DIR: ./ci

# `id-token: write` lets the job request the OIDC token that
# configure-aws-credentials exchanges for the role's temporary credentials.
permissions:
  id-token: write
  contents: read

jobs:
  list_s3_objects:
    name: list_s3_objects
    runs-on: ubuntu-latest
    # Applies to `run:` steps only; `uses:` steps still resolve paths against
    # the workspace root.
    defaults:
      run:
        working-directory: .github/workflows/spark_deployment
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ env.AWS_ROLE_ARN }}
          aws-region: ${{ env.AWS_REGION }}
          mask-aws-account-id: true
          # NOTE(review): `mask-aws-role-arn` does not appear to be a
          # documented input of this action - confirm; unknown inputs only
          # produce a warning.
          mask-aws-role-arn: true
          role-session-name: GithubActionsSession
          role-duration-seconds: 3600
          output-credentials: true

      - name: Verify AWS credentials and S3 access
        run: |
          aws sts get-caller-identity
          aws s3 ls s3://${{ env.S3_BUCKET }} --summarize
          # Test S3 write access
          echo "test" > test.txt
          aws s3 cp test.txt s3://${{ env.S3_BUCKET }}/test.txt
          aws s3 rm s3://${{ env.S3_BUCKET }}/test.txt

      - name: Install Docker Compose
        run: |
          # --fail makes an HTTP error abort the step instead of saving an
          # error page as the docker-compose binary.
          sudo curl -fL \
            "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" \
            -o /usr/local/bin/docker-compose
          sudo chmod +x /usr/local/bin/docker-compose
          docker-compose --version

      - name: Configure Docker environment
        run: |
          # The assumed-role credentials are already exported into this job's
          # environment by the configure-aws-credentials step. They must NOT be
          # re-read with `aws configure get`: that command only consults the
          # CLI config files, so it would overwrite them with empty strings.
          : "${AWS_ACCESS_KEY_ID:?assumed-role credentials are not in the environment}"
          : "${AWS_SECRET_ACCESS_KEY:?assumed-role credentials are not in the environment}"
          : "${AWS_SESSION_TOKEN:?assumed-role credentials are not in the environment}"
          # Create the Docker environment file in a single write so that no
          # entry can truncate the ones written before it.
          {
            echo "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}"
            echo "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}"
            echo "AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN}"
            echo "AWS_REGION=${AWS_REGION}"
          } > .env

      - name: Configure Docker credentials
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_USERNAME }}
          password: ${{ secrets.DOCKERHUB_SNOWPLOWCI_READ_PASSWORD }}

      - name: Clean up Docker
        run: |
          docker system prune -af
          docker volume prune -f

      - name: Build and start Spark cluster
        id: spark-startup
        run: |
          docker-compose up -d
          echo "Waiting for Spark services to start..."
          sleep 30  # Initial wait
          # Get the container name and expose it to later steps. `-m 1` keeps
          # the value on a single line even if several containers match, and a
          # missing match still fails the step through grep's exit status.
          CONTAINER_NAME=$(docker ps --format '{{.Names}}' | grep -m 1 thrift-server)
          echo "container_name=${CONTAINER_NAME}" >> "$GITHUB_OUTPUT"
          # Wait for Spark to be fully initialized
          spark_ready=false
          for i in {1..30}; do
            if docker logs "${CONTAINER_NAME}" 2>&1 | grep -q "HiveThriftServer2 started"; then
              echo "Spark initialized successfully"
              spark_ready=true
              break
            fi
            echo "Waiting for Spark initialization... attempt $i"
            sleep 3
          done
          # Surface a timeout instead of finishing silently; the later beeline
          # check remains the step that decides whether the job fails.
          if [ "${spark_ready}" != "true" ]; then
            echo "::warning::Spark did not log 'HiveThriftServer2 started' within the wait window"
          fi
          # Verify Spark is running
          docker ps
          docker logs "${CONTAINER_NAME}"

      - name: Python setup
        uses: actions/setup-python@v4
        with:
          python-version: "3.8.x"

      - name: Install spark dependencies
        run: |
          pip install --upgrade pip wheel setuptools
          pip install -Iv "dbt-spark[PyHive]"==1.7.0 --upgrade

      - name: Verify Spark cluster and connection
        run: |
          docker ps
          docker logs ${{ steps.spark-startup.outputs.container_name }}
          docker exec ${{ steps.spark-startup.outputs.container_name }} beeline -u "jdbc:hive2://localhost:10000" -e "show databases;"

      - name: Run DBT Debug
        working-directory: ./integration_tests
        run: |
          # Get service logs before attempting debug
          docker logs ${{ steps.spark-startup.outputs.container_name }}
          dbt deps
          dbt debug --target spark_iceberg

      - name: Clean up before tests
        working-directory: ./integration_tests
        run: dbt run-operation post_ci_cleanup --target spark_iceberg

      - name: Run tests
        working-directory: ./integration_tests
        run: |
          set -e
          ./.scripts/integration_test.sh -d spark_iceberg

      - name: Capture Spark logs on failure
        if: failure()
        run: |
          echo "Capturing Spark logs..."
          # Capture stderr as well (the readiness probe reads it too), and keep
          # going if the container name is unavailable so the Spark UI details
          # are still collected.
          docker logs ${{ steps.spark-startup.outputs.container_name }} > spark_logs.txt 2>&1 || true
          cat spark_logs.txt
          echo "Capturing Spark UI details..."
          curl -v http://localhost:4040/api/v1/applications > spark_ui.txt || true
          cat spark_ui.txt

      - name: Upload logs as artifact
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: spark-logs
          # The log files are written in the default run working directory,
          # while this `uses:` step resolves paths from the workspace root.
          path: |
            .github/workflows/spark_deployment/spark_logs.txt
            .github/workflows/spark_deployment/spark_ui.txt
          compression-level: 6  # Moderate compression
          retention-days: 5  # Keep logs for 5 days

      - name: Cleanup
        if: always()
        run: |
          docker-compose down
          docker system prune -af
          rm -f .env