Skip to content

windows eks test

windows eks test #1

## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
## SPDX-License-Identifier: Apache-2.0
# This is a reusable workflow for running the Enablement test for Application Signals.
# It is meant to be called from another workflow.
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
# Display name of this reusable workflow.
name: DotNet EKS Use Case
on:
  # Entry point used by caller workflows; everything below is the public contract.
  workflow_call:
    inputs:
      # AWS region the test runs in (exported as E2E_TEST_AWS_REGION).
      aws-region:
        type: string
        required: true
      # Name of the EKS cluster the sample app is deployed to (exported as CLUSTER_NAME).
      test-cluster-name:
        type: string
        required: true
      # Image used as the patch image when the caller repository is aws-otel-dotnet-instrumentation.
      adot-image-name:
        type: string
        required: false
      # Tag appended to the operator staging repo when the caller repository is amazon-cloudwatch-agent-operator.
      cw-agent-operator-tag:
        type: string
        required: false
      # 'main-build' makes the checkout step use the `main` ref; any other value uses github.ref.
      caller-workflow-name:
        type: string
        required: true
    outputs:
      # "true" once the first step of the job has run.
      job-started:
        value: ${{ jobs.dotnet-eks.outputs.job-started }}
      # "success" only when log, metric and trace validation all succeeded; otherwise "failure".
      validation-result:
        value: ${{ jobs.dotnet-eks.outputs.validation-result }}
# GITHUB_TOKEN scopes for this workflow.
# NOTE(review): id-token write is presumably needed so configure-aws-credentials can assume
# the role via OIDC (no static keys are passed anywhere in this file) - confirm.
permissions:
  contents: read
  id-token: write
# Workflow-wide environment shared by every step of the job.
env:
  # E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }}
  E2E_TEST_ACCOUNT_ID: ${{ secrets.EKS_WINDOWS_TEST_ROLE_ID }}
  # ACCOUNT_ID used to be fetched from Secrets Manager (region-account/<region>), but that
  # entry is disabled in the "Retrieve account" step. It is still read when building the ECR
  # image URIs, as the terraform `account_id` variable and as the validator --account-id, so
  # define it here to keep those values from expanding to an empty string.
  # NOTE(review): assumes the images and telemetry live in the same account as the assumed
  # test role - confirm.
  ACCOUNT_ID: ${{ secrets.EKS_WINDOWS_TEST_ROLE_ID }}
  E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }}
  E2E_TEST_AWS_REGION: ${{ inputs.aws-region }}
  CLUSTER_NAME: ${{ inputs.test-cluster-name }}
  METRIC_NAMESPACE: ApplicationSignals
  LOG_GROUP_NAME: /aws/application-signals/data
  # Stored as a literal shell expression; the shell expands it when the value is
  # interpolated into a command line.
  TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE}
  ADOT_IMAGE_NAME: ${{ inputs.adot-image-name }}
  CW_AGENT_OPERATOR_TAG: ${{ inputs.cw-agent-operator-tag }}
jobs:
  # Single job: deploy the .NET sample app to EKS, validate telemetry, then clean up.
  dotnet-eks:
    runs-on: ubuntu-latest
    # Hard cap for the whole job.
    timeout-minutes: 30
    # Step outputs re-exported so the workflow-level outputs can read them.
    outputs:
      job-started: ${{ steps.job-started.outputs.job-started }}
      validation-result: ${{ steps.validation-result.outputs.validation-result }}
    steps:
# Publishes job-started=true as a step output as soon as the job begins.
- name: Check if the job started
  id: job-started
  run: |
    echo "job-started=true" >> $GITHUB_OUTPUT
# Check out the shared test framework repository (not the caller's repository).
- uses: actions/checkout@v4
  with:
    repository: aws-observability/aws-application-signals-test-framework
    # 'main-build' callers use the `main` ref; every other caller uses github.ref.
    ref: ${{ inputs.caller-workflow-name == 'main-build' && 'main' || github.ref }}
    # 0 = fetch full history.
    fetch-depth: 0
# Early Gradle warm-up. continue-on-error keeps a failure here from failing the job;
# a later step with the same name is meant to retry it before the validators run.
- name: Initiate Gradlew Daemon
  id: initiate-gradlew
  continue-on-error: true
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    command: ./gradlew
    cleanup: ./gradlew clean
    max_retry: 3
    sleep_time: 60
# Fetch the enable/clean Application Signals scripts into ./enablement-script and make them executable.
- name: Download enablement script
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    pre-command: mkdir enablement-script && cd enablement-script
    # Folded scalar: the two lines are joined with a single space into one command.
    command: >-
      wget https://raw.githubusercontent.com/aws-observability/application-signals-demo/main/scripts/eks/appsignals/enable-app-signals.sh
      && wget https://raw.githubusercontent.com/aws-observability/application-signals-demo/main/scripts/eks/appsignals/clean-app-signals.sh
    # Drop partial downloads before the retry helper tries again.
    cleanup: rm -f enable-app-signals.sh && rm -f clean-app-signals.sh
    post-command: chmod +x enable-app-signals.sh && chmod +x clean-app-signals.sh
# Strip the `aws logs delete-log-group` line for LOG_GROUP_NAME out of the downloaded
# clean-up script, so running the script later does not delete that log group.
- name: Remove log group deletion command
  if: always()
  working-directory: enablement-script
  run: |
    # \$REGION stays a literal "$REGION" so it matches the text inside the script.
    deletion_cmd="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP_NAME }}' --region \$REGION"
    sed -i "s#$deletion_cmd##g" clean-app-signals.sh
# Export a per-run TESTING_ID and Kubernetes namespace, both derived from run_id and run_number.
- name: Generate testing id and dotnet sample app namespace
  run: |
    # <region>-<run_id>-<run_number>-windows
    echo TESTING_ID="${{ env.E2E_TEST_AWS_REGION }}-${{ github.run_id }}-${{ github.run_number }}-windows" >> $GITHUB_ENV
    # ns-windows-<run_id>-<run_number>
    echo DOTNET_SAMPLE_APP_NAMESPACE="ns-windows-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV
# First role assumption, pinned to us-east-1; the next step reads Secrets Manager with it.
# NOTE(review): presumably us-east-1 is where the e2e-test/* secrets are stored - confirm.
- name: Configure AWS Credentials
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: us-east-1
    role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
# Read the sample-app image names from AWS Secrets Manager. Each `ALIAS, secret-id` line
# is later consumed as env.ALIAS (see the "Set Sample App Image" step).
- name: Retrieve account
  uses: aws-actions/aws-secretsmanager-get-secrets@v1
  with:
    # Disabled entry (kept for reference): ACCOUNT_ID, region-account/<aws-region>
    # It must stay OUTSIDE the literal block below: inside `|` a leading `#` is part of
    # the value, not a YAML comment, and a mis-indented line there is a YAML syntax error.
    secret-ids: |
      DOTNET_MAIN_SAMPLE_APP_IMAGE, e2e-test/dotnet-main-sample-app-image
      DOTNET_REMOTE_SAMPLE_APP_IMAGE, e2e-test/dotnet-remote-sample-app-image
# ADOT_E2E_TEST_ROLE_ARN is used to access main build e2e test cluster
# E2E_TEST_ROLE_ARN is used to access canary e2e test cluster
# Second role assumption, this time in the region under test, for all cluster operations.
- name: Configure AWS Credentials
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.E2E_TEST_AWS_REGION }}
    # role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
    role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
# Local directory to store the kubernetes config
- name: Create kubeconfig directory
  run: |
    mkdir -p ${{ github.workspace }}/.kube
# Point kubectl/eksctl/terraform in later steps at the workspace-local config file.
- name: Set KUBECONFIG environment variable
  run: |
    echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV
# Write the cluster entry for CLUSTER_NAME into that config.
- name: Set up kubeconfig
  run: |
    aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }} --region ${{ env.E2E_TEST_AWS_REGION }}
# Download the latest eksctl release tarball and unpack it into <workspace>/eksctl.
- name: Download and install eksctl
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    pre-command: mkdir ${{ github.workspace }}/eksctl
    # Folded scalar: joined into a single command line with one space between the parts.
    command: >-
      curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
      && tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz
    # Remove a partial download before the helper retries.
    cleanup: rm -f eksctl_Linux_amd64.tar.gz
# Make the unpacked eksctl binary available on PATH for all following steps.
- name: Add eksctl to Github Path
  run: echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH
# This step deletes lingering resources from previous test runs.
# This workflow names its namespaces ns-windows-<run_id>-<run_number>, so the pattern
# accepts an optional "windows-" segment; plain ns-<id>-<n> namespaces are still matched.
- name: Delete all sample app namespaces
  continue-on-error: true
  timeout-minutes: 5
  run: |
    # xargs -r: do nothing when no namespace matched.
    kubectl get namespace | awk '/^ns-(windows-)?[0-9]+-[0-9]+/{print $1}' | xargs -r kubectl delete namespace
# Create an IAM-backed Kubernetes service account (S3 read-only) for the sample app.
- name: Create role for AWS access from the sample app
  id: create_service_account
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    # Folded scalars: each group of lines collapses to one space-separated command.
    command: >-
      eksctl create iamserviceaccount
      --name service-account-${{ env.TESTING_ID }}
      --namespace ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
      --cluster ${{ inputs.test-cluster-name }}
      --role-name eks-s3-access-${{ env.TESTING_ID }}
      --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess
      --region ${{ env.E2E_TEST_AWS_REGION }}
      --approve
    # Undo a half-created service account before the helper retries.
    cleanup: >-
      eksctl delete iamserviceaccount
      --name service-account-${{ env.TESTING_ID }}
      --namespace ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
      --cluster ${{ inputs.test-cluster-name }}
      --region ${{ env.E2E_TEST_AWS_REGION }}
    sleep_time: 60
# Install terraform from the HashiCorp apt repository.
- name: Set up terraform
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    # Import the HashiCorp signing key into a dedicated keyring.
    command: >-
      wget -O- https://apt.releases.hashicorp.com/gpg
      | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
    # Register the apt source signed by that keyring, then install the package.
    post-command: >-
      echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
      && sudo apt update && sudo apt install terraform
    sleep_time: 60
# Initialise and validate the Terraform module that the deploy and destroy steps run in.
# Those steps use terraform/dotnet/eks/windows as their working directory, so init must
# target the same directory; initialising the linux module leaves the windows one without
# a .terraform directory and `terraform apply` fails there.
- name: Initiate Terraform
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/dotnet/eks/windows && terraform init && terraform validate"
    # Clear partial init state before the helper retries.
    cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
    max_retry: 6
    sleep_time: 60
# Build the full ECR image URIs (<account>.dkr.ecr.<region>.amazonaws.com/<image>) for the
# main and remote sample apps and export them for the terraform deployment.
- name: Set Sample App Image
  run: |
    # DOTNET_*_SAMPLE_APP_IMAGE come from the "Retrieve account" secrets step.
    echo MAIN_SAMPLE_APP_IMAGE_ARN="${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/${{ env.DOTNET_MAIN_SAMPLE_APP_IMAGE }}" >> $GITHUB_ENV
    echo REMOTE_SAMPLE_APP_IMAGE_ARN="${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/${{ env.DOTNET_REMOTE_SAMPLE_APP_IMAGE }}" >> $GITHUB_ENV
# Deploy the sample app with terraform, enable Application Signals on the cluster, and wait
# for the pods to become Ready. On failure the deployment is torn down and retried.
- name: Deploy sample app via terraform and wait for the endpoint to come online
  id: deploy-dotnet-app
  working-directory: ./terraform/dotnet/eks/windows
  run: |
    # Attempt to deploy the sample app on an EKS instance and wait for its endpoint to come online.
    # There may be occasional failures due to transient issues, so try up to 2 times.
    # deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running,
    # while any non-zero value indicates that it failed at some point
    retry_counter=0
    max_retry=2
    while [ $retry_counter -lt $max_retry ]; do
      echo "Attempt $retry_counter"
      deployment_failed=0
      # `|| deployment_failed=$?` captures terraform's exit status without tripping `bash -e`.
      terraform apply -auto-approve \
        -var="test_id=${{ env.TESTING_ID }}" \
        -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \
        -var="kube_directory_path=${{ github.workspace }}/.kube" \
        -var="eks_cluster_name=${{ env.CLUSTER_NAME }}" \
        -var="eks_cluster_context_name=$(kubectl config current-context)" \
        -var="test_namespace=${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}" \
        -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \
        -var="dotnet_app_image=${{ env.MAIN_SAMPLE_APP_IMAGE_ARN }}" \
        -var="dotnet_remote_app_image=${{ env.REMOTE_SAMPLE_APP_IMAGE_ARN }}" \
        -var='account_id=${{ env.ACCOUNT_ID }}' \
        || deployment_failed=$?

      if [ $deployment_failed -ne 0 ]; then
        echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
      fi

      # If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint
      # after installing App Signals. Attempts to connect will be made for up to 10 minutes
      if [ $deployment_failed -eq 0 ]; then
        . ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
        # Arguments as used here: max attempts, command, cleanup command, sleep seconds.
        execute_and_retry 3 \
        "${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/enable-app-signals.sh \
        ${{ env.CLUSTER_NAME }} \
        ${{ env.E2E_TEST_AWS_REGION }} \
        ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}" \
        "${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/clean-app-signals.sh \
        ${{ env.CLUSTER_NAME }} \
        ${{ env.E2E_TEST_AWS_REGION }} \
        ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }} && \
        aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }} --region ${{ env.E2E_TEST_AWS_REGION }}" \
        60

        aws eks update-kubeconfig --region ${{ env.E2E_TEST_AWS_REGION }} --name ${{ env.CLUSTER_NAME }}
        # Recreate the pods after enablement, then wait for the replacements to become Ready.
        execute_and_retry 2 "kubectl delete pods --all -n ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}" "" 60
        execute_and_retry 2 "kubectl wait --for=condition=Ready --timeout=5m --request-timeout '5m' pod --all -n ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}" "" 10
      fi

      # If deployment_failed is non-zero then the terraform deployment failed, so first destroy the
      # resources created from terraform and try again. The check is `-ne 0` rather than `-eq 1`:
      # deployment_failed holds terraform's raw exit status, and any other non-zero status would
      # otherwise fall through to the `break` below and be reported as a successful deployment.
      if [ $deployment_failed -ne 0 ]; then
        echo "Cleaning up App Signal"
        ${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/clean-app-signals.sh \
        ${{ env.CLUSTER_NAME }} \
        ${{ env.E2E_TEST_AWS_REGION }} \
        ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}

        # Running clean-app-signal.sh removes the current cluster from the config. Update the cluster again for subsequent runs.
        aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }} --region ${{ env.E2E_TEST_AWS_REGION }}

        echo "Destroying terraform"
        terraform destroy -auto-approve \
          -var="test_id=${{ env.TESTING_ID }}" \
          -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \
          -var="kube_directory_path=${{ github.workspace }}/.kube" \
          -var="eks_cluster_name=${{ env.CLUSTER_NAME }}" \
          -var="eks_cluster_context_name=$(kubectl config current-context)" \
          -var="test_namespace=${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}" \
          -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \
          -var="dotnet_app_image=${{ env.MAIN_SAMPLE_APP_IMAGE_ARN }}" \
          -var="dotnet_remote_app_image=${{ env.REMOTE_SAMPLE_APP_IMAGE_ARN }}"

        retry_counter=$(($retry_counter+1))
      else
        # If deployment succeeded, then exit the loop
        break
      fi

      if [ $retry_counter -ge $max_retry ]; then
        echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code"
        exit 1
      fi
    done
# Pick the image under test (PATCH_IMAGE_ARN) based on which repository triggered the run.
# For any other repository PATCH_IMAGE_ARN is left unset.
- name: Get ECR to Patch
  run: |
    case "${{ github.event.repository.name }}" in
      amazon-cloudwatch-agent)
        echo PATCH_IMAGE_ARN="${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/cwagent-integration-test:${{ github.sha }}" >> $GITHUB_ENV
        ;;
      amazon-cloudwatch-agent-operator)
        echo PATCH_IMAGE_ARN="${{ vars.ECR_OPERATOR_STAGING_REPO }}:${{ env.CW_AGENT_OPERATOR_TAG }}" >> $GITHUB_ENV
        ;;
      aws-otel-dotnet-instrumentation)
        echo PATCH_IMAGE_ARN="${{ env.ADOT_IMAGE_NAME }}" >> $GITHUB_ENV
        ;;
    esac
# Hand the selected image to the repo-local patch action; its outputs
# (default-* / latest-* image versions) are printed by the next step.
- name: Patch Image and Check Diff
  id: patch-image
  uses: ./.github/workflows/actions/patch_image_and_check_diff
  with:
    sample-app-namespace: ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
    patch-image-arn: ${{ env.PATCH_IMAGE_ARN }}
    repository: ${{ github.event.repository.name }}
# Print the before/after image versions reported by the patch-image step, for the run log.
- name: Log Artifact Versions
  run: |
    echo "ADOT Image: Previous Version is: ${{ steps.patch-image.outputs.default-adot-image }}, Updated Version is: ${{ steps.patch-image.outputs.latest-adot-image }}"
    echo "CW Agent Image: Previous Version is: ${{ steps.patch-image.outputs.default-cw-agent-image }}, Updated Version is: ${{ steps.patch-image.outputs.latest-cw-agent-image }}"
    echo "CW Agent Operator Image: Previous Version is: ${{ steps.patch-image.outputs.default-cw-agent-operator-image }}, Updated Version is: ${{ steps.patch-image.outputs.latest-cw-agent-operator-image }}"
    echo "Fluent Bit Image: Version is: ${{ steps.patch-image.outputs.fluent-bit-image }}"
# Name of the first deployment labelled app=remote-app in the test namespace.
- name: Get Remote Service Deployment Name
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    command: >-
      echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV
    max_retry: 3
    sleep_time: 30
# Pod IP of the first pod labelled app=remote-app.
- name: Get Remote Service IP
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    command: >-
      echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV
    max_retry: 3
    sleep_time: 30
# <pod IP>:8080 of the first pod labelled app=dotnet-app.
- name: Get Sample App Endpoint
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    command: >-
      echo "APP_ENDPOINT=$(kubectl get pods -n ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }} --selector=app=dotnet-app -o jsonpath='{.items[0].status.podIP}'):8080" >> $GITHUB_ENV
    max_retry: 3
    sleep_time: 30
# Inject the resolved main/remote endpoints into the traffic-generator deployment's environment.
- name: Set endpoints for the traffic generator
  uses: ./.github/workflows/actions/execute_and_retry
  with:
    command: >-
      kubectl set env -n ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }} deployment/traffic-generator
      MAIN_ENDPOINT=${{ env.APP_ENDPOINT }} REMOTE_ENDPOINT=${{ env.REMOTE_SERVICE_POD_IP }}
    max_retry: 3
    sleep_time: 30
# Second attempt at the Gradle warm-up, run only when the first attempt failed.
# The condition must read `.outcome`: comparing the step object itself to 'failure' is
# never true, and `.conclusion` would report success because the first attempt uses
# continue-on-error.
- name: Initiate Gradlew Daemon
  if: steps.initiate-gradlew.outcome == 'failure'
  uses: ./.github/workflows/actions/execute_and_retry
  continue-on-error: true
  with:
    command: "./gradlew"
    cleanup: "./gradlew clean"
    max_retry: 3
    sleep_time: 60
# Validation for application signals telemetry data
# Runs only when the deployment step succeeded and the run was not cancelled.
# NOTE(review): the validation template lives under dotnet/eks/linux even though this is
# the Windows run - confirm the shared template is intended.
- name: Call endpoint and validate generated EMF logs
  id: log-validation
  if: ${{ steps.deploy-dotnet-app.outcome == 'success' && !cancelled() }}
  # Folded scalar: the lines are joined with single spaces into one gradle invocation.
  run: >-
    ./gradlew validator:run --args='-c dotnet/eks/linux/log-validation.yml
    --testing-id ${{ env.TESTING_ID }}
    --endpoint http://${{ env.APP_ENDPOINT }}
    --region ${{ env.E2E_TEST_AWS_REGION }}
    --account-id ${{ env.ACCOUNT_ID }}
    --metric-namespace ${{ env.METRIC_NAMESPACE }}
    --log-group ${{ env.LOG_GROUP_NAME }}
    --app-namespace ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
    --platform-info ${{ inputs.test-cluster-name }}
    --service-name dotnet-application-${{ env.TESTING_ID }}
    --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
    --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
    --rollup'
# Metric validation. Still runs when log validation failed, so one failing validator
# does not hide the results of the others; skipped only on cancellation.
- name: Call endpoints and validate generated metrics
  id: metric-validation
  if: ${{ (steps.deploy-dotnet-app.outcome == 'success' || steps.log-validation.outcome == 'failure') && !cancelled() }}
  # Folded scalar: the lines are joined with single spaces into one gradle invocation.
  run: >-
    ./gradlew validator:run --args='-c dotnet/eks/linux/metric-validation.yml
    --testing-id ${{ env.TESTING_ID }}
    --endpoint http://${{ env.APP_ENDPOINT }}
    --region ${{ env.E2E_TEST_AWS_REGION }}
    --account-id ${{ env.ACCOUNT_ID }}
    --metric-namespace ${{ env.METRIC_NAMESPACE }}
    --log-group ${{ env.LOG_GROUP_NAME }}
    --app-namespace ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
    --platform-info ${{ inputs.test-cluster-name }}
    --service-name dotnet-application-${{ env.TESTING_ID }}
    --remote-service-name dotnet-remote-application-${{ env.TESTING_ID }}
    --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
    --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
    --rollup'
# Trace validation. Still runs when log or metric validation failed; skipped only on cancellation.
- name: Call endpoints and validate generated traces
  id: trace-validation
  if: ${{ (steps.deploy-dotnet-app.outcome == 'success' || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled() }}
  # Folded scalar: the lines are joined with single spaces into one gradle invocation.
  run: >-
    ./gradlew validator:run --args='-c dotnet/eks/linux/trace-validation.yml
    --testing-id ${{ env.TESTING_ID }}
    --endpoint http://${{ env.APP_ENDPOINT }}
    --region ${{ env.E2E_TEST_AWS_REGION }}
    --account-id ${{ env.ACCOUNT_ID }}
    --log-group ${{ env.LOG_GROUP_NAME }}
    --app-namespace ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
    --platform-info ${{ inputs.test-cluster-name }}
    --service-name dotnet-application-${{ env.TESTING_ID }}
    --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
    --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
    --rollup'
# Collapse the three validator outcomes into one step output:
# "success" only if all three succeeded, "failure" otherwise (including skipped validators).
- name: Save test results
  id: validation-result
  if: always()
  run: |
    # Start pessimistic and upgrade only when every validator succeeded.
    verdict=failure
    if [ "${{ steps.log-validation.outcome }}" = "success" ] && [ "${{ steps.metric-validation.outcome }}" = "success" ] && [ "${{ steps.trace-validation.outcome }}" = "success" ]; then
      verdict=success
    fi
    echo "validation-result=$verdict" >> $GITHUB_OUTPUT
# Clean up Procedures
# Always runs, and a failure here never fails the job (continue-on-error).
- name: Clean Up Application Signals
  if: always()
  continue-on-error: true
  working-directory: enablement-script
  run: >-
    ./clean-app-signals.sh
    ${{ inputs.test-cluster-name }}
    ${{ env.E2E_TEST_AWS_REGION }}
    ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
# This step also deletes lingering resources from previous test runs
# Removes this run's namespace; best-effort, capped at 10 minutes.
- name: Delete all sample app resources
  if: always()
  timeout-minutes: 10
  continue-on-error: true
  run: |
    kubectl delete namespace ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
# Final best-effort teardown of the terraform-managed resources (always runs, capped at 5 minutes).
# NOTE(review): unlike the apply in the deploy step, this passes neither
# eks_cluster_context_name nor account_id, and takes image names from secrets - confirm
# the module has defaults for the omitted variables.
- name: Terraform destroy
  if: always()
  timeout-minutes: 5
  continue-on-error: true
  working-directory: terraform/dotnet/eks/windows
  run: |
    terraform destroy -auto-approve \
      -var="test_id=${{ env.TESTING_ID }}" \
      -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \
      -var="kube_directory_path=${{ github.workspace }}/.kube" \
      -var="eks_cluster_name=${{ inputs.test-cluster-name }}" \
      -var="test_namespace=${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}" \
      -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \
      -var="dotnet_app_image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/${{ secrets.APP_SIGNALS_DOTNET_E2E_FE_SA_IMG }}" \
      -var="dotnet_remote_app_image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/${{ secrets.APP_SIGNALS_DOTNET_E2E_RE_SA_IMG }}"
# Delete the IAM service account created for the sample app earlier in the job (best-effort).
- name: Remove aws access service account
  if: always()
  continue-on-error: true
  run: >-
    eksctl delete iamserviceaccount
    --name service-account-${{ env.TESTING_ID }}
    --namespace ${{ env.DOTNET_SAMPLE_APP_NAMESPACE }}
    --cluster ${{ inputs.test-cluster-name }}
    --region ${{ env.E2E_TEST_AWS_REGION }}