# Make encryption-related connectivity tests more reliable #7923
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: External Workloads

# Any change in triggers needs to be reflected in the concurrency group.
on:
  ### FOR TESTING PURPOSES
  # This workflow runs in the context of `main`, and ignores changes to
  # workflow files in PRs. For testing changes to this workflow from a PR:
  # - Make sure the PR uses a branch from the base repository (requires write
  #   privileges). It will not work with a branch from a fork (missing secrets).
  # - Uncomment the `pull_request` event below, commit separately with a `DO
  #   NOT MERGE` message, and push to the PR. As long as the commit is present,
  #   any push to the PR will trigger this workflow.
  # - Don't forget to remove the `DO NOT MERGE` commit once satisfied. The run
  #   will disappear from the PR checks: please provide a direct link to the
  #   successful workflow run (can be found from Actions tab) in a comment.
  #
  # pull_request: {}
  ###
  pull_request_target: {}
  # Run every 6 hours, starting from hour 4 (04:00, 10:00, 16:00, 22:00)
  schedule:
    - cron: '0 4/6 * * *'
# Runs are grouped per workflow and pull request number; events without a
# pull request (the schedule trigger) share the 'scheduled' group. A new run
# cancels any run of the same group that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || 'scheduled' }}
  cancel-in-progress: true
# Workflow-wide settings shared by every step below.
env:
  zone: us-west2-a
  vmStartupScript: .github/gcp-vm-startup.sh
  # renovate: datasource=github-releases depName=cilium/cilium
  cilium_version: v1.14.2
  kubectl_version: v1.23.6
  # Quoted so the value is passed as the literal string rather than being
  # parsed as a YAML boolean first.
  USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
jobs:
  # Creates a GCP VM and a GKE cluster, installs Cilium through an in-cluster
  # job, finishes the installation on the VM, runs the in-cluster test job,
  # and finally deletes the cloud resources. Executed once per matrix mode.
  installation-and-connectivity:
    # Only run when the workflow lives in the cilium/cilium-cli repository.
    if: ${{ github.repository == 'cilium/cilium-cli' }}
    runs-on: ubuntu-22.04
    timeout-minutes: 45
    strategy:
      matrix:
        mode: ["classic", "helm"]
    steps:
      # Both names embed the run id, the matrix mode and the run attempt,
      # which keeps them distinct across runs, modes and re-runs.
      - name: Set cluster name
        run: |
          echo "clusterName=${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ matrix.mode }}-vm${{ github.run_attempt }}" >> $GITHUB_ENV
          echo "vmName=${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ matrix.mode }}-vm${{ github.run_attempt }}" >> $GITHUB_ENV

      - name: Checkout
        uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0

      - name: Install kubectl
        run: |
          # Fetch the binary and its checksum from the same release path.
          curl -sLO "https://dl.k8s.io/release/${{ env.kubectl_version }}/bin/linux/amd64/kubectl"
          curl -sLO "https://dl.k8s.io/release/${{ env.kubectl_version }}/bin/linux/amd64/kubectl.sha256"
          # Checksum lines use "<hash>  <file>" (two spaces between the fields).
          echo "$(cat kubectl.sha256)  kubectl" | sha256sum --check
          sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
          kubectl version --client

      - name: Set up gcloud credentials
        id: 'auth'
        uses: google-github-actions/auth@35b0e87d162680511bf346c299f71c9c5c379033 # v1.1.1
        with:
          credentials_json: '${{ secrets.GCP_PR_SA_KEY }}'

      - name: Set up gcloud CLI
        uses: google-github-actions/setup-gcloud@e30db14379863a8c79331b04a9969f4c1e225e0b # v1.1.1
        with:
          project_id: ${{ secrets.GCP_PR_PROJECT_ID }}
          version: "405.0.0"

      - name: Install gke-gcloud-auth-plugin
        run: |
          gcloud components install gke-gcloud-auth-plugin

      - name: Display gcloud CLI info
        run: |
          gcloud info

      # Exposes `sha` and `owner` as step outputs. For pull request events
      # they come from the PR API (head SHA and PR number); otherwise both
      # are set to the SHA that triggered the run.
      - name: Set up job variables
        id: vars
        run: |
          # The expression renders as an empty string when the event carries no pull request.
          if [ -n "${{ github.event.issue.pull_request || github.event.pull_request }}" ]; then
            # --fail makes curl exit non-zero on an HTTP error, so the step stops
            # here instead of feeding an error body to jq.
            PR_API_JSON=$(curl --fail \
              -H "Accept: application/vnd.github.v3+json" \
              -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
              "${{ github.event.issue.pull_request.url || github.event.pull_request.url }}")
            SHA=$(echo "$PR_API_JSON" | jq -r ".head.sha")
            OWNER=$(echo "$PR_API_JSON" | jq -r ".number")
          else
            SHA=${{ github.sha }}
            OWNER=${{ github.sha }}
          fi
          echo "sha=${SHA}" >> $GITHUB_OUTPUT
          echo "owner=${OWNER}" >> $GITHUB_OUTPUT

      # Wrapped in the retry action: up to 10 attempts, 1 minute each.
      - name: Create GCP VM
        uses: nick-invision/retry@14672906e672a08bd6eeb15720e9ed3ce869cdd4 # v2.9.0
        with:
          retry_on: error
          timeout_minutes: 1
          max_attempts: 10
          command: |
            gcloud compute instances create ${{ env.vmName }} \
              --labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
              --zone ${{ env.zone }} \
              --machine-type e2-custom-2-4096 \
              --boot-disk-type pd-standard \
              --boot-disk-size 10GB \
              --preemptible \
              --image-project ubuntu-os-cloud \
              --image-family ubuntu-2004-lts \
              --metadata hostname=${{ env.vmName }} \
              --metadata-from-file startup-script=${{ env.vmStartupScript }}

      # Creates the cluster and publishes its pod CIDR as the `cluster_cidr`
      # step output, consumed by the install job below.
      - name: Create GKE cluster
        id: cluster
        run: |
          gcloud container clusters create ${{ env.clusterName }} \
            --labels "usage=${{ github.repository_owner }}-${{ github.event.repository.name }},owner=${{ steps.vars.outputs.owner }}" \
            --zone ${{ env.zone }} \
            --enable-ip-alias \
            --cluster-ipv4-cidr="/21" \
            --services-ipv4-cidr="/24" \
            --image-type COS_CONTAINERD \
            --num-nodes 2 \
            --machine-type e2-custom-2-4096 \
            --disk-type pd-standard \
            --disk-size 10GB \
            --node-taints node.cilium.io/agent-not-ready=true:NoExecute \
            --preemptible
          CLUSTER_CIDR=$(gcloud container clusters describe ${{ env.clusterName }} --zone ${{ env.zone }} --format="value(clusterIpv4Cidr)")
          echo "cluster_cidr=${CLUSTER_CIDR}" >> $GITHUB_OUTPUT

      - name: Get cluster credentials
        run: |
          gcloud container clusters get-credentials ${{ env.clusterName }} --zone ${{ env.zone }}

      - name: Create gcloud-free kubeconfig and load it in configmap
        run: |
          .github/get-kubeconfig.sh
          kubectl create configmap cilium-cli-kubeconfig -n kube-system --from-file kubeconfig

      - name: Load cilium install script in configmap
        run: |
          kubectl create configmap cilium-cli-test-script-install -n kube-system --from-file=in-cluster-test-script.sh=.github/in-cluster-test-scripts/external-workloads-install.sh

      - name: Create cilium-cli install job
        run: |
          helm install .github/cilium-cli-test-job-chart \
            --generate-name \
            --set tag=${{ steps.vars.outputs.sha }} \
            --set cilium_version=${{ env.cilium_version }} \
            --set cluster_name=${{ env.clusterName }} \
            --set job_name=cilium-cli-install \
            --set test_script_cm=cilium-cli-test-script-install \
            --set vm_name=${{ env.vmName }} \
            --set cluster_cidr=${{ steps.cluster.outputs.cluster_cidr }} \
            --set cilium_cli_mode=${{ matrix.mode }}

      # Two background waits race each other (job complete vs. job failed).
      # The status of whichever ends first is saved, the job logs are printed,
      # and the saved status becomes the exit code of the step.
      - name: Wait for install job
        env:
          timeout: 10m
        run: |
          # Background wait for job to complete or timeout
          kubectl -n kube-system wait job/cilium-cli-install --for=condition=complete --timeout=${{ env.timeout }} &
          complete_pid=$!

          # Background wait for job to fail
          (kubectl -n kube-system wait job/cilium-cli-install --for=condition=failed --timeout=${{ env.timeout }} && exit 1) &
          failed_pid=$!

          # Active wait for whichever background process ends first
          wait -n $complete_pid $failed_pid
          EXIT_CODE=$?

          # Retrieve job logs
          kubectl logs --timestamps -n kube-system job/cilium-cli-install
          exit ${EXIT_CODE}
        shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently

      - name: Copy VM install script from cilium-cli-install pod
        run: |
          kubectl -n kube-system get configmap install-external-workload-script -o go-template='{{ .data.script }}' > install-external-workload.sh
          chmod +x install-external-workload.sh

      # Copies the script to the VM, runs it there, then queries `cilium status`
      # on the VM after a short pause.
      - name: Finish installing Cilium on VM
        run: |
          gcloud compute scp install-external-workload.sh ${{ env.vmName }}:~/ --zone ${{ env.zone }}
          gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} \
            --command "~/install-external-workload.sh"
          sleep 5s
          gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} \
            --command "sudo cilium status"

      - name: Verify cluster DNS on VM
        run: |
          gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} \
            --command "nslookup -d2 -retry=10 -timeout=5 -norecurse clustermesh-apiserver.kube-system.svc.cluster.local \$(systemd-resolve --status | grep -m 1 \"Current DNS Server:\" | cut -d':' -f2)"

      - name: Ping clustermesh-apiserver from VM
        run: |
          gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} \
            --command "ping -c 3 \$(sudo cilium service list get -o jsonpath='{[?(@.spec.flags.name==\"clustermesh-apiserver\")].spec.backend-addresses[0].ip}')"

      - name: Load cilium test script in configmap
        run: |
          kubectl create configmap cilium-cli-test-script -n kube-system --from-file=in-cluster-test-script.sh=.github/in-cluster-test-scripts/external-workloads.sh

      - name: Create cilium-cli test job
        run: |
          helm install .github/cilium-cli-test-job-chart \
            --generate-name \
            --set tag=${{ steps.vars.outputs.sha }} \
            --set job_name=cilium-cli \
            --set test_script_cm=cilium-cli-test-script \
            --set cilium_cli_mode=${{ matrix.mode }}

      # Same complete-vs-failed race as the install job, with a longer timeout.
      - name: Wait for test job
        env:
          timeout: 15m
        run: |
          # Background wait for job to complete or timeout
          kubectl -n kube-system wait job/cilium-cli --for=condition=complete --timeout=${{ env.timeout }} &
          complete_pid=$!

          # Background wait for job to fail
          (kubectl -n kube-system wait job/cilium-cli --for=condition=failed --timeout=${{ env.timeout }} && exit 1) &
          failed_pid=$!

          # Active wait for whichever background process ends first
          wait -n $complete_pid $failed_pid
          EXIT_CODE=$?

          # Retrieve job logs
          kubectl logs --timestamps -n kube-system job/cilium-cli
          exit ${EXIT_CODE}
        shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently

      # Runs only when an earlier step did not succeed. Collects VM and
      # cluster state and writes a sysdump for the upload step.
      - name: Post-test information gathering
        if: ${{ !success() }}
        run: |
          echo "=== Retrieve VM state ==="
          gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} --command "sudo cilium status"
          gcloud compute ssh ${{ env.vmName }} --zone ${{ env.zone }} --command "sudo docker logs cilium --timestamps"

          echo "=== Install latest stable CLI ==="
          curl -sSL --remote-name-all https://github.com/cilium/cilium-cli/releases/latest/download/cilium-linux-amd64.tar.gz{,.sha256sum}
          sha256sum --check cilium-linux-amd64.tar.gz.sha256sum
          sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/bin
          rm cilium-linux-amd64.tar.gz{,.sha256sum}
          cilium version

          echo "=== Retrieve cluster state ==="
          kubectl get pods --all-namespaces -o wide
          kubectl get cew --all-namespaces -o wide
          kubectl get cep --all-namespaces -o wide
          cilium status
          cilium clustermesh status
          cilium clustermesh vm status
          cilium sysdump --output-filename cilium-sysdump-out
        shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently

      # Always runs. Waits until no operation is still running against the
      # cluster, then deletes the cluster (asynchronously) and the VM.
      - name: Clean up GKE
        if: ${{ always() }}
        run: |
          while [ "$(gcloud container operations list --filter="status=RUNNING AND targetLink~${{ env.clusterName }}" --format="value(name)")" ];do
            echo "cluster has an ongoing operation, waiting for all operations to finish"; sleep 15
          done
          gcloud container clusters delete ${{ env.clusterName }} --zone ${{ env.zone }} --quiet --async
          gcloud compute instances delete ${{ env.vmName }} --zone ${{ env.zone }} --quiet
        shell: bash {0} # Disable default fail-fast behavior so that all commands run independently

      # Publishes the sysdump archive when the run did not succeed.
      - name: Upload artifacts
        if: ${{ !success() }}
        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
        with:
          name: cilium-sysdump-out.zip
          path: cilium-sysdump-out.zip
          retention-days: 5