E2E Preset Test #954

Workflow file for this run

.github/workflows/e2e-preset-test.yml at bd1bcd2

	name: E2E Preset Test

	# Runs when the "Build and Push Preset Models" workflow completes (the jobs
	# themselves gate on that run's conclusion), or when started manually.
	on:
	  workflow_run:
	    workflows: ["Build and Push Preset Models"]
	    types:
	      - completed
	  workflow_dispatch:
	    inputs:
	      force-run-all:
	        type: boolean
	        default: false
	        description: "Test all models for E2E"
	      force-run-all-phi-models:
	        type: boolean
	        default: false
	        description: "Test all Phi models for E2E"
	      test-on-vllm:
	        type: boolean
	        default: false
	        description: "Test on VLLM runtime"

	# Workflow-wide environment, visible to every step of every job.
	env:
	  GO_VERSION: "1.22"
	  # Falls back to ref_name whenever head_ref is empty.
	  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
	  # The force/runtime switches only take effect on manual (workflow_dispatch) runs;
	  # dispatch inputs are compared as the string 'true'.
	  FORCE_RUN_ALL: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}
	  FORCE_RUN_ALL_PHI: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all-phi-models == 'true' }}
	  # 'vllm' when test-on-vllm was ticked on a manual run, otherwise 'hf'.
	  RUNTIME: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.test-on-vllm == 'true') && 'vllm' || 'hf' }}

	# Token permissions for all jobs in this workflow.
	permissions:
	  # presumably required by the secret-less azure/login step (client-id + tenant-id only) — confirm
	  id-token: write
	  contents: read

	jobs:
	  # Resolves which preset models to test and joins each one with its entry in
	  # .github/e2e-preset-configs.json. The outputs drive the test job's matrix.
	  determine-models:
	    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
	    runs-on: ubuntu-latest
	    environment: preset-env
	    outputs:
	      matrix: ${{ steps.affected_models.outputs.matrix }}
	      is_matrix_empty: ${{ steps.check_matrix_empty.outputs.is_empty }}
	      full_matrix: ${{ steps.images.outputs.full_matrix }}
	    steps:
	      - name: Checkout
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
	        with:
	          submodules: true
	          fetch-depth: 0

	      # This script should output a JSON array of model names
	      - name: Determine Affected Models
	        id: affected_models
	        env:
	          # Passed through the environment instead of being spliced into the script text.
	          # FORCE_RUN_ALL and FORCE_RUN_ALL_PHI reach the script via the workflow-level env.
	          PR_BRANCH: ${{ env.BRANCH_NAME }}
	        run: |
	          python3 .github/workflows/kind-cluster/determine_models.py

	      - name: Print Determined Models
	        env:
	          MATRIX: ${{ steps.affected_models.outputs.matrix }}
	        run: |
	          echo "Output from determine_models: $MATRIX"

	      - name: Check if Matrix is Empty
	        id: check_matrix_empty
	        env:
	          # Kept in a variable so the JSON's own double quotes cannot break the shell quoting.
	          MATRIX: ${{ steps.affected_models.outputs.matrix }}
	        run: |
	          if [ -z "$MATRIX" ] || [ "$MATRIX" = "[]" ]; then
	            echo "is_empty=true" >> "$GITHUB_OUTPUT"
	          else
	            echo "is_empty=false" >> "$GITHUB_OUTPUT"
	          fi

	      - name: Add Config info for Testing
	        if: steps.check_matrix_empty.outputs.is_empty == 'false'
	        id: images
	        env:
	          MATRIX: ${{ steps.affected_models.outputs.matrix }}
	        run: |
	          # Read the additional configurations from e2e-preset-configs.json
	          CONFIGS=$(jq -c '.matrix.image' .github/e2e-preset-configs.json)

	          echo "CONFIGS:"
	          echo "$CONFIGS"

	          # Pseudocode for combining matrices
	          # COMBINED_MATRIX = []
	          # for model in MATRIX:
	          #   for config in CONFIGS:
	          #     if config['name'] == model['name']:
	          #       combined = {model, config}
	          #       COMBINED_MATRIX.append(combined)
	          #       break
	          #
	          # Note: the jq filter has no `break`; it emits one element per matching
	          # config, and a model with no matching config is dropped from the result.
	          COMBINED_MATRIX=$(echo "$MATRIX" | jq --argjson configs "$CONFIGS" -c '
	            map(. as $model | $configs[] | select(.name == $model.name) | $model + .)
	          ')

	          echo "full_matrix=$COMBINED_MATRIX" >> "$GITHUB_OUTPUT"

	      - name: Print Combined Matrix
	        if: steps.check_matrix_empty.outputs.is_empty == 'false'
	        env:
	          FULL_MATRIX: ${{ steps.images.outputs.full_matrix }}
	        run: |
	          echo "Combined Matrix:"
	          echo "$FULL_MATRIX"

	  # Runs once per matrix element: creates a dedicated AKS nodepool, deploys the
	  # model's manifests, probes its HTTP endpoints, then tears everything down.
	  e2e-preset-tests:
	    needs: determine-models
	    if: needs.determine-models.outputs.is_matrix_empty == 'false' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success')
	    runs-on: ubuntu-latest
	    environment: preset-env
	    strategy:
	      fail-fast: false
	      matrix:
	        # Ex matrix element:
	        # {"name":"falcon-40b","type":"text-generation","version":"#",
	        # "runtime":"tfs","tag":"0.0.1","node-count":1,
	        # "node-vm-size":"Standard_NC96ads_A100_v4", "node-osdisk-size":400}
	        model: ${{ fromJson(needs.determine-models.outputs.full_matrix) }}
	    steps:
	      - name: Checkout
	        # Same pinned commit as the checkout in determine-models.
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
	        with:
	          submodules: true
	          fetch-depth: 0

	      - name: Set OSS Flag
	        run: echo "MODEL_IS_OSS=${{ matrix.model.OSS }}" >> "$GITHUB_ENV"

	      - name: 'Az CLI login'
	        # NOTE(review): the version in this ref was lost to e-mail obfuscation in the
	        # extracted copy of this file; v2.2.0 is assumed — confirm against the repository.
	        uses: azure/login@v2.2.0
	        with:
	          client-id: ${{ secrets.AZURE_CLIENT_ID }}
	          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
	          allow-no-subscriptions: true

	      - name: 'Set ACR Subscription'
	        run: az account set --subscription "${{ secrets.AZURE_SUBSCRIPTION_ID }}"

	      - name: Check if Image exists in ACR
	        id: check_image
	        run: |
	          ACR_NAME=${{ secrets.ACR_AMRT_USERNAME }}
	          IMAGE_NAME=${{ matrix.model.name }}
	          TAG=${{ matrix.model.tag }}

	          # Use '|| true' to prevent script from exiting with an error if the repository is not found
	          TAGS=$(az acr repository show-tags -n "$ACR_NAME" --repository "$IMAGE_NAME" --output tsv || true)

	          if [[ -z "$TAGS" ]]; then
	            echo "Image $IMAGE_NAME:$TAG or repository not found in $ACR_NAME."
	            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
	          # -x/-F: match the tag as a whole line and literally, so the dots in a
	          # version such as 0.0.1 are not treated as regex wildcards.
	          elif echo "$TAGS" | grep -qxF -- "$TAG"; then
	            echo "IMAGE_EXISTS=true" >> "$GITHUB_OUTPUT"
	          else
	            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
	            echo "Image $IMAGE_NAME:$TAG not found in $ACR_NAME."
	          fi

	      - name: Exit if Image for testing does not exist
	        if: steps.check_image.outputs.IMAGE_EXISTS == 'false'
	        run: |
	          echo "Image doesn't exist in ACR, remember to build image for testing first (preset-image-build)"
	          exit 1

	      - name: Set up kubectl context
	        run: |
	          az aks get-credentials --resource-group llm-test --name GitRunner

	      - name: Get Nodepool Name
	        id: get_nodepool_name
	        run: |
	          NAME_SUFFIX=${{ matrix.model.name }}
	          NAME_SUFFIX_WITHOUT_DASHES=${NAME_SUFFIX//-/} # Removing all '-' symbols
	          NAME_SUFFIX_WITHOUT_DASHES=${NAME_SUFFIX_WITHOUT_DASHES//./} # Removing all '.' symbols

	          # Keep at most the first 12 characters; shorter names pass through unchanged.
	          TRUNCATED_NAME_SUFFIX=${NAME_SUFFIX_WITHOUT_DASHES:0:12}
	          echo "Nodepool Name: $TRUNCATED_NAME_SUFFIX"
	          echo "NODEPOOL_NAME=$TRUNCATED_NAME_SUFFIX" >> "$GITHUB_OUTPUT"

	      - name: Create Nodepool
	        run: |
	          # An empty result means the nodepool does not exist yet.
	          NODEPOOL_EXIST=$(az aks nodepool show \
	            --name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
	            --cluster-name GitRunner \
	            --resource-group llm-test \
	            --query 'name' -o tsv || echo "")
	          echo "NODEPOOL_EXIST: $NODEPOOL_EXIST"
	          if [ -z "$NODEPOOL_EXIST" ]; then
	            az aks nodepool add \
	              --name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
	              --cluster-name GitRunner \
	              --resource-group llm-test \
	              --node-count ${{ matrix.model.node-count }} \
	              --node-vm-size ${{ matrix.model.node-vm-size }} \
	              --node-osdisk-size ${{ matrix.model.node-osdisk-size }} \
	              --labels pool=${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
	              --node-taints sku=gpu:NoSchedule \
	              --aks-custom-headers UseGPUDedicatedVHD=true
	          else
	            NODEPOOL_STATE=$(az aks nodepool show \
	              --name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
	              --cluster-name GitRunner \
	              --resource-group llm-test \
	              --query 'provisioningState' -o tsv)
	            echo "NODEPOOL_STATE: $NODEPOOL_STATE"
	            if [ "$NODEPOOL_STATE" != "Succeeded" ]; then
	              echo "Nodepool exists but is not in a Succeeded state. Please check manually."
	              exit 1
	            else
	              echo "Nodepool already exists and is in a running state."
	            fi
	          fi

	      - name: Get testing workload
	        id: workload
	        run: |
	          # The matrix entry may name a separate workload; otherwise the model name is used.
	          WORKLOAD_NAME=${{ matrix.model.workload || matrix.model.name }}
	          echo "WORKLOAD_NAME=$WORKLOAD_NAME" >> "$GITHUB_OUTPUT"
	          echo "WORKLOAD_FILE_PREFIX=presets/workspace/test/manifests/$WORKLOAD_NAME/$WORKLOAD_NAME" >> "$GITHUB_OUTPUT"

	      - name: Create Service
	        run: |
	          kubectl apply -f ${{ steps.workload.outputs.WORKLOAD_FILE_PREFIX }}-service.yaml

	      - name: Retrieve External Service IP
	        id: get_ip
	        run: |
	          # Despite the step name, this reads the Service's clusterIP.
	          SERVICE_IP=$(kubectl get svc ${{ steps.workload.outputs.WORKLOAD_NAME }} -o=jsonpath='{.spec.clusterIP}')
	          echo "Service IP is $SERVICE_IP"
	          echo "SERVICE_IP=$SERVICE_IP" >> "$GITHUB_OUTPUT"

	      - name: Get Resource Type
	        id: resource
	        run: |
	          # Models whose name contains "llama" are handled as a statefulset, all others as a deployment.
	          RESOURCE_TYPE=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "statefulset" || echo "deployment")
	          echo "RESOURCE_TYPE=$RESOURCE_TYPE" >> "$GITHUB_OUTPUT"

	      - name: Replace IP and Deploy Resource to K8s
	        run: |
	          # llama manifests carry no runtime suffix; every other model uses <prefix>_<runtime>.yaml
	          POSTFIX=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "" || echo "_${{ env.RUNTIME }}")
	          WORKLOAD_FILE=${{ steps.workload.outputs.WORKLOAD_FILE_PREFIX }}$POSTFIX.yaml

	          sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" "$WORKLOAD_FILE"
	          sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" "$WORKLOAD_FILE"
	          sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" "$WORKLOAD_FILE"
	          kubectl apply -f "$WORKLOAD_FILE"

	      - name: Wait for Resource to be ready
	        run: |
	          kubectl rollout status ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} --timeout=1800s

	      - name: Check Adapter Loading from Logs
	        if: matrix.model.loads_adapter == true
	        run: |
	          POD_NAME=$(kubectl get pods -l app=${{ steps.workload.outputs.WORKLOAD_NAME }} -o jsonpath="{.items[0].metadata.name}")
	          kubectl logs "$POD_NAME" | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}" || (echo "Adapter not loaded or incorrect adapter loaded" && exit 1)

	      - name: Install testing commands
	        run: |
	          kubectl exec ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} -- apt-get update
	          kubectl exec ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} -- apt-get install -y curl

	      - name: Test healthz endpoint
	        run: |
	          # --fail makes curl (and therefore this step) exit non-zero on an HTTP error status;
	          # with plain -s a 4xx/5xx response would still let the step pass.
	          kubectl exec ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} -- \
	            curl -sS --fail http://localhost:5000/health

	      - name: Test inference endpoint
	        run: |
	          echo "Testing inference for ${{ matrix.model.name }}"
	          # Substring (glob) matches on the model name pick the request shape:
	          #   llama chat models -> /chat with a dialog, other llama models -> /generate,
	          #   vllm runtime -> OpenAI-style /v1/chat/completions, everything else -> /chat.
	          # Every curl uses --fail so an HTTP error status fails the step.
	          if [[ "${{ matrix.model.name }}" == *"llama"* && "${{ matrix.model.name }}" == *"-chat"* ]]; then
	            kubectl exec ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} -- \
	              curl -sS --fail -X POST \
	              -H "Content-Type: application/json" \
	              -d '{
	                "input_data": {
	                  "input_string": [
	                    [
	                      {
	                        "role": "system",
	                        "content": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe."
	                      },
	                      {
	                        "role": "user",
	                        "content": "Write a brief birthday message to John"
	                      }
	                    ]
	                  ]
	                }
	              }' \
	              http://localhost:5000/chat
	          elif [[ "${{ matrix.model.name }}" == *"llama"* ]]; then
	            kubectl exec ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} -- \
	              curl -sS --fail -X POST \
	              -H "Content-Type: application/json" \
	              -d '{
	                "prompts": [
	                  "I believe the meaning of life is",
	                  "Simply put, the theory of relativity states that ",
	                  "A brief message congratulating the team on the launch: Hi everyone, I just ",
	                  "Translate English to French: sea otter => loutre de mer, peppermint => menthe poivrée, plush girafe => girafe peluche, cheese =>"
	                ],
	                "parameters": {
	                  "max_gen_len": 128
	                }
	              }' \
	              http://localhost:5000/generate
	          elif [[ "${{ env.RUNTIME }}" == "vllm" ]]; then
	            kubectl exec ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} -- \
	              curl -sS --fail -X POST \
	              -H "accept: application/json" \
	              -H "Content-Type: application/json" \
	              -d '{
	                "model": "test",
	                "messages": [
	                  {
	                    "role": "system",
	                    "content": "You are a helpful assistant."
	                  },
	                  {
	                    "role": "user",
	                    "content": "Hello!"
	                  }
	                ]
	              }' \
	              http://localhost:5000/v1/chat/completions
	          else
	            kubectl exec ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} -- \
	              curl -sS --fail -X POST \
	              -H "accept: application/json" \
	              -H "Content-Type: application/json" \
	              -d '{
	                "prompt":"Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
	                "return_full_text": false,
	                "clean_up_tokenization_spaces": false,
	                "prefix": null,
	                "handle_long_generation": null,
	                "generate_kwargs": {
	                  "max_length":200,
	                  "min_length":0,
	                  "do_sample":true,
	                  "early_stopping":false,
	                  "num_beams":1,
	                  "num_beam_groups":1,
	                  "diversity_penalty":0.0,
	                  "temperature":1.0,
	                  "top_k":10,
	                  "top_p":1,
	                  "typical_p":1,
	                  "repetition_penalty":1,
	                  "length_penalty":1,
	                  "no_repeat_ngram_size":0,
	                  "encoder_no_repeat_ngram_size":0,
	                  "bad_words_ids":null,
	                  "num_return_sequences":1,
	                  "output_scores":false,
	                  "return_dict_in_generate":false,
	                  "forced_bos_token_id":null,
	                  "forced_eos_token_id":null,
	                  "remove_invalid_values":null
	                }
	              }' \
	              http://localhost:5000/chat
	          fi

	      - name: Cleanup
	        # Runs even when an earlier step failed, so test resources are not left behind.
	        if: always()
	        run: |
	          # Only proceed if RESOURCE_TYPE is set (else resource wasn't created)
	          if [ -n "${{ steps.resource.outputs.RESOURCE_TYPE }}" ]; then
	            # Use RESOURCE_TYPE from the previous step
	            RESOURCE_TYPE=${{ steps.resource.outputs.RESOURCE_TYPE }}

	            # Check and Delete K8s Resource (Deployment or StatefulSet)
	            if kubectl get $RESOURCE_TYPE ${{ steps.workload.outputs.WORKLOAD_NAME }} > /dev/null 2>&1; then
	              # Dump the workload logs into the job log before the resource is removed.
	              kubectl logs $RESOURCE_TYPE/${{ steps.workload.outputs.WORKLOAD_NAME }}
	              kubectl delete $RESOURCE_TYPE ${{ steps.workload.outputs.WORKLOAD_NAME }}
	            fi
	          fi

	          # Check and Delete K8s Service if it exists
	          if kubectl get svc ${{ steps.workload.outputs.WORKLOAD_NAME }} > /dev/null 2>&1; then
	            kubectl delete svc ${{ steps.workload.outputs.WORKLOAD_NAME }}
	          fi

	          # Check and Delete AKS Nodepool if it exists
	          if [ -n "${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}" ]; then
	            NODEPOOL_EXIST=$(az aks nodepool show \
	              --name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
	              --cluster-name GitRunner \
	              --resource-group llm-test \
	              --query 'name' -o tsv || echo "")

	            if [ -n "$NODEPOOL_EXIST" ]; then
	              az aks nodepool delete \
	                --name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
	                --cluster-name GitRunner \
	                --resource-group llm-test
	            fi
	          fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

E2E Preset Test #954

Workflow file

E2E Preset Test #954

Jobs

Run details

Workflow file for this run