Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: Part 4.5 - Fix Minor Bugs in E2E Preset Pipeline #222

Merged
8 commits merged on Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 34 additions & 29 deletions .github/workflows/e2e-preset-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,16 @@ jobs:
')

echo "matrix=$COMBINED_MATRIX" >> $GITHUB_OUTPUT


- name: Print Combined Matrix
run: |
echo "Combined Matrix:"
echo '${{ steps.images.outputs.matrix }}'
e2e-preset-tests:
if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
needs: determine-models
runs-on: [self-hosted, 'username:runner-2','username:runner-3']
runs-on: ubuntu-latest
environment: e2e-test
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -93,8 +98,8 @@ jobs:
id: check_test_image
run: |
ACR_NAME=${{ secrets.ACR_AMRT_USERNAME }}
IMAGE_NAME=${{ matrix.name }}
TAG=${{ matrix.tag }}
IMAGE_NAME=${{ matrix.model.name }}
TAG=${{ matrix.model.tag }}

TAGS=$(az acr repository show-tags -n $ACR_NAME --repository $IMAGE_NAME --output tsv)

Expand All @@ -109,8 +114,8 @@ jobs:
id: check_prod_image
run: |
ACR_NAME=${{ secrets.ACR_AMR_USERNAME }}
IMAGE_NAME=${{ matrix.name }}
TAG=${{ matrix.tag }}
IMAGE_NAME=${{ matrix.model.name }}
TAG=${{ matrix.model.tag }}

TAGS=$(az acr repository show-tags -n $ACR_NAME --repository $IMAGE_NAME --output tsv)

Expand All @@ -135,7 +140,7 @@ jobs:
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
id: get_nodepool_name
run: |
NAME_SUFFIX=${{ matrix.name }}
NAME_SUFFIX=${{ matrix.model.name }}
NAME_SUFFIX_WITHOUT_DASHES=${NAME_SUFFIX//-/} # Removing all '-' symbols

if [ ${#NAME_SUFFIX_WITHOUT_DASHES} -gt 12 ]; then
Expand All @@ -160,9 +165,9 @@ jobs:
--name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
--cluster-name GitRunner \
--resource-group llm-test \
--node-count ${{ matrix.node-count }} \
--node-vm-size ${{ matrix.node-vm-size }} \
--node-osdisk-size ${{ matrix.node-osdisk-size }} \
--node-count ${{ matrix.model.node-count }} \
--node-vm-size ${{ matrix.model.node-vm-size }} \
--node-osdisk-size ${{ matrix.model.node-osdisk-size }} \
--labels pool=${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
--node-taints sku=gpu:NoSchedule \
--aks-custom-headers UseGPUDedicatedVHD=true
Expand All @@ -183,14 +188,14 @@ jobs:

- name: Create Service
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
run: kubectl apply -f presets/test/manifests/${{ matrix.name }}/${{ matrix.name }}-service.yaml
run: kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-service.yaml

- name: Retrieve External Service IP
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
id: get_ip
run: |
while [[ -z $SERVICE_IP ]]; do
SERVICE_IP=$(kubectl get svc ${{ matrix.name }} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}')
SERVICE_IP=$(kubectl get svc ${{ matrix.model.name }} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}')
sleep 5
done
echo "Service IP is $SERVICE_IP"
Expand All @@ -199,15 +204,15 @@ jobs:
- name: Replace IP and Deploy Statefulset to K8s
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
run: |
sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.name }}/${{ matrix.name }}-statefulset.yaml
sed -i "s/TAG_HERE/${{ matrix.tag }}/g" presets/test/manifests/${{ matrix.name }}/${{ matrix.name }}-statefulset.yaml
sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.name }}/${{ matrix.name }}-statefulset.yaml
kubectl apply -f presets/test/manifests/${{ matrix.name }}/${{ matrix.name }}-statefulset.yaml
sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml
kubectl apply -f presets/test/manifests/${{ matrix.model.name }}/${{ matrix.model.name }}-statefulset.yaml

- name: Wait for Statefulset to be ready
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
run: |
kubectl rollout status statefulset/${{ matrix.name }}
kubectl rollout status statefulset/${{ matrix.model.name }}

- name: Test home endpoint
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
Expand All @@ -222,8 +227,8 @@ jobs:
- name: Test inference endpoint
if: steps.check_test_image.outputs.IMAGE_EXISTS == 'true' && steps.check_prod_image.outputs.IMAGE_EXISTS == 'false'
run: |
if [[ "${{ matrix.name }}" == *"llama"* && "${{ matrix.name }}" == *"-chat"* ]]; then
echo "Testing inference for ${{ matrix.name }}"
if [[ "${{ matrix.model.name }}" == *"llama"* && "${{ matrix.model.name }}" == *"-chat"* ]]; then
echo "Testing inference for ${{ matrix.model.name }}"
curl -X POST \
-H "Content-Type: application/json" \
-d '{
Expand All @@ -243,8 +248,8 @@ jobs:
}
}' \
http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/chat
elif [[ "${{ matrix.name }}" == *"llama"* ]]; then
echo "Testing inference for ${{ matrix.name }}"
elif [[ "${{ matrix.model.name }}" == *"llama"* ]]; then
echo "Testing inference for ${{ matrix.model.name }}"
curl -X POST \
-H "Content-Type: application/json" \
-d '{
Expand All @@ -259,8 +264,8 @@ jobs:
}
}' \
http://${{ steps.get_ip.outputs.SERVICE_IP }}:80/generate
elif [[ "${{ matrix.name }}" == *"falcon"* ]]; then
echo "Testing inference for ${{ matrix.name }}"
elif [[ "${{ matrix.model.name }}" == *"falcon"* ]]; then
echo "Testing inference for ${{ matrix.model.name }}"
curl -X POST \
-H "accept: application/json" \
-H "Content-Type: application/json" \
Expand All @@ -278,8 +283,8 @@ jobs:

TEST_ACR_NAME=${{ secrets.ACR_AMRT_USERNAME }}
PROD_ACR_NAME=${{ secrets.ACR_AMR_USERNAME }}
IMAGE_NAME=${{ matrix.name }}
TAG=${{ matrix.tag }}
IMAGE_NAME=${{ matrix.model.name }}
TAG=${{ matrix.model.tag }}

# Formulate the source image reference
SOURCE_IMAGE="$TEST_ACR_NAME.azurecr.io/$IMAGE_NAME:$TAG"
Expand All @@ -291,13 +296,13 @@ jobs:
if: always()
run: |
# Check and Delete K8s Service if it exists
if kubectl get svc ${{ matrix.name }} > /dev/null 2>&1; then
kubectl delete svc ${{ matrix.name }}
if kubectl get svc ${{ matrix.model.name }} > /dev/null 2>&1; then
kubectl delete svc ${{ matrix.model.name }}
fi

# Check and Delete K8s StatefulSet if it exists
if kubectl get statefulset ${{ matrix.name }} > /dev/null 2>&1; then
kubectl delete statefulset ${{ matrix.name }}
if kubectl get statefulset ${{ matrix.model.name }} > /dev/null 2>&1; then
kubectl delete statefulset ${{ matrix.model.name }}
fi

# Check and Delete AKS Nodepool if it exists
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/preset-image-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
build-models:
needs: determine-models
if: needs.determine-models.outputs.is_matrix_empty == 'false'
runs-on: [self-hosted, 'username:runner-2', 'username:runner-3']
runs-on: [self-hosted, 'hostname:model-server']
strategy:
fail-fast: false
matrix:
Expand Down
Loading