From dcc0b3b90dba5bdf7c71230a06392ae1a65ac0b6 Mon Sep 17 00:00:00 2001 From: Ishaan Sehgal Date: Thu, 9 Jan 2025 17:54:04 -0800 Subject: [PATCH 1/4] fix: Update CodeCov Badge Signed-off-by: Ishaan Sehgal --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 072286679..86cd0dcd4 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ![GitHub Release](https://img.shields.io/github/v/release/kaito-project/kaito) [![Go Report Card](https://goreportcard.com/badge/github.com/kaito-project/kaito)](https://goreportcard.com/report/github.com/kaito-project/kaito) ![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/kaito-project/kaito) -[![codecov](https://codecov.io/gh/Azure/kaito/graph/badge.svg?token=XAQLLPB2AR)](https://codecov.io/gh/Azure/kaito) +[![codecov](https://codecov.io/gh/kaito-project/kaito/graph/badge.svg?token=XAQLLPB2AR)](https://codecov.io/gh/kaito-project/kaito) | ![notification](docs/img/bell.svg) What is NEW! | |-------------------------------------------------| From 5ad5cae237bfe2f449223771a4ecc4ef122bb94f Mon Sep 17 00:00:00 2001 From: Heba <31887807+helayoty@users.noreply.github.com> Date: Thu, 9 Jan 2025 18:26:58 -0800 Subject: [PATCH 2/4] ci: Make publish helm chart and create release workflows sequential (#814) **Reason for Change**: As part of tightening the security of the Kaito pipelines, we enforced approval-based execution for all workflows that write to the repo. One side effect was that the auto-triggered 'pages build and deployment' workflow also required an approval, which was missing; the workflow was therefore canceled when a new release was created, and the helm GitHub Pages repo did not get updated (#808). As a solution, we change the publish helm chart and create release workflows to run sequentially instead of in parallel, so a new release won't get created until everything (including GitHub Pages) is updated successfully. 
**Requirements** - [ ] added unit tests and e2e tests (if applicable). **Issue Fixed**: **Notes for Reviewers**: Signed-off-by: Heba Elayoty --- .github/workflows/helm-chart.yml | 14 +++++++++++++- .github/workflows/publish-workspace-mcr-image.yml | 6 +++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/helm-chart.yml b/.github/workflows/helm-chart.yml index d6374f0b5..325738b90 100644 --- a/.github/workflows/helm-chart.yml +++ b/.github/workflows/helm-chart.yml @@ -2,7 +2,7 @@ name: publish_helm_chart on: repository_dispatch: - types: [ create-release ] + types: [ publish-helm-chart ] workflow_dispatch: permissions: @@ -36,3 +36,15 @@ jobs: charts_dir: charts/kaito target_dir: charts/kaito linting: off + + create-release: + runs-on: ubuntu-latest + needs: [ publish-helm ] + steps: + - name: 'Dispatch release tag to create a release' + uses: peter-evans/repository-dispatch@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + event-type: create-release + client-payload: '{"tag": "${{ github.event.client_payload.tag }}"}' + diff --git a/.github/workflows/publish-workspace-mcr-image.yml b/.github/workflows/publish-workspace-mcr-image.yml index 2b3d54b79..97b73ca43 100644 --- a/.github/workflows/publish-workspace-mcr-image.yml +++ b/.github/workflows/publish-workspace-mcr-image.yml @@ -46,13 +46,13 @@ jobs: VERSION: ${{ needs.get-tag.outputs.release-tag }} REGISTRY: ${{ secrets.KAITO_MCR_REGISTRY }}/public/aks/kaito - create-release: + publish-helm-chart: runs-on: ubuntu-latest needs: [ build-publish-mcr-image ] steps: - - name: 'Dispatch release tag' + - name: 'Dispatch release tag for helm chart' uses: peter-evans/repository-dispatch@v3 with: token: ${{ secrets.GITHUB_TOKEN }} - event-type: create-release + event-type: publish-helm-chart client-payload: '{"tag": "${{ github.event.client_payload.tag }}"}' From b4d9a85067016a9e21e6aefff592e4e54a253ec4 Mon Sep 17 00:00:00 2001 From: Heba <31887807+helayoty@users.noreply.github.com> Date: 
Thu, 9 Jan 2025 20:46:16 -0800 Subject: [PATCH 3/4] test: Add RAG and other Python UT coverage to the codecov report (#815) **Reason for Change**: Add RAG and other Python code UT coverage to the codecov report to ensure that the RAG server changes for consistency are tested and the code coverage is reported accurately. **Requirements** - [ ] added unit tests and e2e tests (if applicable). **Issue Fixed**: **Notes for Reviewers**: --------- Signed-off-by: Heba Elayoty --- .github/workflows/unit-tests-ragengine.yml | 7 +++++++ .github/workflows/unit-tests.yml | 2 ++ Makefile | 15 +++++++++------ codecov.yml | 0 4 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 codecov.yml diff --git a/.github/workflows/unit-tests-ragengine.yml b/.github/workflows/unit-tests-ragengine.yml index 144ef4658..bfec02a4c 100644 --- a/.github/workflows/unit-tests-ragengine.yml +++ b/.github/workflows/unit-tests-ragengine.yml @@ -38,3 +38,10 @@ jobs: - name: Run unit tests run: | make rag-service-test + + - name: Upload Coverage to Codecov + uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303 # v5.1.2 + with: + fail_ci_if_error: true + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a6e20728e..637b4d8b0 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -39,9 +39,11 @@ jobs: uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: ${{ env.GO_VERSION }} + - name: Generate APIs run: | make generate + - name: Run unit tests & Generate coverage run: | make unit-test diff --git a/Makefile b/Makefile index d78cfc18f..0d1226a2f 100644 --- a/Makefile +++ b/Makefile @@ -105,12 +105,14 @@ unit-test: ## Run unit tests. 
.PHONY: rag-service-test rag-service-test: pip install -r presets/ragengine/requirements.txt - pytest -o log_cli=true -o log_cli_level=INFO presets/ragengine/tests + pip install pytest-cov + pytest --cov -o log_cli=true -o log_cli_level=INFO presets/ragengine/tests .PHONY: tuning-metrics-server-test tuning-metrics-server-test: pip install -r ./presets/workspace/dependencies/requirements-test.txt - pytest -o log_cli=true -o log_cli_level=INFO presets/workspace/tuning/text-generation/metrics + pip install pytest-cov + pytest --cov -o log_cli=true -o log_cli_level=INFO presets/workspace/tuning/text-generation/metrics ## -------------------------------------- ## E2E tests @@ -118,8 +120,9 @@ tuning-metrics-server-test: inference-api-e2e: pip install -r ./presets/workspace/dependencies/requirements-test.txt - pytest -o log_cli=true -o log_cli_level=INFO presets/workspace/inference/vllm - pytest -o log_cli=true -o log_cli_level=INFO presets/workspace/inference/text-generation + pip install pytest-cov + pytest --cov -o log_cli=true -o log_cli_level=INFO presets/workspace/inference/vllm + pytest --cov -o log_cli=true -o log_cli_level=INFO presets/workspace/inference/text-generation # Ginkgo configurations GINKGO_FOCUS ?= @@ -241,8 +244,8 @@ docker-build-ragengine: docker-buildx --tag $(REGISTRY)/$(RAGENGINE_IMG_NAME):$(RAGENGINE_IMG_TAG) . .PHONY: docker-build-rag-service -docker-build-ragservice: docker buildx - docker buildx build \ +docker-build-ragservice: docker-buildx + docker buildx build \ --platform="linux/$(ARCH)" \ --output=$(OUTPUT_TYPE) \ --file ./docker/ragengine/service/Dockerfile \ diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 000000000..e69de29bb From 3d04ba62f4dd7f8e9ec9b155326f73e31c5e2947 Mon Sep 17 00:00:00 2001 From: Fei Guo Date: Fri, 10 Jan 2025 13:29:38 -0800 Subject: [PATCH 4/4] docs: Update to use Standard_NC24ads_A100_v4 as default SKU in docs (#818) v3 GPU SKUs have been deprecated by Azure. 
Hence, using v4 GPU as a default SKU in the documentation. --- README.md | 6 +++--- .../custom-deployment-template.yaml | 2 +- .../reference-image-deployment.yaml | 4 ++-- docs/inference/README.md | 6 +++--- examples/inference/kaito_workspace_falcon_7b-instruct.yaml | 2 +- examples/inference/kaito_workspace_falcon_7b.yaml | 4 ++-- .../inference/kaito_workspace_falcon_7b_with_adapters.yaml | 4 ++-- examples/inference/kaito_workspace_llama2_13b-chat.yaml | 2 +- examples/inference/kaito_workspace_llama2_13b.yaml | 2 +- examples/inference/kaito_workspace_llama2_7b-chat.yaml | 2 +- examples/inference/kaito_workspace_llama2_7b.yaml | 2 +- examples/inference/kaito_workspace_mistral_7b-instruct.yaml | 2 +- examples/inference/kaito_workspace_mistral_7b.yaml | 2 +- examples/inference/kaito_workspace_phi_2.yaml | 2 +- examples/inference/kaito_workspace_phi_3.5-instruct.yaml | 2 +- examples/inference/kaito_workspace_phi_3_mini_128k.yaml | 2 +- examples/inference/kaito_workspace_phi_3_mini_4k.yaml | 2 +- examples/inference/kaito_workspace_phi_3_with_adapters.yaml | 2 +- 18 files changed, 25 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 86cd0dcd4..54d208fab 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ kind: Workspace metadata: name: workspace-phi-3-5-mini resource: - instanceType: "Standard_NC6s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: phi-3-5 @@ -65,8 +65,8 @@ The workspace status can be tracked by running the following command. 
When the W ```sh $ kubectl get workspace workspace-phi-3-5-mini -NAME INSTANCE RESOURCEREADY INFERENCEREADY JOBSTARTED WORKSPACESUCCEEDED AGE -workspace-phi-3-5-mini Standard_NC6s_v3 True True True 4h15m +NAME INSTANCE RESOURCEREADY INFERENCEREADY JOBSTARTED WORKSPACESUCCEEDED AGE +workspace-phi-3-5-mini Standard_NC24ads_A100_v4 True True True 4h15m ``` Next, one can find the inference service's cluster ip and use a temporal `curl` pod to test the service endpoint in the cluster. diff --git a/docs/custom-model-integration/custom-deployment-template.yaml b/docs/custom-model-integration/custom-deployment-template.yaml index 999e81c07..fe9c2c4ad 100644 --- a/docs/custom-model-integration/custom-deployment-template.yaml +++ b/docs/custom-model-integration/custom-deployment-template.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-custom-llm resource: - instanceType: "Standard_NC12s_v3" # Replace with the required VM SKU based on model requirements + instanceType: "Standard_NC24ads_A100_v4" # Replace with the required VM SKU based on model requirements labelSelector: matchLabels: apps: custom-llm diff --git a/docs/custom-model-integration/reference-image-deployment.yaml b/docs/custom-model-integration/reference-image-deployment.yaml index c3bb75171..3a77dba08 100644 --- a/docs/custom-model-integration/reference-image-deployment.yaml +++ b/docs/custom-model-integration/reference-image-deployment.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-custom-llm resource: - instanceType: "Standard_NC12s_v3" # Replace with the required VM SKU based on model requirements + instanceType: "Standard_NC24ads_A100_v4" # Replace with the required VM SKU based on model requirements labelSelector: matchLabels: apps: custom-llm @@ -37,4 +37,4 @@ inference: volumes: - name: dshm emptyDir: - medium: Memory \ No newline at end of file + medium: Memory diff --git a/docs/inference/README.md b/docs/inference/README.md index bf28ef835..765012f5d 100644 --- 
a/docs/inference/README.md +++ b/docs/inference/README.md @@ -12,7 +12,7 @@ kind: Workspace metadata: name: workspace-falcon-7b resource: - instanceType: "Standard_NC6s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: falcon-7b @@ -54,7 +54,7 @@ metadata: annotations: kaito.sh/runtime: "transformers" resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: falcon-7b @@ -73,7 +73,7 @@ kind: Workspace metadata: name: workspace-falcon-7b resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: falcon-7b diff --git a/examples/inference/kaito_workspace_falcon_7b-instruct.yaml b/examples/inference/kaito_workspace_falcon_7b-instruct.yaml index 95c807b79..80188c2db 100644 --- a/examples/inference/kaito_workspace_falcon_7b-instruct.yaml +++ b/examples/inference/kaito_workspace_falcon_7b-instruct.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-falcon-7b-instruct resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: falcon-7b-instruct diff --git a/examples/inference/kaito_workspace_falcon_7b.yaml b/examples/inference/kaito_workspace_falcon_7b.yaml index afb813757..b5531f678 100644 --- a/examples/inference/kaito_workspace_falcon_7b.yaml +++ b/examples/inference/kaito_workspace_falcon_7b.yaml @@ -3,11 +3,11 @@ kind: Workspace metadata: name: workspace-falcon-7b resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: falcon-7b inference: preset: name: "falcon-7b" - \ No newline at end of file + diff --git a/examples/inference/kaito_workspace_falcon_7b_with_adapters.yaml b/examples/inference/kaito_workspace_falcon_7b_with_adapters.yaml index e2ce58dec..f62b1b7a3 100644 --- a/examples/inference/kaito_workspace_falcon_7b_with_adapters.yaml +++ 
b/examples/inference/kaito_workspace_falcon_7b_with_adapters.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-falcon-7b resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: falcon-7b @@ -15,4 +15,4 @@ inference: name: "falcon-7b-adapter" image: "" strength: "0.2" - \ No newline at end of file + diff --git a/examples/inference/kaito_workspace_llama2_13b-chat.yaml b/examples/inference/kaito_workspace_llama2_13b-chat.yaml index 45c8a3b57..042547ee8 100644 --- a/examples/inference/kaito_workspace_llama2_13b-chat.yaml +++ b/examples/inference/kaito_workspace_llama2_13b-chat.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-llama-2-13b-chat resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: llama-2-13b-chat diff --git a/examples/inference/kaito_workspace_llama2_13b.yaml b/examples/inference/kaito_workspace_llama2_13b.yaml index 8a0923cd6..8dec0dbe3 100644 --- a/examples/inference/kaito_workspace_llama2_13b.yaml +++ b/examples/inference/kaito_workspace_llama2_13b.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-llama-2-13b resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: llama-2-13b diff --git a/examples/inference/kaito_workspace_llama2_7b-chat.yaml b/examples/inference/kaito_workspace_llama2_7b-chat.yaml index b1c68544a..d16d5a089 100644 --- a/examples/inference/kaito_workspace_llama2_7b-chat.yaml +++ b/examples/inference/kaito_workspace_llama2_7b-chat.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-llama-2-7b-chat resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: llama-2-7b-chat diff --git a/examples/inference/kaito_workspace_llama2_7b.yaml b/examples/inference/kaito_workspace_llama2_7b.yaml index ba72eb3eb..63536c5c8 100644 --- 
a/examples/inference/kaito_workspace_llama2_7b.yaml +++ b/examples/inference/kaito_workspace_llama2_7b.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-llama-2-7b resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: llama-2-7b diff --git a/examples/inference/kaito_workspace_mistral_7b-instruct.yaml b/examples/inference/kaito_workspace_mistral_7b-instruct.yaml index 6a7539d09..1d1134978 100644 --- a/examples/inference/kaito_workspace_mistral_7b-instruct.yaml +++ b/examples/inference/kaito_workspace_mistral_7b-instruct.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-mistral-7b-instruct resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: mistral-7b-instruct diff --git a/examples/inference/kaito_workspace_mistral_7b.yaml b/examples/inference/kaito_workspace_mistral_7b.yaml index 47f69c995..59b86acad 100644 --- a/examples/inference/kaito_workspace_mistral_7b.yaml +++ b/examples/inference/kaito_workspace_mistral_7b.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-mistral-7b resource: - instanceType: "Standard_NC12s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: mistral-7b diff --git a/examples/inference/kaito_workspace_phi_2.yaml b/examples/inference/kaito_workspace_phi_2.yaml index d1bb49eea..3310b82b7 100644 --- a/examples/inference/kaito_workspace_phi_2.yaml +++ b/examples/inference/kaito_workspace_phi_2.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-phi-2 resource: - instanceType: "Standard_NC6s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: phi-2 diff --git a/examples/inference/kaito_workspace_phi_3.5-instruct.yaml b/examples/inference/kaito_workspace_phi_3.5-instruct.yaml index 4c2497fe3..da17357c0 100644 --- a/examples/inference/kaito_workspace_phi_3.5-instruct.yaml +++ 
b/examples/inference/kaito_workspace_phi_3.5-instruct.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-phi-3-5-mini resource: - instanceType: "Standard_NC6s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: phi-3-5 diff --git a/examples/inference/kaito_workspace_phi_3_mini_128k.yaml b/examples/inference/kaito_workspace_phi_3_mini_128k.yaml index 162495ef8..af85d80d2 100644 --- a/examples/inference/kaito_workspace_phi_3_mini_128k.yaml +++ b/examples/inference/kaito_workspace_phi_3_mini_128k.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-phi-3-mini resource: - instanceType: "Standard_NC6s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: phi-3 diff --git a/examples/inference/kaito_workspace_phi_3_mini_4k.yaml b/examples/inference/kaito_workspace_phi_3_mini_4k.yaml index 33cd49d68..151feb094 100644 --- a/examples/inference/kaito_workspace_phi_3_mini_4k.yaml +++ b/examples/inference/kaito_workspace_phi_3_mini_4k.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-phi-3-mini resource: - instanceType: "Standard_NC6s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: phi-3 diff --git a/examples/inference/kaito_workspace_phi_3_with_adapters.yaml b/examples/inference/kaito_workspace_phi_3_with_adapters.yaml index 8dd754c48..69edb4692 100644 --- a/examples/inference/kaito_workspace_phi_3_with_adapters.yaml +++ b/examples/inference/kaito_workspace_phi_3_with_adapters.yaml @@ -3,7 +3,7 @@ kind: Workspace metadata: name: workspace-phi-3-mini-adapter resource: - instanceType: "Standard_NC6s_v3" + instanceType: "Standard_NC24ads_A100_v4" labelSelector: matchLabels: apps: phi-3-adapter