Skip to content

Commit

Permalink
Merge branch 'main' into RAGEngine_e2e
Browse files Browse the repository at this point in the history
  • Loading branch information
bangqipropel authored Jan 11, 2025
2 parents 81c064a + 3d04ba6 commit b1c0e23
Show file tree
Hide file tree
Showing 24 changed files with 60 additions and 36 deletions.
14 changes: 13 additions & 1 deletion .github/workflows/helm-chart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: publish_helm_chart

on:
repository_dispatch:
types: [ create-release ]
types: [ publish-helm-chart ]
workflow_dispatch:

permissions:
Expand Down Expand Up @@ -36,3 +36,15 @@ jobs:
charts_dir: charts/kaito
target_dir: charts/kaito
linting: off

create-release:
runs-on: ubuntu-latest
needs: [ publish-helm ]
steps:
- name: 'Dispatch release tag to create a release'
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.GITHUB_TOKEN }}
event-type: create-release
client-payload: '{"tag": "${{ github.event.client_payload.tag }}"}'

6 changes: 3 additions & 3 deletions .github/workflows/publish-workspace-mcr-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ jobs:
VERSION: ${{ needs.get-tag.outputs.release-tag }}
REGISTRY: ${{ secrets.KAITO_MCR_REGISTRY }}/public/aks/kaito

create-release:
publish-helm-chart:
runs-on: ubuntu-latest
needs: [ build-publish-mcr-image ]
steps:
- name: 'Dispatch release tag'
- name: 'Dispatch release tag for helm chart'
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.GITHUB_TOKEN }}
event-type: create-release
event-type: publish-helm-chart
client-payload: '{"tag": "${{ github.event.client_payload.tag }}"}'
7 changes: 7 additions & 0 deletions .github/workflows/unit-tests-ragengine.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,10 @@ jobs:
- name: Run unit tests
run: |
make rag-service-test
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303 # v5.1.2
with:
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
2 changes: 2 additions & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ jobs:
uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
with:
go-version: ${{ env.GO_VERSION }}

- name: Generate APIs
run: |
make generate
- name: Run unit tests & Generate coverage
run: |
make unit-test
Expand Down
15 changes: 9 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -107,21 +107,24 @@ unit-test: ## Run unit tests.
.PHONY: rag-service-test
rag-service-test:
pip install -r presets/ragengine/requirements.txt
pytest -o log_cli=true -o log_cli_level=INFO presets/ragengine/tests
pip install pytest-cov
pytest --cov -o log_cli=true -o log_cli_level=INFO presets/ragengine/tests

.PHONY: tuning-metrics-server-test
tuning-metrics-server-test:
pip install -r ./presets/workspace/dependencies/requirements-test.txt
pytest -o log_cli=true -o log_cli_level=INFO presets/workspace/tuning/text-generation/metrics
pip install pytest-cov
pytest --cov -o log_cli=true -o log_cli_level=INFO presets/workspace/tuning/text-generation/metrics

## --------------------------------------
## E2E tests
## --------------------------------------

inference-api-e2e:
pip install -r ./presets/workspace/dependencies/requirements-test.txt
pytest -o log_cli=true -o log_cli_level=INFO presets/workspace/inference/vllm
pytest -o log_cli=true -o log_cli_level=INFO presets/workspace/inference/text-generation
pip install pytest-cov
pytest --cov -o log_cli=true -o log_cli_level=INFO presets/workspace/inference/vllm
pytest --cov -o log_cli=true -o log_cli_level=INFO presets/workspace/inference/text-generation

# Ginkgo configurations
GINKGO_FOCUS ?=
Expand Down Expand Up @@ -263,8 +266,8 @@ docker-build-ragengine: docker-buildx
--tag $(REGISTRY)/$(RAGENGINE_IMG_NAME):$(RAGENGINE_IMG_TAG) .

.PHONY: docker-build-rag-service
docker-build-ragservice: docker buildx
docker buildx build \
docker-build-ragservice: docker-buildx
docker buildx build \
--platform="linux/$(ARCH)" \
--output=$(OUTPUT_TYPE) \
--file ./docker/ragengine/service/Dockerfile \
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
![GitHub Release](https://img.shields.io/github/v/release/kaito-project/kaito)
[![Go Report Card](https://goreportcard.com/badge/github.com/kaito-project/kaito)](https://goreportcard.com/report/github.com/kaito-project/kaito)
![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/kaito-project/kaito)
[![codecov](https://codecov.io/gh/Azure/kaito/graph/badge.svg?token=XAQLLPB2AR)](https://codecov.io/gh/Azure/kaito)
[![codecov](https://codecov.io/gh/kaito-project/kaito/graph/badge.svg?token=XAQLLPB2AR)](https://codecov.io/gh/kaito-project/kaito)

| ![notification](docs/img/bell.svg) What is NEW! |
|-------------------------------------------------|
Expand Down Expand Up @@ -50,7 +50,7 @@ kind: Workspace
metadata:
name: workspace-phi-3-5-mini
resource:
instanceType: "Standard_NC6s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: phi-3-5
Expand All @@ -65,8 +65,8 @@ The workspace status can be tracked by running the following command. When the W

```sh
$ kubectl get workspace workspace-phi-3-5-mini
NAME INSTANCE RESOURCEREADY INFERENCEREADY JOBSTARTED WORKSPACESUCCEEDED AGE
workspace-phi-3-5-mini Standard_NC6s_v3 True True True 4h15m
NAME INSTANCE RESOURCEREADY INFERENCEREADY JOBSTARTED WORKSPACESUCCEEDED AGE
workspace-phi-3-5-mini Standard_NC24ads_A100_v4 True True True 4h15m
```

Next, one can find the inference service's cluster IP and use a temporary `curl` pod to test the service endpoint in the cluster.
Expand Down
Empty file added codecov.yml
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-custom-llm
resource:
instanceType: "Standard_NC12s_v3" # Replace with the required VM SKU based on model requirements
instanceType: "Standard_NC24ads_A100_v4" # Replace with the required VM SKU based on model requirements
labelSelector:
matchLabels:
apps: custom-llm
Expand Down
4 changes: 2 additions & 2 deletions docs/custom-model-integration/reference-image-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-custom-llm
resource:
instanceType: "Standard_NC12s_v3" # Replace with the required VM SKU based on model requirements
instanceType: "Standard_NC24ads_A100_v4" # Replace with the required VM SKU based on model requirements
labelSelector:
matchLabels:
apps: custom-llm
Expand Down Expand Up @@ -37,4 +37,4 @@ inference:
volumes:
- name: dshm
emptyDir:
medium: Memory
medium: Memory
6 changes: 3 additions & 3 deletions docs/inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ kind: Workspace
metadata:
name: workspace-falcon-7b
resource:
instanceType: "Standard_NC6s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: falcon-7b
Expand Down Expand Up @@ -54,7 +54,7 @@ metadata:
annotations:
kaito.sh/runtime: "transformers"
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: falcon-7b
Expand All @@ -73,7 +73,7 @@ kind: Workspace
metadata:
name: workspace-falcon-7b
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: falcon-7b
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_falcon_7b-instruct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-falcon-7b-instruct
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: falcon-7b-instruct
Expand Down
4 changes: 2 additions & 2 deletions examples/inference/kaito_workspace_falcon_7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ kind: Workspace
metadata:
name: workspace-falcon-7b
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: falcon-7b
inference:
preset:
name: "falcon-7b"


Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-falcon-7b
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: falcon-7b
Expand All @@ -15,4 +15,4 @@ inference:
name: "falcon-7b-adapter"
image: "<YOUR_IMAGE>"
strength: "0.2"


2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_llama2_13b-chat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-llama-2-13b-chat
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: llama-2-13b-chat
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_llama2_13b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-llama-2-13b
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: llama-2-13b
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_llama2_7b-chat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-llama-2-7b-chat
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: llama-2-7b-chat
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_llama2_7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-llama-2-7b
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: llama-2-7b
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-mistral-7b-instruct
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: mistral-7b-instruct
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_mistral_7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-mistral-7b
resource:
instanceType: "Standard_NC12s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: mistral-7b
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_phi_2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-phi-2
resource:
instanceType: "Standard_NC6s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: phi-2
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_phi_3.5-instruct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-phi-3-5-mini
resource:
instanceType: "Standard_NC6s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: phi-3-5
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_phi_3_mini_128k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-phi-3-mini
resource:
instanceType: "Standard_NC6s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: phi-3
Expand Down
2 changes: 1 addition & 1 deletion examples/inference/kaito_workspace_phi_3_mini_4k.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-phi-3-mini
resource:
instanceType: "Standard_NC6s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: phi-3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Workspace
metadata:
name: workspace-phi-3-mini-adapter
resource:
instanceType: "Standard_NC6s_v3"
instanceType: "Standard_NC24ads_A100_v4"
labelSelector:
matchLabels:
apps: phi-3-adapter
Expand Down

0 comments on commit b1c0e23

Please sign in to comment.