From 413de7a694f61fe0dc10cdd3d9e4a56e5d8664bd Mon Sep 17 00:00:00 2001 From: Fei Guo Date: Wed, 1 Nov 2023 12:50:20 -0700 Subject: [PATCH] chore: revise README to include quick start (#123) Co-authored-by: guofei --- README.md | 39 +++++++++++++++++-- .../kaito_workspace_falcon_40b-instruct.yaml | 2 - examples/kaito_workspace_falcon_40b.yaml | 2 - .../kaito_workspace_falcon_7b-instruct.yaml | 2 - examples/kaito_workspace_falcon_7b.yaml | 2 - examples/kaito_workspace_llama2_13b-chat.yaml | 5 ++- examples/kaito_workspace_llama2_70b-chat.yaml | 5 ++- examples/kaito_workspace_llama2_7b-chat.yaml | 5 ++- 8 files changed, 45 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index ce61a07ff..32b98b10f 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,41 @@ helm uninstall workspace ## Quick start -TODO. +After installing Kaito, one can try the following commands to start a falcon-7b inference service. +``` +$ cat examples/kaito_workspace_falcon_7b.yaml +apiVersion: kaito.sh/v1alpha1 +kind: Workspace +metadata: + name: workspace-falcon-7b +resource: + instanceType: "Standard_NC12s_v3" + labelSelector: + matchLabels: + apps: falcon-7b +inference: + preset: + name: "falcon-7b" + +$ kubectl apply -f examples/kaito_workspace_falcon_7b.yaml +``` +The workspace status can be tracked by running the following command. +``` +$ kubectl get workspace workspace-falcon-7b +NAME INSTANCE RESOURCEREADY INFERENCEREADY WORKSPACEREADY AGE +workspace-falcon-7b Standard_NC12s_v3 True True True 10m + +``` +Once the workspace is ready, one can find the inference service's cluster IP and use a temporary `curl` pod to test the service endpoint in the cluster. 
+``` +$ kubectl get svc workspace-falcon-7b +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +workspace-falcon-7b ClusterIP <CLUSTERIP> <none> 80/TCP,29500/TCP 10m + +$ kubectl run -it --rm --restart=Never curl --image=curlimages/curl sh +~ $ curl -X POST http://<CLUSTERIP>/chat -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\":\"YOUR QUESTION HERE\"}" +``` ## Contributing @@ -90,12 +123,12 @@ For more information see the [Code of Conduct FAQ](https://opensource.microsoft. contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. ## Trademarks - + This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies. - + ## License See [LICENSE](LICENSE). 
diff --git a/examples/kaito_workspace_falcon_40b-instruct.yaml b/examples/kaito_workspace_falcon_40b-instruct.yaml index c6eda8fa9..33ea9d447 100644 --- a/examples/kaito_workspace_falcon_40b-instruct.yaml +++ b/examples/kaito_workspace_falcon_40b-instruct.yaml @@ -1,8 +1,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: - annotations: - kubernetes-kaito.sh/service-type: load-balancer name: workspace-falcon-40b-instruct resource: instanceType: "Standard_NC96ads_A100_v4" diff --git a/examples/kaito_workspace_falcon_40b.yaml b/examples/kaito_workspace_falcon_40b.yaml index 5f1031fee..5006aa345 100644 --- a/examples/kaito_workspace_falcon_40b.yaml +++ b/examples/kaito_workspace_falcon_40b.yaml @@ -1,8 +1,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: - annotations: - kubernetes-kaito.sh/service-type: load-balancer name: workspace-falcon-40b resource: instanceType: "Standard_NC96ads_A100_v4" diff --git a/examples/kaito_workspace_falcon_7b-instruct.yaml b/examples/kaito_workspace_falcon_7b-instruct.yaml index 5dd555f7b..95c807b79 100644 --- a/examples/kaito_workspace_falcon_7b-instruct.yaml +++ b/examples/kaito_workspace_falcon_7b-instruct.yaml @@ -1,8 +1,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: - annotations: - kubernetes-kaito.sh/service-type: load-balancer name: workspace-falcon-7b-instruct resource: instanceType: "Standard_NC12s_v3" diff --git a/examples/kaito_workspace_falcon_7b.yaml b/examples/kaito_workspace_falcon_7b.yaml index 733d6436c..4eaf1590d 100644 --- a/examples/kaito_workspace_falcon_7b.yaml +++ b/examples/kaito_workspace_falcon_7b.yaml @@ -1,8 +1,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: - annotations: - kubernetes-kaito.sh/service-type: load-balancer name: workspace-falcon-7b resource: instanceType: "Standard_NC12s_v3" diff --git a/examples/kaito_workspace_llama2_13b-chat.yaml b/examples/kaito_workspace_llama2_13b-chat.yaml index 0eb0a1d8d..1c1078d5c 100644 --- 
a/examples/kaito_workspace_llama2_13b-chat.yaml +++ b/examples/kaito_workspace_llama2_13b-chat.yaml @@ -1,8 +1,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: - annotations: - kubernetes-kaito.sh/service-type: load-balancer name: workspace-llama-2-13b-chat resource: instanceType: "Standard_NC12s_v3" @@ -12,3 +10,6 @@ resource: inference: preset: name: "llama-2-13b-chat" + accessMode: private + presetOptions: + image: <YOUR IMAGE URL> diff --git a/examples/kaito_workspace_llama2_70b-chat.yaml b/examples/kaito_workspace_llama2_70b-chat.yaml index d199e4b91..8ff862e78 100644 --- a/examples/kaito_workspace_llama2_70b-chat.yaml +++ b/examples/kaito_workspace_llama2_70b-chat.yaml @@ -1,8 +1,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: - annotations: - kubernetes-kaito.sh/service-type: load-balancer name: workspace-llama-2-70b-chat resource: instanceType: "Standard_NC96ads_A100_v4" @@ -13,3 +11,6 @@ resource: inference: preset: name: "llama-2-70b-chat" + accessMode: private + presetOptions: + image: <YOUR IMAGE URL> diff --git a/examples/kaito_workspace_llama2_7b-chat.yaml b/examples/kaito_workspace_llama2_7b-chat.yaml index 300658b7b..2e6c2fc70 100644 --- a/examples/kaito_workspace_llama2_7b-chat.yaml +++ b/examples/kaito_workspace_llama2_7b-chat.yaml @@ -1,8 +1,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: - annotations: - kubernetes-kaito.sh/service-type: load-balancer name: workspace-llama-2-7b-chat resource: instanceType: "Standard_NC12s_v3" @@ -12,3 +10,6 @@ resource: inference: preset: name: "llama-2-7b-chat" + accessMode: private + presetOptions: + image: <YOUR IMAGE URL>