From 622c1f93ba3088bd43b7ae130256c8048ec89c02 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 27 Feb 2024 15:57:27 -0800 Subject: [PATCH 01/10] fix: add test case --- pkg/utils/testUtils.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/utils/testUtils.go b/pkg/utils/testUtils.go index f88b35a4f..bdb1eb621 100644 --- a/pkg/utils/testUtils.go +++ b/pkg/utils/testUtils.go @@ -140,6 +140,15 @@ var ( }, }, }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node4", + Labels: map[string]string{ + corev1.LabelInstanceTypeStable: "Standard_NC12s_v3", + }, + DeletionTimestamp: &metav1.Time{Time: time.Now()}, + }, + }, } ) From 0c5b71b86c9d0fc05a01e9ee9457d395b9301fc1 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 27 Feb 2024 15:59:00 -0800 Subject: [PATCH 02/10] fix: add import --- pkg/utils/testUtils.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/utils/testUtils.go b/pkg/utils/testUtils.go index bdb1eb621..185af6049 100644 --- a/pkg/utils/testUtils.go +++ b/pkg/utils/testUtils.go @@ -4,6 +4,7 @@ package utils import ( + "time" "github.com/aws/karpenter-core/pkg/apis/v1alpha5" "github.com/azure/kaito/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" From 650be05858b7b86641cb7f057cc8c9478ed90a4f Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 27 Feb 2024 16:36:37 -0800 Subject: [PATCH 03/10] fix: upgrade mistral --- pkg/utils/testUtils.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pkg/utils/testUtils.go b/pkg/utils/testUtils.go index 185af6049..f88b35a4f 100644 --- a/pkg/utils/testUtils.go +++ b/pkg/utils/testUtils.go @@ -4,7 +4,6 @@ package utils import ( - "time" "github.com/aws/karpenter-core/pkg/apis/v1alpha5" "github.com/azure/kaito/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" @@ -141,15 +140,6 @@ var ( }, }, }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "node4", - Labels: map[string]string{ - corev1.LabelInstanceTypeStable: "Standard_NC12s_v3", - }, - DeletionTimestamp: &metav1.Time{Time: time.Now()}, - }, - 
}, } ) From 02040be38a606d43a7d9b4670a846327688d420a Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 28 Feb 2024 14:17:20 -0800 Subject: [PATCH 04/10] fix: Add test case --- README.md | 39 ++++++++++++++++++- .../kaito_workspace_falcon_40b-instruct.yaml | 2 + examples/kaito_workspace_falcon_40b.yaml | 2 + .../kaito_workspace_falcon_7b-instruct.yaml | 2 + examples/kaito_workspace_falcon_7b.yaml | 2 + examples/kaito_workspace_llama2_13b-chat.yaml | 2 + examples/kaito_workspace_llama2_13b.yaml | 2 + examples/kaito_workspace_llama2_70b-chat.yaml | 2 + examples/kaito_workspace_llama2_70b.yaml | 2 + examples/kaito_workspace_llama2_7b-chat.yaml | 2 + examples/kaito_workspace_llama2_7b.yaml | 2 + .../kaito_workspace_mistral_7b-instruct.yaml | 2 + examples/kaito_workspace_mistral_7b.yaml | 2 + examples/kaito_workspace_phi-2.yaml | 2 + 14 files changed, 64 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 242ea325c..bf8669f2e 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Using Kaito, the workflow of onboarding large AI inference models in Kubernetes Kaito follows the classic Kubernetes Custom Resource Definition(CRD)/controller design pattern. User manages a `workspace` custom resource which describes the GPU requirements and the inference specification. Kaito controllers will automate the deployment by reconciling the `workspace` custom resource.
- + Kaito architecture
The above figure presents the Kaito architecture overview. Its major components consist of: @@ -95,6 +95,43 @@ This project has adopted the [Microsoft Open Source Code of Conduct](https://ope For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +## FAQ + +### How to Update Model/Inference Parameters? + +To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. +For example, to enable 4-bit quantization on a `falcon-7b-instruct` deployment, you would execute: + +``` +kubectl edit deployment workspace-falcon-7b-instruct +``` + +Within the deployment configuration, locate the command section and modify it as follows: + +Original command: +``` +accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16 +``` +Modified command to enable 4-bit Quantization +``` +accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16 --load_in_4bit +``` + +For a comprehensive list of inference parameters for the text-generation models, refer to the following options: +- `pipeline`: The model pipeline for the pre-trained model. For text-generation models this can be either `text-generation` or `conversational` +- `pretrained_model_name_or_path`: Path to the pretrained model or model identifier from huggingface.co/models. +- Additional parameters such as `state_dict`, `cache_dir`, `from_tf`, `force_download`, `resume_download`, `proxies`, `output_loading_info`, `allow_remote_files`, `revision`, `trust_remote_code`, `load_in_4bit`, `load_in_8bit`, `torch_dtype`, and `device_map` can also be customized as needed. 
+ +You can also introduce new parameters not listed above, which will be passed directly into the model. + +### What is the Difference Between Instruct and Non-Instruct Models? +The main distinction lies in their intended use cases. Instruct models are fine-tuned versions optimized +for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in +conversational contexts. + +On the other hand, non-instruct, or raw models, are designed for further fine-tuning. Future developments in Kaito may include features that allow users to +apply fine-tuned weights to these raw models, enhancing their functionality and application scope. + ## Trademarks This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general). 
diff --git a/examples/kaito_workspace_falcon_40b-instruct.yaml b/examples/kaito_workspace_falcon_40b-instruct.yaml index 33ea9d447..310a4b92b 100644 --- a/examples/kaito_workspace_falcon_40b-instruct.yaml +++ b/examples/kaito_workspace_falcon_40b-instruct.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-falcon-40b-instruct + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC96ads_A100_v4" labelSelector: diff --git a/examples/kaito_workspace_falcon_40b.yaml b/examples/kaito_workspace_falcon_40b.yaml index 5006aa345..8ab0ca5b0 100644 --- a/examples/kaito_workspace_falcon_40b.yaml +++ b/examples/kaito_workspace_falcon_40b.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-falcon-40b + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC96ads_A100_v4" labelSelector: diff --git a/examples/kaito_workspace_falcon_7b-instruct.yaml b/examples/kaito_workspace_falcon_7b-instruct.yaml index 95c807b79..4bed252b4 100644 --- a/examples/kaito_workspace_falcon_7b-instruct.yaml +++ b/examples/kaito_workspace_falcon_7b-instruct.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-falcon-7b-instruct + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_falcon_7b.yaml b/examples/kaito_workspace_falcon_7b.yaml index 4eaf1590d..259b8959b 100644 --- a/examples/kaito_workspace_falcon_7b.yaml +++ b/examples/kaito_workspace_falcon_7b.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-falcon-7b + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_llama2_13b-chat.yaml b/examples/kaito_workspace_llama2_13b-chat.yaml index 45c8a3b57..3573d83a5 100644 --- a/examples/kaito_workspace_llama2_13b-chat.yaml +++ 
b/examples/kaito_workspace_llama2_13b-chat.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-13b-chat + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_llama2_13b.yaml b/examples/kaito_workspace_llama2_13b.yaml index 8a0923cd6..242ddfde8 100644 --- a/examples/kaito_workspace_llama2_13b.yaml +++ b/examples/kaito_workspace_llama2_13b.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-13b + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_llama2_70b-chat.yaml b/examples/kaito_workspace_llama2_70b-chat.yaml index 18b5e4835..7456227d3 100644 --- a/examples/kaito_workspace_llama2_70b-chat.yaml +++ b/examples/kaito_workspace_llama2_70b-chat.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-70b-chat + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC96ads_A100_v4" count: 2 diff --git a/examples/kaito_workspace_llama2_70b.yaml b/examples/kaito_workspace_llama2_70b.yaml index 891440f03..1fb6e38aa 100644 --- a/examples/kaito_workspace_llama2_70b.yaml +++ b/examples/kaito_workspace_llama2_70b.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-70b + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC96ads_A100_v4" count: 2 diff --git a/examples/kaito_workspace_llama2_7b-chat.yaml b/examples/kaito_workspace_llama2_7b-chat.yaml index b1c68544a..e9f49c21a 100644 --- a/examples/kaito_workspace_llama2_7b-chat.yaml +++ b/examples/kaito_workspace_llama2_7b-chat.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-7b-chat + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" 
labelSelector: diff --git a/examples/kaito_workspace_llama2_7b.yaml b/examples/kaito_workspace_llama2_7b.yaml index ba72eb3eb..6d33f5ab5 100644 --- a/examples/kaito_workspace_llama2_7b.yaml +++ b/examples/kaito_workspace_llama2_7b.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-7b + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_mistral_7b-instruct.yaml b/examples/kaito_workspace_mistral_7b-instruct.yaml index 6a7539d09..927c80515 100644 --- a/examples/kaito_workspace_mistral_7b-instruct.yaml +++ b/examples/kaito_workspace_mistral_7b-instruct.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-mistral-7b-instruct + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_mistral_7b.yaml b/examples/kaito_workspace_mistral_7b.yaml index 47f69c995..275c930d6 100644 --- a/examples/kaito_workspace_mistral_7b.yaml +++ b/examples/kaito_workspace_mistral_7b.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-mistral-7b + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_phi-2.yaml b/examples/kaito_workspace_phi-2.yaml index d1bb49eea..86c44c40f 100644 --- a/examples/kaito_workspace_phi-2.yaml +++ b/examples/kaito_workspace_phi-2.yaml @@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-phi-2 + annotations: + kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC6s_v3" labelSelector: From c7009283166935bdabb23d30ab6943f0eae2f6ae Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 28 Feb 2024 14:40:36 -0800 Subject: [PATCH 05/10] revert: examples --- examples/kaito_workspace_falcon_40b-instruct.yaml | 2 -- 
examples/kaito_workspace_falcon_40b.yaml | 2 -- examples/kaito_workspace_falcon_7b-instruct.yaml | 2 -- examples/kaito_workspace_falcon_7b.yaml | 2 -- examples/kaito_workspace_llama2_13b-chat.yaml | 2 -- examples/kaito_workspace_llama2_13b.yaml | 2 -- examples/kaito_workspace_llama2_70b-chat.yaml | 2 -- examples/kaito_workspace_llama2_70b.yaml | 2 -- examples/kaito_workspace_llama2_7b-chat.yaml | 2 -- examples/kaito_workspace_llama2_7b.yaml | 2 -- examples/kaito_workspace_mistral_7b-instruct.yaml | 2 -- examples/kaito_workspace_mistral_7b.yaml | 2 -- examples/kaito_workspace_phi-2.yaml | 2 -- 13 files changed, 26 deletions(-) diff --git a/examples/kaito_workspace_falcon_40b-instruct.yaml b/examples/kaito_workspace_falcon_40b-instruct.yaml index 310a4b92b..33ea9d447 100644 --- a/examples/kaito_workspace_falcon_40b-instruct.yaml +++ b/examples/kaito_workspace_falcon_40b-instruct.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-falcon-40b-instruct - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC96ads_A100_v4" labelSelector: diff --git a/examples/kaito_workspace_falcon_40b.yaml b/examples/kaito_workspace_falcon_40b.yaml index 8ab0ca5b0..5006aa345 100644 --- a/examples/kaito_workspace_falcon_40b.yaml +++ b/examples/kaito_workspace_falcon_40b.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-falcon-40b - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC96ads_A100_v4" labelSelector: diff --git a/examples/kaito_workspace_falcon_7b-instruct.yaml b/examples/kaito_workspace_falcon_7b-instruct.yaml index 4bed252b4..95c807b79 100644 --- a/examples/kaito_workspace_falcon_7b-instruct.yaml +++ b/examples/kaito_workspace_falcon_7b-instruct.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-falcon-7b-instruct - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" 
labelSelector: diff --git a/examples/kaito_workspace_falcon_7b.yaml b/examples/kaito_workspace_falcon_7b.yaml index 259b8959b..4eaf1590d 100644 --- a/examples/kaito_workspace_falcon_7b.yaml +++ b/examples/kaito_workspace_falcon_7b.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-falcon-7b - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_llama2_13b-chat.yaml b/examples/kaito_workspace_llama2_13b-chat.yaml index 3573d83a5..45c8a3b57 100644 --- a/examples/kaito_workspace_llama2_13b-chat.yaml +++ b/examples/kaito_workspace_llama2_13b-chat.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-13b-chat - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_llama2_13b.yaml b/examples/kaito_workspace_llama2_13b.yaml index 242ddfde8..8a0923cd6 100644 --- a/examples/kaito_workspace_llama2_13b.yaml +++ b/examples/kaito_workspace_llama2_13b.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-13b - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_llama2_70b-chat.yaml b/examples/kaito_workspace_llama2_70b-chat.yaml index 7456227d3..18b5e4835 100644 --- a/examples/kaito_workspace_llama2_70b-chat.yaml +++ b/examples/kaito_workspace_llama2_70b-chat.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-70b-chat - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC96ads_A100_v4" count: 2 diff --git a/examples/kaito_workspace_llama2_70b.yaml b/examples/kaito_workspace_llama2_70b.yaml index 1fb6e38aa..891440f03 100644 --- a/examples/kaito_workspace_llama2_70b.yaml +++ b/examples/kaito_workspace_llama2_70b.yaml @@ 
-2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-70b - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC96ads_A100_v4" count: 2 diff --git a/examples/kaito_workspace_llama2_7b-chat.yaml b/examples/kaito_workspace_llama2_7b-chat.yaml index e9f49c21a..b1c68544a 100644 --- a/examples/kaito_workspace_llama2_7b-chat.yaml +++ b/examples/kaito_workspace_llama2_7b-chat.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-7b-chat - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_llama2_7b.yaml b/examples/kaito_workspace_llama2_7b.yaml index 6d33f5ab5..ba72eb3eb 100644 --- a/examples/kaito_workspace_llama2_7b.yaml +++ b/examples/kaito_workspace_llama2_7b.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-llama-2-7b - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_mistral_7b-instruct.yaml b/examples/kaito_workspace_mistral_7b-instruct.yaml index 927c80515..6a7539d09 100644 --- a/examples/kaito_workspace_mistral_7b-instruct.yaml +++ b/examples/kaito_workspace_mistral_7b-instruct.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-mistral-7b-instruct - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git a/examples/kaito_workspace_mistral_7b.yaml b/examples/kaito_workspace_mistral_7b.yaml index 275c930d6..47f69c995 100644 --- a/examples/kaito_workspace_mistral_7b.yaml +++ b/examples/kaito_workspace_mistral_7b.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-mistral-7b - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC12s_v3" labelSelector: diff --git 
a/examples/kaito_workspace_phi-2.yaml b/examples/kaito_workspace_phi-2.yaml index 86c44c40f..d1bb49eea 100644 --- a/examples/kaito_workspace_phi-2.yaml +++ b/examples/kaito_workspace_phi-2.yaml @@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1 kind: Workspace metadata: name: workspace-phi-2 - annotations: - kaito.sh/enablelb: "False" resource: instanceType: "Standard_NC6s_v3" labelSelector: From 4950357a31139273ea54246cd6e3b4c12f7b231f Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 28 Feb 2024 14:42:04 -0800 Subject: [PATCH 06/10] nit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bf8669f2e..d8616eddf 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,7 @@ for interactive chat applications. They are typically the preferred choice for m conversational contexts. On the other hand, non-instruct, or raw models, are designed for further fine-tuning. Future developments in Kaito may include features that allow users to -apply fine-tuned weights to these raw models, enhancing their functionality and application scope. +apply fine-tuned weights to these raw models. ## Trademarks This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft From b01c3ce1f46825ddf418a5ea3190c2f531dd7645 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 28 Feb 2024 15:24:31 -0800 Subject: [PATCH 07/10] feat: add new q --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d8616eddf..fc6e382be 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio ## FAQ -### How to Update Model/Inference Parameters? +### How to Update Model/Inference Parameters to Override the Kaito Preset Configuration? 
To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. For example, to enable 4-bit quantization on a `falcon-7b-instruct` deployment, you would execute: @@ -122,7 +122,11 @@ For a comprehensive list of inference parameters for the text-generation models, - `pretrained_model_name_or_path`: Path to the pretrained model or model identifier from huggingface.co/models. - Additional parameters such as `state_dict`, `cache_dir`, `from_tf`, `force_download`, `resume_download`, `proxies`, `output_loading_info`, `allow_remote_files`, `revision`, `trust_remote_code`, `load_in_4bit`, `load_in_8bit`, `torch_dtype`, and `device_map` can also be customized as needed. -You can also introduce new parameters not listed above, which will be passed directly into the model. +Should you need an undocumented parameter, kindly file an issue for potential future inclusion. + +### How to upgrade the existing deployment to use the latest model configuration? + +When using hosted public models, a user can delete the existing inference workload (Deployment of StatefulSet) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec. ### What is the Difference Between Instruct and Non-Instruct Models? The main distinction lies in their intended use cases. 
Instruct models are fine-tuned versions optimized From 0ecf0436594520d4e6c19fbfa75eaafd46c6927b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 28 Feb 2024 15:26:27 -0800 Subject: [PATCH 08/10] feat: add new q --- README.md | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index fc6e382be..f3b51ca71 100644 --- a/README.md +++ b/README.md @@ -79,27 +79,11 @@ The detailed usage for Kaito supported models can be found in [**HERE**](presets The number of the supported models in Kaito is growing! Please check [this](./docs/How-to-add-new-models.md) document to see how to add a new supported model. -## Contributing - -[Read more](docs/contributing/readme.md) - -This project welcomes contributions and suggestions. Most contributions require you to agree to a -Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution. For details, visit . - -When you submit a pull request, a CLA bot will automatically determine whether you need to provide -a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. - -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. - ## FAQ ### How to Update Model/Inference Parameters to Override the Kaito Preset Configuration? -To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. 
+To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. For example, to enable 4-bit quantization on a `falcon-7b-instruct` deployment, you would execute: ``` @@ -129,13 +113,29 @@ Should you need an undocumented parameter, kindly file an issue for potential fu When using hosted public models, a user can delete the existing inference workload (Deployment of StatefulSet) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec. ### What is the Difference Between Instruct and Non-Instruct Models? -The main distinction lies in their intended use cases. Instruct models are fine-tuned versions optimized -for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in +The main distinction lies in their intended use cases. Instruct models are fine-tuned versions optimized +for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in conversational contexts. -On the other hand, non-instruct, or raw models, are designed for further fine-tuning. Future developments in Kaito may include features that allow users to +On the other hand, non-instruct, or raw models, are designed for further fine-tuning. Future developments in Kaito may include features that allow users to apply fine-tuned weights to these raw models. +## Contributing + +[Read more](docs/contributing/readme.md) + +This project welcomes contributions and suggestions. Most contributions require you to agree to a +Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us +the rights to use your contribution. 
For details, visit <https://cla.opensource.microsoft.com>. + +When you submit a pull request, a CLA bot will automatically determine whether you need to provide +a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions +provided by the bot. You will only need to do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or +contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + ## Trademarks This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general). From 94f6291a5f7312ad30d2fa933246ccc17bcc3687 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 28 Feb 2024 15:28:14 -0800 Subject: [PATCH 09/10] fix: casing --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f3b51ca71..230f0ac6e 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ The number of the supported models in Kaito is growing! Please check [this](./do ## FAQ -### How to Update Model/Inference Parameters to Override the Kaito Preset Configuration? +### How to update model/inference parameters to override the Kaito Preset Configuration? To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. 
For example, to enable 4-bit quantization on a `falcon-7b-instruct` deployment, you would execute: @@ -112,7 +112,7 @@ Should you need an undocumented parameter, kindly file an issue for potential fu When using hosted public models, a user can delete the existing inference workload (Deployment of StatefulSet) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec. -### What is the Difference Between Instruct and Non-Instruct Models? +### What is the difference between instruct and non-instruct models? The main distinction lies in their intended use cases. Instruct models are fine-tuned versions optimized for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in conversational contexts. From 8b198637aaa9034220c49feabd16960dfb51051c Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 28 Feb 2024 15:32:14 -0800 Subject: [PATCH 10/10] fix: upgrade --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 230f0ac6e..3de7e194a 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,10 @@ The number of the supported models in Kaito is growing! Please check [this](./do ## FAQ +### How to upgrade the existing deployment to use the latest model configuration? + +When using hosted public models, a user can delete the existing inference workload (`Deployment` or `StatefulSet`) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec. + ### How to update model/inference parameters to override the Kaito Preset Configuration? 
To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. @@ -108,10 +112,6 @@ For a comprehensive list of inference parameters for the text-generation models, Should you need an undocumented parameter, kindly file an issue for potential future inclusion. -### How to upgrade the existing deployment to use the latest model configuration? - -When using hosted public models, a user can delete the existing inference workload (Deployment of StatefulSet) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec. - ### What is the difference between instruct and non-instruct models? The main distinction lies in their intended use cases. Instruct models are fine-tuned versions optimized for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in