From 622c1f93ba3088bd43b7ae130256c8048ec89c02 Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Tue, 27 Feb 2024 15:57:27 -0800
Subject: [PATCH 01/10] fix: add test case

---
 pkg/utils/testUtils.go | 9 +++++++++
 1 file changed, 9 insertions(+)
diff --git a/pkg/utils/testUtils.go b/pkg/utils/testUtils.go
index f88b35a4f..bdb1eb621 100644
--- a/pkg/utils/testUtils.go
+++ b/pkg/utils/testUtils.go
@@ -140,6 +140,15 @@ var (
 				},
 			},
 		},
+		{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "node4",
+				Labels: map[string]string{
+					corev1.LabelInstanceTypeStable: "Standard_NC12s_v3",
+				},
+				DeletionTimestamp: &metav1.Time{Time: time.Now()},
+			},
+		},
 	}
 )
 

From 0c5b71b86c9d0fc05a01e9ee9457d395b9301fc1 Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Tue, 27 Feb 2024 15:59:00 -0800
Subject: [PATCH 02/10] fix: add import

---
 pkg/utils/testUtils.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/utils/testUtils.go b/pkg/utils/testUtils.go
index bdb1eb621..185af6049 100644
--- a/pkg/utils/testUtils.go
+++ b/pkg/utils/testUtils.go
@@ -4,6 +4,7 @@
 package utils
 
 import (
+	"time"
 	"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
 	"github.com/azure/kaito/api/v1alpha1"
 	appsv1 "k8s.io/api/apps/v1"

From 650be05858b7b86641cb7f057cc8c9478ed90a4f Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Tue, 27 Feb 2024 16:36:37 -0800
Subject: [PATCH 03/10] fix: upgrade mistral

---
 pkg/utils/testUtils.go | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pkg/utils/testUtils.go b/pkg/utils/testUtils.go
index 185af6049..f88b35a4f 100644
--- a/pkg/utils/testUtils.go
+++ b/pkg/utils/testUtils.go
@@ -4,7 +4,6 @@
 package utils
 
 import (
-	"time"
 	"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
 	"github.com/azure/kaito/api/v1alpha1"
 	appsv1 "k8s.io/api/apps/v1"
@@ -141,15 +140,6 @@ var (
 				},
 			},
 		},
-		{
-			ObjectMeta: metav1.ObjectMeta{
-				Name: "node4",
-				Labels: map[string]string{
-					corev1.LabelInstanceTypeStable: "Standard_NC12s_v3",
-				},
-				DeletionTimestamp: &metav1.Time{Time: time.Now()},
-			},
-		},
 	}
 )
 

From 02040be38a606d43a7d9b4670a846327688d420a Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Wed, 28 Feb 2024 14:17:20 -0800
Subject: [PATCH 04/10] fix: Add test case

---
 README.md                                     | 39 ++++++++++++++++++-
 .../kaito_workspace_falcon_40b-instruct.yaml  |  2 +
 examples/kaito_workspace_falcon_40b.yaml      |  2 +
 .../kaito_workspace_falcon_7b-instruct.yaml   |  2 +
 examples/kaito_workspace_falcon_7b.yaml       |  2 +
 examples/kaito_workspace_llama2_13b-chat.yaml |  2 +
 examples/kaito_workspace_llama2_13b.yaml      |  2 +
 examples/kaito_workspace_llama2_70b-chat.yaml |  2 +
 examples/kaito_workspace_llama2_70b.yaml      |  2 +
 examples/kaito_workspace_llama2_7b-chat.yaml  |  2 +
 examples/kaito_workspace_llama2_7b.yaml       |  2 +
 .../kaito_workspace_mistral_7b-instruct.yaml  |  2 +
 examples/kaito_workspace_mistral_7b.yaml      |  2 +
 examples/kaito_workspace_phi-2.yaml           |  2 +
 14 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 242ea325c..bf8669f2e 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ Using Kaito, the workflow of onboarding large AI inference models in Kubernetes
 
 Kaito follows the classic Kubernetes Custom Resource Definition(CRD)/controller design pattern. User manages a `workspace` custom resource which describes the GPU requirements and the inference specification. Kaito controllers will automate the deployment by reconciling the `workspace` custom resource.
 <div align="left">
-  <img src="docs/img/arch.png" width=80% title="Kaito architecture">
+  <img src="docs/img/arch.png" width=80% title="Kaito architecture" alt="Kaito architecture">
 </div>
 
 The above figure presents the Kaito architecture overview. Its major components consist of:
@@ -95,6 +95,43 @@ This project has adopted the [Microsoft Open Source Code of Conduct](https://ope
 For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
 contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
 
+## FAQ
+
+### How to Update Model/Inference Parameters? 
+
+To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. 
+For example, to enable 4-bit quantization on a `falcon-7b-instruct` deployment, you would execute:
+
+```
+kubectl edit deployment workspace-falcon-7b-instruct
+```
+
+Within the deployment configuration, locate the command section and modify it as follows:
+
+Original command:
+```
+accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16
+```
+Modified command to enable 4-bit quantization:
+```
+accelerate launch --num_processes 1 --num_machines 1 --machine_rank 0 --gpu_ids all inference_api.py --pipeline text-generation --torch_dtype bfloat16 --load_in_4bit
+```
+
+For a comprehensive list of inference parameters for the text-generation models, refer to the following options:
+- `pipeline`: The model pipeline for the pre-trained model. For text-generation models this can be either `text-generation` or `conversational`
+- `pretrained_model_name_or_path`: Path to the pretrained model or model identifier from huggingface.co/models.
+- Additional parameters such as `state_dict`, `cache_dir`, `from_tf`, `force_download`, `resume_download`, `proxies`, `output_loading_info`, `allow_remote_files`, `revision`, `trust_remote_code`, `load_in_4bit`, `load_in_8bit`, `torch_dtype`, and `device_map` can also be customized as needed.
+
+You can also introduce new parameters not listed above, which will be passed directly into the model.
+
+### What is the Difference Between Instruct and Non-Instruct Models?
+The main distinction lies in their intended use cases.  Instruct models are fine-tuned versions optimized 
+for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in 
+conversational contexts.
+
+On the other hand, non-instruct, or raw models, are designed for further fine-tuning. Future developments in Kaito may include features that allow users to 
+apply fine-tuned weights to these raw models, enhancing their functionality and application scope.
+
 ## Trademarks
 This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
 trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general).
diff --git a/examples/kaito_workspace_falcon_40b-instruct.yaml b/examples/kaito_workspace_falcon_40b-instruct.yaml
index 33ea9d447..310a4b92b 100644
--- a/examples/kaito_workspace_falcon_40b-instruct.yaml
+++ b/examples/kaito_workspace_falcon_40b-instruct.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-falcon-40b-instruct
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC96ads_A100_v4"
   labelSelector:
diff --git a/examples/kaito_workspace_falcon_40b.yaml b/examples/kaito_workspace_falcon_40b.yaml
index 5006aa345..8ab0ca5b0 100644
--- a/examples/kaito_workspace_falcon_40b.yaml
+++ b/examples/kaito_workspace_falcon_40b.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-falcon-40b
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC96ads_A100_v4"
   labelSelector:
diff --git a/examples/kaito_workspace_falcon_7b-instruct.yaml b/examples/kaito_workspace_falcon_7b-instruct.yaml
index 95c807b79..4bed252b4 100644
--- a/examples/kaito_workspace_falcon_7b-instruct.yaml
+++ b/examples/kaito_workspace_falcon_7b-instruct.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-falcon-7b-instruct
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_falcon_7b.yaml b/examples/kaito_workspace_falcon_7b.yaml
index 4eaf1590d..259b8959b 100644
--- a/examples/kaito_workspace_falcon_7b.yaml
+++ b/examples/kaito_workspace_falcon_7b.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-falcon-7b
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_llama2_13b-chat.yaml b/examples/kaito_workspace_llama2_13b-chat.yaml
index 45c8a3b57..3573d83a5 100644
--- a/examples/kaito_workspace_llama2_13b-chat.yaml
+++ b/examples/kaito_workspace_llama2_13b-chat.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-13b-chat
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_llama2_13b.yaml b/examples/kaito_workspace_llama2_13b.yaml
index 8a0923cd6..242ddfde8 100644
--- a/examples/kaito_workspace_llama2_13b.yaml
+++ b/examples/kaito_workspace_llama2_13b.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-13b
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_llama2_70b-chat.yaml b/examples/kaito_workspace_llama2_70b-chat.yaml
index 18b5e4835..7456227d3 100644
--- a/examples/kaito_workspace_llama2_70b-chat.yaml
+++ b/examples/kaito_workspace_llama2_70b-chat.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-70b-chat
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC96ads_A100_v4"
   count: 2
diff --git a/examples/kaito_workspace_llama2_70b.yaml b/examples/kaito_workspace_llama2_70b.yaml
index 891440f03..1fb6e38aa 100644
--- a/examples/kaito_workspace_llama2_70b.yaml
+++ b/examples/kaito_workspace_llama2_70b.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-70b
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC96ads_A100_v4"
   count: 2
diff --git a/examples/kaito_workspace_llama2_7b-chat.yaml b/examples/kaito_workspace_llama2_7b-chat.yaml
index b1c68544a..e9f49c21a 100644
--- a/examples/kaito_workspace_llama2_7b-chat.yaml
+++ b/examples/kaito_workspace_llama2_7b-chat.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-7b-chat
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_llama2_7b.yaml b/examples/kaito_workspace_llama2_7b.yaml
index ba72eb3eb..6d33f5ab5 100644
--- a/examples/kaito_workspace_llama2_7b.yaml
+++ b/examples/kaito_workspace_llama2_7b.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-7b
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_mistral_7b-instruct.yaml b/examples/kaito_workspace_mistral_7b-instruct.yaml
index 6a7539d09..927c80515 100644
--- a/examples/kaito_workspace_mistral_7b-instruct.yaml
+++ b/examples/kaito_workspace_mistral_7b-instruct.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-mistral-7b-instruct
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_mistral_7b.yaml b/examples/kaito_workspace_mistral_7b.yaml
index 47f69c995..275c930d6 100644
--- a/examples/kaito_workspace_mistral_7b.yaml
+++ b/examples/kaito_workspace_mistral_7b.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-mistral-7b
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_phi-2.yaml b/examples/kaito_workspace_phi-2.yaml
index d1bb49eea..86c44c40f 100644
--- a/examples/kaito_workspace_phi-2.yaml
+++ b/examples/kaito_workspace_phi-2.yaml
@@ -2,6 +2,8 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-phi-2
+  annotations:
+    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC6s_v3"
   labelSelector:

From c7009283166935bdabb23d30ab6943f0eae2f6ae Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Wed, 28 Feb 2024 14:40:36 -0800
Subject: [PATCH 05/10] revert: examples

---
 examples/kaito_workspace_falcon_40b-instruct.yaml | 2 --
 examples/kaito_workspace_falcon_40b.yaml          | 2 --
 examples/kaito_workspace_falcon_7b-instruct.yaml  | 2 --
 examples/kaito_workspace_falcon_7b.yaml           | 2 --
 examples/kaito_workspace_llama2_13b-chat.yaml     | 2 --
 examples/kaito_workspace_llama2_13b.yaml          | 2 --
 examples/kaito_workspace_llama2_70b-chat.yaml     | 2 --
 examples/kaito_workspace_llama2_70b.yaml          | 2 --
 examples/kaito_workspace_llama2_7b-chat.yaml      | 2 --
 examples/kaito_workspace_llama2_7b.yaml           | 2 --
 examples/kaito_workspace_mistral_7b-instruct.yaml | 2 --
 examples/kaito_workspace_mistral_7b.yaml          | 2 --
 examples/kaito_workspace_phi-2.yaml               | 2 --
 13 files changed, 26 deletions(-)

diff --git a/examples/kaito_workspace_falcon_40b-instruct.yaml b/examples/kaito_workspace_falcon_40b-instruct.yaml
index 310a4b92b..33ea9d447 100644
--- a/examples/kaito_workspace_falcon_40b-instruct.yaml
+++ b/examples/kaito_workspace_falcon_40b-instruct.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-falcon-40b-instruct
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC96ads_A100_v4"
   labelSelector:
diff --git a/examples/kaito_workspace_falcon_40b.yaml b/examples/kaito_workspace_falcon_40b.yaml
index 8ab0ca5b0..5006aa345 100644
--- a/examples/kaito_workspace_falcon_40b.yaml
+++ b/examples/kaito_workspace_falcon_40b.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-falcon-40b
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC96ads_A100_v4"
   labelSelector:
diff --git a/examples/kaito_workspace_falcon_7b-instruct.yaml b/examples/kaito_workspace_falcon_7b-instruct.yaml
index 4bed252b4..95c807b79 100644
--- a/examples/kaito_workspace_falcon_7b-instruct.yaml
+++ b/examples/kaito_workspace_falcon_7b-instruct.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-falcon-7b-instruct
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_falcon_7b.yaml b/examples/kaito_workspace_falcon_7b.yaml
index 259b8959b..4eaf1590d 100644
--- a/examples/kaito_workspace_falcon_7b.yaml
+++ b/examples/kaito_workspace_falcon_7b.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-falcon-7b
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_llama2_13b-chat.yaml b/examples/kaito_workspace_llama2_13b-chat.yaml
index 3573d83a5..45c8a3b57 100644
--- a/examples/kaito_workspace_llama2_13b-chat.yaml
+++ b/examples/kaito_workspace_llama2_13b-chat.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-13b-chat
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_llama2_13b.yaml b/examples/kaito_workspace_llama2_13b.yaml
index 242ddfde8..8a0923cd6 100644
--- a/examples/kaito_workspace_llama2_13b.yaml
+++ b/examples/kaito_workspace_llama2_13b.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-13b
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_llama2_70b-chat.yaml b/examples/kaito_workspace_llama2_70b-chat.yaml
index 7456227d3..18b5e4835 100644
--- a/examples/kaito_workspace_llama2_70b-chat.yaml
+++ b/examples/kaito_workspace_llama2_70b-chat.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-70b-chat
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC96ads_A100_v4"
   count: 2
diff --git a/examples/kaito_workspace_llama2_70b.yaml b/examples/kaito_workspace_llama2_70b.yaml
index 1fb6e38aa..891440f03 100644
--- a/examples/kaito_workspace_llama2_70b.yaml
+++ b/examples/kaito_workspace_llama2_70b.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-70b
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC96ads_A100_v4"
   count: 2
diff --git a/examples/kaito_workspace_llama2_7b-chat.yaml b/examples/kaito_workspace_llama2_7b-chat.yaml
index e9f49c21a..b1c68544a 100644
--- a/examples/kaito_workspace_llama2_7b-chat.yaml
+++ b/examples/kaito_workspace_llama2_7b-chat.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-7b-chat
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_llama2_7b.yaml b/examples/kaito_workspace_llama2_7b.yaml
index 6d33f5ab5..ba72eb3eb 100644
--- a/examples/kaito_workspace_llama2_7b.yaml
+++ b/examples/kaito_workspace_llama2_7b.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-llama-2-7b
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_mistral_7b-instruct.yaml b/examples/kaito_workspace_mistral_7b-instruct.yaml
index 927c80515..6a7539d09 100644
--- a/examples/kaito_workspace_mistral_7b-instruct.yaml
+++ b/examples/kaito_workspace_mistral_7b-instruct.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-mistral-7b-instruct
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_mistral_7b.yaml b/examples/kaito_workspace_mistral_7b.yaml
index 275c930d6..47f69c995 100644
--- a/examples/kaito_workspace_mistral_7b.yaml
+++ b/examples/kaito_workspace_mistral_7b.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-mistral-7b
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC12s_v3"
   labelSelector:
diff --git a/examples/kaito_workspace_phi-2.yaml b/examples/kaito_workspace_phi-2.yaml
index 86c44c40f..d1bb49eea 100644
--- a/examples/kaito_workspace_phi-2.yaml
+++ b/examples/kaito_workspace_phi-2.yaml
@@ -2,8 +2,6 @@ apiVersion: kaito.sh/v1alpha1
 kind: Workspace
 metadata:
   name: workspace-phi-2
-  annotations:
-    kaito.sh/enablelb: "False"
 resource:
   instanceType: "Standard_NC6s_v3"
   labelSelector:

From 4950357a31139273ea54246cd6e3b4c12f7b231f Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Wed, 28 Feb 2024 14:42:04 -0800
Subject: [PATCH 06/10] nit

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index bf8669f2e..d8616eddf 100644
--- a/README.md
+++ b/README.md
@@ -130,7 +130,7 @@ for interactive chat applications. They are typically the preferred choice for m
 conversational contexts.
 
 On the other hand, non-instruct, or raw models, are designed for further fine-tuning. Future developments in Kaito may include features that allow users to 
-apply fine-tuned weights to these raw models, enhancing their functionality and application scope.
+apply fine-tuned weights to these raw models.
 
 ## Trademarks
 This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft

From b01c3ce1f46825ddf418a5ea3190c2f531dd7645 Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Wed, 28 Feb 2024 15:24:31 -0800
Subject: [PATCH 07/10] feat: add new q

---
 README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d8616eddf..fc6e382be 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio
 
 ## FAQ
 
-### How to Update Model/Inference Parameters? 
+### How to Update Model/Inference Parameters to Override the Kaito Preset Configuration?
 
 To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. 
 For example, to enable 4-bit quantization on a `falcon-7b-instruct` deployment, you would execute:
@@ -122,7 +122,11 @@ For a comprehensive list of inference parameters for the text-generation models,
 - `pretrained_model_name_or_path`: Path to the pretrained model or model identifier from huggingface.co/models.
 - Additional parameters such as `state_dict`, `cache_dir`, `from_tf`, `force_download`, `resume_download`, `proxies`, `output_loading_info`, `allow_remote_files`, `revision`, `trust_remote_code`, `load_in_4bit`, `load_in_8bit`, `torch_dtype`, and `device_map` can also be customized as needed.
 
-You can also introduce new parameters not listed above, which will be passed directly into the model.
+Should you need an undocumented parameter, kindly file an issue for potential future inclusion.
+
+### How to upgrade the existing deployment to use the latest model configuration?
+
+When using hosted public models, a user can delete the existing inference workload (Deployment of StatefulSet) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec.
 
 ### What is the Difference Between Instruct and Non-Instruct Models?
 The main distinction lies in their intended use cases.  Instruct models are fine-tuned versions optimized 

From 0ecf0436594520d4e6c19fbfa75eaafd46c6927b Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Wed, 28 Feb 2024 15:26:27 -0800
Subject: [PATCH 08/10] feat: add new q

---
 README.md | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index fc6e382be..f3b51ca71 100644
--- a/README.md
+++ b/README.md
@@ -79,27 +79,11 @@ The detailed usage for Kaito supported models can be found in [**HERE**](presets
 
 The number of the supported models in Kaito is growing! Please check [this](./docs/How-to-add-new-models.md) document to see how to add a new supported model.
 
-## Contributing
-
-[Read more](docs/contributing/readme.md)
-<!-- markdown-link-check-disable -->
-This project welcomes contributions and suggestions.  Most contributions require you to agree to a
-Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
-the rights to use your contribution. For details, visit <https://cla.opensource.microsoft.com>.
-
-When you submit a pull request, a CLA bot will automatically determine whether you need to provide
-a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
-provided by the bot. You will only need to do this once across all repos using our CLA.
-
-This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
-For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
-contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
-
 ## FAQ
 
 ### How to Update Model/Inference Parameters to Override the Kaito Preset Configuration?
 
-To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`. 
+To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`.
 For example, to enable 4-bit quantization on a `falcon-7b-instruct` deployment, you would execute:
 
 ```
@@ -129,13 +113,29 @@ Should you need an undocumented parameter, kindly file an issue for potential fu
 When using hosted public models, a user can delete the existing inference workload (Deployment of StatefulSet) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec.
 
 ### What is the Difference Between Instruct and Non-Instruct Models?
-The main distinction lies in their intended use cases.  Instruct models are fine-tuned versions optimized 
-for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in 
+The main distinction lies in their intended use cases.  Instruct models are fine-tuned versions optimized
+for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in
 conversational contexts.
 
-On the other hand, non-instruct, or raw models, are designed for further fine-tuning. Future developments in Kaito may include features that allow users to 
+On the other hand, non-instruct, or raw models, are designed for further fine-tuning. Future developments in Kaito may include features that allow users to
 apply fine-tuned weights to these raw models.
 
+## Contributing
+
+[Read more](docs/contributing/readme.md)
+<!-- markdown-link-check-disable -->
+This project welcomes contributions and suggestions.  Most contributions require you to agree to a
+Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
+the rights to use your contribution. For details, visit <https://cla.opensource.microsoft.com>.
+
+When you submit a pull request, a CLA bot will automatically determine whether you need to provide
+a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
+provided by the bot. You will only need to do this once across all repos using our CLA.
+
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
+contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+
 ## Trademarks
 This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
 trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general).

From 94f6291a5f7312ad30d2fa933246ccc17bcc3687 Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Wed, 28 Feb 2024 15:28:14 -0800
Subject: [PATCH 09/10] fix: casing

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f3b51ca71..230f0ac6e 100644
--- a/README.md
+++ b/README.md
@@ -81,7 +81,7 @@ The number of the supported models in Kaito is growing! Please check [this](./do
 
 ## FAQ
 
-### How to Update Model/Inference Parameters to Override the Kaito Preset Configuration?
+### How to update model/inference parameters to override the Kaito Preset Configuration?
 
 To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`.
 For example, to enable 4-bit quantization on a `falcon-7b-instruct` deployment, you would execute:
@@ -112,7 +112,7 @@ Should you need an undocumented parameter, kindly file an issue for potential fu
 
 When using hosted public models, a user can delete the existing inference workload (Deployment of StatefulSet) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec.
 
-### What is the Difference Between Instruct and Non-Instruct Models?
+### What is the difference between instruct and non-instruct models?
 The main distinction lies in their intended use cases.  Instruct models are fine-tuned versions optimized
 for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in
 conversational contexts.

From 8b198637aaa9034220c49feabd16960dfb51051c Mon Sep 17 00:00:00 2001
From: ishaansehgal99 <ishaanforthewin@gmail.com>
Date: Wed, 28 Feb 2024 15:32:14 -0800
Subject: [PATCH 10/10] fix: upgrade

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 230f0ac6e..3de7e194a 100644
--- a/README.md
+++ b/README.md
@@ -81,6 +81,10 @@ The number of the supported models in Kaito is growing! Please check [this](./do
 
 ## FAQ
 
+### How to upgrade the existing deployment to use the latest model configuration?
+
+When using hosted public models, a user can delete the existing inference workload (`Deployment` or `StatefulSet`) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec.
+
 ### How to update model/inference parameters to override the Kaito Preset Configuration?
 
 To update model or inference parameters for a deployed service, perform a `kubectl edit` on the workload type, which could be either a `StatefulSet` or `Deployment`.
@@ -108,10 +112,6 @@ For a comprehensive list of inference parameters for the text-generation models,
 
 Should you need an undocumented parameter, kindly file an issue for potential future inclusion.
 
-### How to upgrade the existing deployment to use the latest model configuration?
-
-When using hosted public models, a user can delete the existing inference workload (Deployment of StatefulSet) manually, and the workspace controller will create a new one with the latest preset configuration (e.g., the image version) defined in the current release. For private models, it is recommended to create a new workspace with a new image version in the Spec.
-
 ### What is the difference between instruct and non-instruct models?
 The main distinction lies in their intended use cases.  Instruct models are fine-tuned versions optimized
 for interactive chat applications. They are typically the preferred choice for most implementations due to their enhanced performance in