Merge pull request #149 from Clarifai/EAGLE-3302
[EAGLE-3302] Allow users specify image sizes for image models
stmugisha authored Aug 25, 2023
2 parents 5c45efc + 9cd2554 commit 45db102
Showing 4 changed files with 119 additions and 23 deletions.
11 changes: 6 additions & 5 deletions clarifai/models/model_serving/README.md
@@ -8,16 +8,16 @@ A step by step guide to building your own triton inference model and deploying i

1. Generate a triton model repository via commandline.
```console
-$ clarifai-model-upload-init --model_name=<Your model name> \
-    --model_type=<select model type from available ones> \
-    --repo_dir=<directory in which to create your model repository>
+$ clarifai-model-upload-init --model_name <Your model name> \
+    --model_type <select model type from available ones> \
+    --repo_dir <directory in which to create your model repository>
```
2. Edit the `requirements.txt` file with the dependencies needed to run inference on your model, and edit `labels.txt` (if present in the directory) with the labels your model predicts.
3. Add your model loading and inference code inside the `inference.py` script of the generated model repository, under the `setup()` and `predict()` functions respectively. Refer to the [Inference Script section]() for a description of this file.
4. Generate a zip of your triton model for deployment via commandline.
```console
-$ clarifai-triton-zip --triton_model_repository=<path to triton model repository to be compressed> \
-    --zipfile_name=<name of the triton model zip> (Recommended to use <model_name>_<model-type> convention for naming)
+$ clarifai-triton-zip --triton_model_repository <path to triton model repository to be compressed> \
+    --zipfile_name <name of the triton model zip> (Recommended to use <model_name>_<model-type> convention for naming)
```
5. Upload the generated zip to a public file storage service to get a URL to the zip. This URL must be publicly accessible and downloadable as it's necessary for the last step: uploading the model to a Clarifai app.
6. Set your Clarifai auth credentials as environment variables.
@@ -120,6 +120,7 @@ Additional methods can be added to this script's `Infer` class by the user as de
- [Model Output types docs](docs/output.md)
- [Dependencies](docs/dependencies.md)
- [Examples](examples/)
- [Custom Configs](docs/custom_config.md)

## Prerequisites

34 changes: 32 additions & 2 deletions clarifai/models/model_serving/cli/repository.py
@@ -19,6 +19,13 @@
from ..pb_model_repository import TritonModelRepository


def dims_type(shape_string: str):
"""Read list string from cli and convert values to a list of integers."""
shape_string = shape_string.replace("[", "").replace("]", "")
shapes = list(map(int, shape_string.split(",")))
return shapes


def model_upload_init():
"""
Clarifai triton model upload commandline tool.
@@ -40,17 +47,40 @@ def model_upload_init():
required=True,
help=f"Clarifai supported model types.\n Model-types-map: {MODEL_TYPES}",
)
parser.add_argument(
"--image_shape",
type=dims_type,
default="[-1, -1]",
required=False,
help=
f"(H, W) dims for models with an image input type. H and W each have a max value of 1024",
)
parser.add_argument(
"--repo_dir",
type=str,
default=".", #curdir
default=".",
required=True,
help="Directory to create triton repository.")

args = parser.parse_args()
MAX_HW_DIM = 1024

if len(args.image_shape) != 2:
raise ValueError(
f"image_shape takes 2 values, Height and Width. Got {len(args.image_shape)} values instead."
)

if args.image_shape[0] > MAX_HW_DIM or args.image_shape[1] > MAX_HW_DIM:
raise ValueError(
f"H and W each have a maximum value of 1024. Got H: {args.image_shape[0]}, W: {args.image_shape[1]}"
)

model_config = TritonModelConfig(
-model_name=args.model_name, model_version="1", model_type=args.model_type)
+model_name=args.model_name,
+model_version="1",
+model_type=args.model_type,
+image_shape=args.image_shape,
+)

triton_repo = TritonModelRepository(model_config)
triton_repo.build_repository(args.repo_dir)
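For reference, `--image_shape` accepts either `"[H, W]"` or `"H, W"`: the string is parsed by `dims_type` and then checked against the two-value and 1024-pixel limits in `model_upload_init`. Below is a minimal, self-contained sketch of that parsing and validation; the helper is re-declared locally so the snippet runs without the `clarifai` package installed.

```python
# Sketch of the --image_shape parsing and validation added above; it mirrors
# dims_type() and the checks in model_upload_init().
MAX_HW_DIM = 1024


def dims_type(shape_string: str):
  """Convert a CLI string such as "[224, 224]" or "224, 224" to [224, 224]."""
  shape_string = shape_string.replace("[", "").replace("]", "")
  return list(map(int, shape_string.split(",")))


for raw in ("[224, 224]", "512, 768"):
  shape = dims_type(raw)
  if len(shape) != 2:
    raise ValueError(f"image_shape takes 2 values, Height and Width. Got {len(shape)} instead.")
  if shape[0] > MAX_HW_DIM or shape[1] > MAX_HW_DIM:
    raise ValueError(f"H and W each have a maximum value of 1024. Got {shape}.")
  print(raw, "->", shape)  # e.g. "[224, 224]" -> [224, 224]
```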
34 changes: 34 additions & 0 deletions clarifai/models/model_serving/docs/custom_config.md
@@ -0,0 +1,34 @@
## Custom Triton Configurations

The commandline triton model repository generation utils work with default values for the various triton configurations, but a few of these config values can be modified to suit task-specific needs.

* For vision models, for instance, different input shapes for the `Height (H)` and `Width (W)` are supported and can be set via the commandline, e.g.
```console
$ clarifai-model-upload-init --model_name <Your model name> \
--model_type <select model type from available ones> \
--image_shape "H, W"
--repo_dir <directory in which to create your model repository>
```
`H` and `W` each have a maximum value of 1024.
`--image_shape` accepts both `"H, W"` and `"[H, W]"` format input.


## Generating the triton model repository without the commandline

The triton model repository can be generated via a python script specifying the same values as required in the commandline. Below is a sample of how the code would be structured.

```python
from clarifai.models.model_serving.model_config.triton_config import TritonModelConfig
from clarifai.models.model_serving.pb_model_repository import TritonModelRepository


model_config = TritonModelConfig(
model_name="<model_name>",
model_version="1",
model_type="<model_type>",
image_shape=<[H,W]>, # 0 < [H,W] <= 1024
)

triton_repo = TritonModelRepository(model_config)
triton_repo.build_repository("<dir>")
```
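For instance, with placeholder values filled in (the model name, type and output directory below are hypothetical):

```python
from clarifai.models.model_serving.model_config.triton_config import TritonModelConfig
from clarifai.models.model_serving.pb_model_repository import TritonModelRepository

# Hypothetical values; substitute your own model name, type and output directory.
model_config = TritonModelConfig(
    model_name="my-visual-classifier",
    model_version="1",
    model_type="visual-classifier",
    image_shape=[224, 224],  # H, W; each value must be <= 1024
)

triton_repo = TritonModelRepository(model_config)
triton_repo.build_repository("./my_visual_classifier_repo")
```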
63 changes: 47 additions & 16 deletions clarifai/models/model_serving/model_config/triton_config.py
@@ -16,6 +16,24 @@
from typing import List


@dataclass
class DType:
"""
Triton Model Config data types.
"""
# https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto
TYPE_UINT8: int = 2
TYPE_INT8: int = 6
TYPE_INT16: int = 7
TYPE_INT32: int = 8
TYPE_INT64: int = 9
TYPE_FP16: int = 10
TYPE_FP32: int = 11
TYPE_STRING: int = 13
KIND_GPU: int = 1
KIND_CPU: int = 2


@dataclass
class InputConfig:
"""
@@ -31,7 +49,7 @@ class InputConfig:
InputConfig
"""
name: str
-data_type: str = None
+data_type: int
dims: List = field(default_factory=list)


@@ -51,7 +69,7 @@ class OutputConfig:
OutputConfig
"""
name: str
-data_type: str = None
+data_type: int
dims: List = field(default_factory=list)
labels: bool = False

@@ -81,9 +99,9 @@ class Device:

def __post_init__(self):
if self.use_gpu:
self.kind: str = "KIND_GPU"
self.kind: str = DType.KIND_GPU
else:
self.kind: str = "KIND_CPU"
self.kind: str = DType.KIND_CPU


@dataclass
@@ -124,6 +142,7 @@ class TritonModelConfig:
model_name: str
model_version: str
model_type: str
image_shape: List #(H, W)
input: List[InputConfig] = field(default_factory=list)
output: List[OutputConfig] = field(default_factory=list)
instance_group: Device = Device()
@@ -136,60 +155,72 @@ def __post_init__(self):
Set supported input dims and data_types for
a given model_type.
"""
image_input = InputConfig(name="image", data_type="TYPE_UINT8", dims=[-1, -1, 3])
text_input = InputConfig(name="text", data_type="TYPE_STRING", dims=[1])
MAX_HW_DIM = 1024
if len(self.image_shape) != 2:
raise ValueError(
f"image_shape takes 2 values, Height and Width. Got {len(self.image_shape)} instead.")
if self.image_shape[0] > MAX_HW_DIM or self.image_shape[1] > MAX_HW_DIM:
raise ValueError(
f"H and W each have a maximum value of 1024. Got H: {self.image_shape[0]}, W: {self.image_shape[1]}"
)
image_dims = self.image_shape
image_dims.append(3) # add channel dim
image_input = InputConfig(name="image", data_type=DType.TYPE_UINT8, dims=image_dims)
text_input = InputConfig(name="text", data_type=DType.TYPE_STRING, dims=[1])
# del image_shape as it's a temporary config that's not used by triton
del self.image_shape

if self.model_type == "visual-detector":
self.input.append(image_input)
pred_bboxes = OutputConfig(name="predicted_bboxes", data_type="TYPE_FP32", dims=[-1, 4])
pred_bboxes = OutputConfig(name="predicted_bboxes", data_type=DType.TYPE_FP32, dims=[-1, 4])
pred_labels = OutputConfig(
name="predicted_labels", data_type="TYPE_INT32", dims=[-1, 1], labels=True)
name="predicted_labels", data_type=DType.TYPE_INT32, dims=[-1, 1], labels=True)
del pred_labels.labels
pred_scores = OutputConfig(name="predicted_scores", data_type="TYPE_FP32", dims=[-1, 1])
pred_scores = OutputConfig(name="predicted_scores", data_type=DType.TYPE_FP32, dims=[-1, 1])
self.output.extend([pred_bboxes, pred_labels, pred_scores])

elif self.model_type == "visual-classifier":
self.input.append(image_input)
pred_labels = OutputConfig(
name="softmax_predictions", data_type="TYPE_FP32", dims=[-1], labels=True)
name="softmax_predictions", data_type=DType.TYPE_FP32, dims=[-1], labels=True)
del pred_labels.labels
self.output.append(pred_labels)

elif self.model_type == "text-classifier":
self.input.append(text_input)
pred_labels = OutputConfig(
name="softmax_predictions", data_type="TYPE_FP32", dims=[-1], labels=True)
name="softmax_predictions", data_type=DType.TYPE_FP32, dims=[-1], labels=True)
#'Len of out list expected to be the number of concepts returned by the model,
# with each value being the confidence for the respective model output.
del pred_labels.labels
self.output.append(pred_labels)

elif self.model_type == "text-to-text":
self.input.append(text_input)
pred_text = OutputConfig(name="text", data_type="TYPE_STRING", dims=[1], labels=False)
pred_text = OutputConfig(name="text", data_type=DType.TYPE_STRING, dims=[1], labels=False)
self.output.append(pred_text)

elif self.model_type == "text-embedder":
self.input.append(text_input)
embedding_vector = OutputConfig(
name="embeddings", data_type="TYPE_FP32", dims=[-1], labels=False)
name="embeddings", data_type=DType.TYPE_FP32, dims=[-1], labels=False)
self.output.append(embedding_vector)

elif self.model_type == "text-to-image":
self.input.append(text_input)
gen_image = OutputConfig(
name="image", data_type="TYPE_UINT8", dims=[-1, -1, 3], labels=False)
name="image", data_type=DType.TYPE_UINT8, dims=[-1, -1, 3], labels=False)
self.output.append(gen_image)

elif self.model_type == "visual-embedder":
self.input.append(image_input)
embedding_vector = OutputConfig(
name="embeddings", data_type="TYPE_FP32", dims=[-1], labels=False)
name="embeddings", data_type=DType.TYPE_FP32, dims=[-1], labels=False)
self.output.append(embedding_vector)

elif self.model_type == "visual-segmenter":
self.input.append(image_input)
pred_masks = OutputConfig(
name="predicted_mask", data_type="TYPE_INT64", dims=[-1, -1], labels=True)
name="predicted_mask", data_type=DType.TYPE_INT64, dims=[-1, -1], labels=True)
del pred_masks.labels
self.output.append(pred_masks)
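
To illustrate what `__post_init__` produces with a custom `image_shape`, here is a small sketch; it assumes the `clarifai` package is installed and uses a placeholder model name.

```python
from clarifai.models.model_serving.model_config.triton_config import TritonModelConfig

config = TritonModelConfig(
    model_name="demo-detector",  # placeholder
    model_version="1",
    model_type="visual-detector",
    image_shape=[512, 512],
)

# H and W are carried into the image input dims with a channel dim appended,
# and image_shape itself is deleted once the triton config has been populated.
print(config.input[0].name, config.input[0].dims)  # image [512, 512, 3]
print([out.name for out in config.output])
# ['predicted_bboxes', 'predicted_labels', 'predicted_scores']

# Shapes above the 1024 limit are rejected at construction time.
try:
  TritonModelConfig(
      model_name="demo-detector",
      model_version="1",
      model_type="visual-detector",
      image_shape=[2048, 2048],
  )
except ValueError as err:
  print(err)  # H and W each have a maximum value of 1024. Got H: 2048, W: 2048
```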
