From 4a0234b47f5891e2df514a67eb25197467cae64f Mon Sep 17 00:00:00 2001
From: Boris Fomitchev
Date: Mon, 2 Sep 2024 23:31:46 -0700
Subject: [PATCH] Added trt_compile configs for vista2d and vista3d (#632)

Fixes # .

### Description
Adds `inference_trt.json` override configs that enable TensorRT inference via `monai.networks.trt_compile` for the vista2d and vista3d bundles, bumps both bundle versions in `metadata.json`, and documents the new TensorRT inference command in both READMEs.

### Status
**Ready**

### Please ensure all the checkboxes:
- [x] Codeformat tests passed locally by running `./runtests.sh --codeformat`.
- [ ] In-line docstrings updated.
- [ ] Update `version` and `changelog` in `metadata.json` if changing an existing bundle.
- [ ] Please ensure the naming rules in config files meet our requirements (please refer to: `CONTRIBUTING.md`).
- [ ] Ensure versions of packages such as `monai`, `pytorch` and `numpy` are correct in `metadata.json`.
- [ ] Descriptions should be consistent with the content, such as `eval_metrics` of the provided weights and TorchScript modules.
- [ ] Files larger than 25MB are excluded and replaced by providing download links in `large_file.yml`.
- [ ] Avoid using path that contains personal information within config files (such as use `/home/your_name/` for `"bundle_root"`).

---------

Signed-off-by: Boris Fomitchev
Signed-off-by: Yiheng Wang
Co-authored-by: Yiheng Wang <68361391+yiheng-wang-nv@users.noreply.github.com>
Co-authored-by: Yiheng Wang
---
 models/vista2d/configs/inference.json     |  2 +-
 models/vista2d/configs/inference_trt.json | 10 ++++++++++
 models/vista2d/configs/metadata.json      |  3 ++-
 models/vista2d/docs/README.md             |  8 +++++++-
 models/vista3d/configs/inference_trt.json |  9 +++++++++
 models/vista3d/configs/metadata.json      |  3 ++-
 models/vista3d/docs/README.md             |  7 +++++++
 7 files changed, 38 insertions(+), 4 deletions(-)
 create mode 100644 models/vista2d/configs/inference_trt.json
 create mode 100644 models/vista3d/configs/inference_trt.json

diff --git a/models/vista2d/configs/inference.json b/models/vista2d/configs/inference.json
index f56a5c47..2439a4a5 100644
--- a/models/vista2d/configs/inference.json
+++ b/models/vista2d/configs/inference.json
@@ -11,7 +11,7 @@
         256,
         256
     ],
-    "input_dict": "${'image': '/home/venn/Desktop/data/medical/cellpose_dataset/test/001_img.png'}",
+    "input_dict": "${'image': '/cellpose_dataset/test/001_img.png'}",
     "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
     "sam_ckpt_path": "$@ckpt_dir + '/sam_vit_b_01ec64.pth'",
     "pretrained_ckpt_path": "$@ckpt_dir + '/model.pt'",
diff --git a/models/vista2d/configs/inference_trt.json b/models/vista2d/configs/inference_trt.json
new file mode 100644
index 00000000..17e9ca80
--- /dev/null
+++ b/models/vista2d/configs/inference_trt.json
@@ -0,0 +1,10 @@
+{
+    "imports": [
+        "$import numpy",
+        "$from monai.networks import trt_compile"
+    ],
+    "trt_args": {
+        "dynamic_batchsize": "$[1, @inferer#sw_batch_size, @inferer#sw_batch_size]"
+    },
+    "network": "$trt_compile(@network_def.to(@device), @pretrained_ckpt_path, args=@trt_args)"
+}
diff --git a/models/vista2d/configs/metadata.json b/models/vista2d/configs/metadata.json
index 05ad27a9..67fe195a 100644
--- a/models/vista2d/configs/metadata.json
+++ b/models/vista2d/configs/metadata.json
@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20240725.json",
-    "version": "0.2.3",
+    "version": "0.2.4",
     "changelog": {
+        "0.2.4": "enable tensorrt inference",
         "0.2.3": "update weights link",
         "0.2.2": "update to use monai components",
         "0.2.1": "initial OSS version"
diff --git a/models/vista2d/docs/README.md b/models/vista2d/docs/README.md
index c2f221be..a3a902e4 100644
--- a/models/vista2d/docs/README.md
+++ b/models/vista2d/docs/README.md
@@ -66,7 +66,13 @@ torchrun --nproc_per_node=gpu -m monai.bundle run_workflow "scripts.workflow.Vis
 python -m monai.bundle run --config_file configs/inference.json
 ```

-Please note that the data used in the config file is: "/cellpose_dataset/test/001_img.png", if the dataset path is different or you want to do inference on another file, please modify in `configs/inference.json` accordingly.
+Please note that the data used in this config file is: "/cellpose_dataset/test/001_img.png", if the dataset path is different or you want to do inference on another file, please modify in `configs/inference.json` accordingly.
+
+#### Execute inference with the TensorRT model:
+
+```
+python -m monai.bundle run --config_file "['configs/inference.json', 'configs/inference_trt.json']"
+```

 ### Execute multi-GPU inference
 ```bash
diff --git a/models/vista3d/configs/inference_trt.json b/models/vista3d/configs/inference_trt.json
new file mode 100644
index 00000000..9c3d52dc
--- /dev/null
+++ b/models/vista3d/configs/inference_trt.json
@@ -0,0 +1,9 @@
+{
+    "+imports": [
+        "$from monai.networks import trt_compile"
+    ],
+    "trt_args": {
+        "dynamic_batchsize": "$[1, @inferer#sw_batch_size, @inferer#sw_batch_size]"
+    },
+    "network": "$trt_compile(@network_def.to(@device), @bundle_root + '/models/model.pt', args=@trt_args, submodule=['image_encoder.encoder', 'class_head'])"
+}
diff --git a/models/vista3d/configs/metadata.json b/models/vista3d/configs/metadata.json
index 5a1b31c1..92a801d5 100644
--- a/models/vista3d/configs/metadata.json
+++ b/models/vista3d/configs/metadata.json
@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20240725.json",
-    "version": "0.4.3",
+    "version": "0.4.4",
     "changelog": {
+        "0.4.4": "enable tensorrt inference",
         "0.4.3": "fix CL and batch infer issues",
         "0.4.2": "use MONAI components for network and utils",
         "0.4.1": "initial OSS version"
diff --git a/models/vista3d/docs/README.md b/models/vista3d/docs/README.md
index 386796cf..741dbcb1 100644
--- a/models/vista3d/docs/README.md
+++ b/models/vista3d/docs/README.md
@@ -184,6 +184,13 @@ This default is overridable by changing the input folder `input_dir`, or the inp

 Set `"postprocessing#transforms#0#_disabled_": false` to move the postprocessing to cpu to reduce the GPU memory footprint.

+#### Execute inference with the TensorRT model:
+
+```
+python -m monai.bundle run --config_file "['configs/inference.json', 'configs/inference_trt.json']"
+```
+
+
 ## Automatic segmentation label prompts :
 The mapping between organ name and label prompt is in the [json file](labels.json)
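
Note for reviewers: a minimal sketch of what the `"network"` expression in the new `inference_trt.json` files evaluates to when the composed configs are run. The `UNet` stand-in, the example batch sizes, and the checkpoint path below are illustrative assumptions, not the bundles' actual `network_def` or `@pretrained_ckpt_path` / `@bundle_root` values.

```python
# Illustrative sketch only; mirrors the "network" entries added in this PR,
# with a stand-in network instead of the bundles' real network_def.
import torch
from monai.networks import trt_compile
from monai.networks.nets import UNet  # hypothetical stand-in for @network_def

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = UNet(
    spatial_dims=2,
    in_channels=3,
    out_channels=3,
    channels=(16, 32, 64),
    strides=(2, 2),
).to(device)

# Same shape as @trt_args in the configs: [min, opt, max] batch sizes for the
# dynamic batch dimension (the bundles derive opt/max from @inferer#sw_batch_size).
trt_args = {"dynamic_batchsize": [1, 4, 4]}

# Placeholder checkpoint path; the bundles pass @pretrained_ckpt_path (vista2d)
# or @bundle_root + '/models/model.pt' (vista3d) here.
model = trt_compile(model, "models/model.pt", args=trt_args)
```

For vista3d, the config additionally passes `submodule=['image_encoder.encoder', 'class_head']` so that only those parts of the network are converted to TensorRT, while the rest keeps running in PyTorch.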