diff --git a/.github/update-model-info.yml b/.github/workflows/update-model-info.yml similarity index 100% rename from .github/update-model-info.yml rename to .github/workflows/update-model-info.yml diff --git a/ci/bundle_custom_data.py b/ci/bundle_custom_data.py index 3e2c3bd3..1b10a7a3 100644 --- a/ci/bundle_custom_data.py +++ b/ci/bundle_custom_data.py @@ -44,15 +44,23 @@ install_dependency_dict = {} # This list is used for our CI tests to determine whether a bundle supports TensorRT export. Related -# test will be employed for bundles in the list. -include_verify_tensorrt_list = ["spleen_ct_segmentation", "endoscopic_tool_segmentation", "pathology_tumor_detection"] +# test will be employed for bundles in the dict. +include_verify_tensorrt_dict = { + "spleen_ct_segmentation": {}, + "endoscopic_tool_segmentation": {}, + "pathology_tumor_detection": {}, + "pancreas_ct_dints_segmentation": { + "use_trace": True, + "converter_kwargs": {"truncate_long_and_double": True, "torch_executed_ops": ["aten::upsample_trilinear3d"]}, + }, +} # This list is used for our CI tests to determine whether a bundle supports ONNX-TensorRT export. Related -# test will be employed for bundles in the list. -include_verify_onnx_tensorrt_list = [ - "brats_mri_segmentation", - "endoscopic_inbody_classification", - "spleen_deepedit_annotation", - "spleen_ct_segmentation", - "lung_nodule_ct_detection", -] +# test will be employed for bundles in the dict. 
+include_verify_onnx_tensorrt_dict = { + "brats_mri_segmentation": {}, + "endoscopic_inbody_classification": {}, + "spleen_deepedit_annotation": {}, + "spleen_ct_segmentation": {}, + "lung_nodule_ct_detection": {}, +} diff --git a/ci/verify_tensorrt.py b/ci/verify_tensorrt.py index ee9e54d2..e490e084 100644 --- a/ci/verify_tensorrt.py +++ b/ci/verify_tensorrt.py @@ -12,7 +12,7 @@ import os import torch -from bundle_custom_data import include_verify_onnx_tensorrt_list, include_verify_tensorrt_list +from bundle_custom_data import include_verify_onnx_tensorrt_dict, include_verify_tensorrt_dict from download_latest_bundle import download_latest_bundle from monai.bundle import trt_export from verify_bundle import _find_bundle_file @@ -24,22 +24,12 @@ def verify_tensorrt(export_context): the exported model will be checked if it is able to be loaded successfully. """ - bundle_path = export_context["bundle_path"] + bundle_root = export_context["bundle_root"] precision = export_context["precision"] - config_file = export_context["config_file"] - trt_model_path = os.path.join(bundle_path, f"models/model_trt_{precision}.ts") + trt_model_path = os.path.join(bundle_root, f"models/model_trt_{precision}.ts") + export_context["filepath"] = trt_model_path try: - trt_export( - net_id=export_context["net_id"], - filepath=trt_model_path, - ckpt_file=os.path.join(bundle_path, "models/model.pt"), - meta_file=os.path.join(bundle_path, "configs/metadata.json"), - config_file=os.path.join(bundle_path, config_file), - precision=precision, - bundle_root=bundle_path, - use_onnx=export_context["use_onnx"], - use_trace=export_context["use_trace"], - ) + trt_export(**export_context) except Exception as e: print(f"'trt_export' failed with error: {e}") raise @@ -55,11 +45,16 @@ def get_export_required_files(bundle: str, download_path: str, use_onnx: bool = bundle_path = os.path.join(download_path, bundle) net_id, inference_file_name = "network_def", _find_bundle_file(os.path.join(bundle_path, 
"configs"), "inference") config_file = os.path.join("configs", inference_file_name) + ckpt_file = os.path.join(bundle_path, "models/model.pt") + meta_file = os.path.join(bundle_path, "configs/metadata.json") + config_file = os.path.join(bundle_path, config_file) + export_context = { - "bundle_path": bundle_path, + "bundle_root": bundle_path, "net_id": net_id, - "inference_file_name": inference_file_name, "config_file": config_file, + "ckpt_file": ckpt_file, + "meta_file": meta_file, "use_onnx": use_onnx, "use_trace": use_trace, } @@ -72,9 +67,11 @@ def verify_all_tensorrt_bundles(download_path="download"): """ - for bundle in include_verify_tensorrt_list: + for bundle in include_verify_tensorrt_dict: print(f"start verifying bundle {bundle} into TensorRT module.") export_context = get_export_required_files(bundle, download_path) + extra_parameters = include_verify_tensorrt_dict[bundle] + export_context.update(extra_parameters) for precision in ["fp32", "fp16"]: export_context["precision"] = precision try: @@ -92,9 +89,11 @@ def verify_all_onnx_tensorrt_bundles(download_path="download"): """ - for bundle in include_verify_onnx_tensorrt_list: + for bundle in include_verify_onnx_tensorrt_dict: print(f"start verifying export bundle {bundle} into ONNX-TensorRT module.") + extra_parameters = include_verify_onnx_tensorrt_dict[bundle] export_context = get_export_required_files(bundle, download_path, use_onnx=True, use_trace=True) + export_context.update(extra_parameters) for precision in ["fp32", "fp16"]: export_context["precision"] = precision try: diff --git a/models/pancreas_ct_dints_segmentation/configs/inference_trt.yaml b/models/pancreas_ct_dints_segmentation/configs/inference_trt.yaml new file mode 100644 index 00000000..1bb4820d --- /dev/null +++ b/models/pancreas_ct_dints_segmentation/configs/inference_trt.yaml @@ -0,0 +1,8 @@ +--- +imports: +- "$import glob" +- "$import os" +- "$import torch_tensorrt" +handlers#0#_disabled_: true +network_def: 
"$torch.jit.load(@bundle_root + '/models/model_trt.ts')" +evaluator#amp: false diff --git a/models/pancreas_ct_dints_segmentation/configs/metadata.json b/models/pancreas_ct_dints_segmentation/configs/metadata.json index 282fd360..2fd6955f 100644 --- a/models/pancreas_ct_dints_segmentation/configs/metadata.json +++ b/models/pancreas_ct_dints_segmentation/configs/metadata.json @@ -1,7 +1,8 @@ { "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json", - "version": "0.4.2", + "version": "0.4.3", "changelog": { + "0.4.3": "add support for TensorRT conversion and inference", "0.4.2": "update search function to match monai 1.2", "0.4.1": "fix the wrong GPU index issue of multi-node", "0.4.0": "remove error dollar symbol in readme", diff --git a/models/pancreas_ct_dints_segmentation/docs/README.md b/models/pancreas_ct_dints_segmentation/docs/README.md index 2bf85179..8ef4d171 100644 --- a/models/pancreas_ct_dints_segmentation/docs/README.md +++ b/models/pancreas_ct_dints_segmentation/docs/README.md @@ -79,6 +79,31 @@ The mean dice score over 3200 epochs (the bright curve is smoothed, and the dark ![Validation mean dice score over 3200 epochs (the bright curve is smoothed, and the dark one is the actual curve)](https://developer.download.nvidia.com/assets/Clara/Images/clara_pt_net_arch_search_segmentation_validation_4-3.png) +#### TensorRT speedup +This bundle supports acceleration with TensorRT. The table below displays the speedup ratios observed on an A100 80G GPU. 
+ +| method | torch_fp32(ms) | torch_amp(ms) | trt_fp32(ms) | trt_fp16(ms) | speedup amp | speedup fp32 | speedup fp16 | amp vs fp16| +| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | +| model computation | 54611.72 | 19240.66 | 16104.8 | 11443.57 | 2.84 | 3.39 | 4.77 | 1.68 | +| end2end | 133.93 | 43.41 | 35.65 | 26.63 | 3.09 | 3.76 | 5.03 | 1.63 | + +Where: +- `model computation` means the speedup ratio of the model's inference with a random input, without preprocessing and postprocessing. +- `end2end` means running the bundle end-to-end with the TensorRT based model. +- `torch_fp32` and `torch_amp` are for the PyTorch models with or without `amp` mode. +- `trt_fp32` and `trt_fp16` are for the TensorRT based models converted in corresponding precision. +- `speedup amp`, `speedup fp32` and `speedup fp16` are the speedup ratios of corresponding models versus the PyTorch float32 model. +- `amp vs fp16` is the speedup ratio between the PyTorch amp model and the TensorRT float16 based model. + +This result is benchmarked under: + - TensorRT: 8.6.1+cuda12.0 + - Torch-TensorRT Version: 1.4.0 + - CPU Architecture: x86-64 + - OS: ubuntu 20.04 + - Python version: 3.8.10 + - CUDA version: 12.1 + - GPU models and configuration: A100 80G + ### Searched Architecture Visualization Users can install Graphviz for visualization of searched architectures (needed in [decode_plot.py](https://github.com/Project-MONAI/tutorials/blob/main/automl/DiNTS/decode_plot.py)). The edges between nodes indicate global structure, and numbers next to edges represent different operations in the cell searching space.
An example of searched architecture is shown as follows: @@ -137,6 +162,18 @@ python -m monai.bundle run --config_file configs/inference.yaml python -m monai.bundle ckpt_export network_def --filepath models/model.ts --ckpt_file models/model.pt --meta_file configs/metadata.json --config_file configs/inference.yaml ``` +#### Export checkpoint to TensorRT based models with fp32 or fp16 precision: + +``` +python -m monai.bundle trt_export --net_id network_def --filepath models/model_trt.ts --ckpt_file models/model.pt --meta_file configs/metadata.json --config_file configs/inference.yaml --precision <fp32/fp16> --use_trace "True" --dynamic_batchsize "[1, 4, 8]" --converter_kwargs "{'truncate_long_and_double':True, 'torch_executed_ops': ['aten::upsample_trilinear3d']}" +``` + +#### Execute inference with the TensorRT model: + +``` +python -m monai.bundle run --config_file "['configs/inference.yaml', 'configs/inference_trt.yaml']" +``` + # References [1] He, Y., Yang, D., Roth, H., Zhao, C. and Xu, D., 2021. Dints: Differentiable neural network topology search for 3d medical image segmentation. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (pp. 5841-5850). diff --git a/requirements-dev.txt b/requirements-dev.txt index 77bda4c6..32da5669 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -30,3 +30,4 @@ scikit-learn pandas cucim==22.8.1; platform_system == "Linux" scikit-image>=0.19.0 +PyGithub