From b157bbf226b15e50ab4b689db4201549f95af401 Mon Sep 17 00:00:00 2001
From: Tanmay Verma <tanmay2592@gmail.com>
Date: Tue, 18 Jun 2024 13:20:53 -0700
Subject: [PATCH 1/3] Add cusparseLt in the installation to support 24.06
 (#132)

* Add cuSPARSELt (libcusparseLt.so) to the installation to support 24.06

* Fix the ARM build
---
 CMakeLists.txt | 4 ++++
 README.md      | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 315f036..0a1d3d4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -229,6 +229,8 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cuda_linalg.so libtorch_cuda_linalg.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_global_deps.so libtorch_global_deps.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so
+    # TODO: Revisit when not needed by making it part of cuda base container.
+    COMMAND docker cp -L pytorch_backend_ptlib:/usr/local/cuda-12.5/targets/${LIBS_ARCH}-linux/lib/libcusparseLt.so libcusparseLt.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/libtorchvision.so libtorchvision.so
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHTRT} = 'ON' ]; then docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/lib/libtorchtrt_runtime.so libtorchtrt_runtime.so; fi"
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/bin/torchtrtc torchtrtc || echo "error ignored..." || true
@@ -434,6 +436,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
   install(
     FILES
       ${PT_LIB_PATHS}
+      ${CMAKE_CURRENT_BINARY_DIR}/libcusparseLt.so
       ${CMAKE_CURRENT_BINARY_DIR}/LICENSE.pytorch
     DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/pytorch
   )
@@ -474,6 +477,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
         COMMAND ln -sf libopencv_flann.so libopencv_flann.so.${OPENCV_VERSION}
         COMMAND ln -sf libpng16.so libpng16.so.16
         COMMAND ln -sf libjpeg.so libjpeg.so.8
+        COMMAND ln -sf libcusparseLt.so libcusparseLt.so.0
         RESULT_VARIABLE LINK_STATUS
         WORKING_DIRECTORY ${CMAKE_INSTALL_PREFIX}/backends/pytorch)
       if(LINK_STATUS AND NOT LINK_STATUS EQUAL 0)
diff --git a/README.md b/README.md
index 731a7c3..8ed211f 100644
--- a/README.md
+++ b/README.md
@@ -146,11 +146,11 @@ key: "INFERENCE_MODE"
 
 * `DISABLE_CUDNN`: Boolean flag to disable the cuDNN library. By default, cuDNN is enabled.
 
-[cuDNN](https://developer.nvidia.com/cudnn) is a GPU-accelerated library of primitives for 
+[cuDNN](https://developer.nvidia.com/cudnn) is a GPU-accelerated library of primitives for
 deep neural networks. cuDNN provides highly tuned implementations for standard routines.
 
 Typically, models run with cuDNN enabled are faster. However there are some exceptions
-where using cuDNN can be slower, cause higher memory usage or result in errors. 
+where using cuDNN can be slower, cause higher memory usage or result in errors.
 
 
 The section of model config file specifying this parameter will look like:

From cf8c1954b2f1050843846ba1490f22b1df63c975 Mon Sep 17 00:00:00 2001
From: Tanmay Verma <tanmay2592@gmail.com>
Date: Tue, 18 Jun 2024 17:09:47 -0700
Subject: [PATCH 2/3] Fix library paths on ARM-SBSA (#133)

* Add cuSPARSELt (libcusparseLt.so) to the installation to support 24.06

* Fix the ARM build

* Fix the lib path: on aarch64 the CUDA targets directory is `sbsa-linux`,
  not `aarch64-linux`
---
 CMakeLists.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0a1d3d4..77f3965 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -160,6 +160,7 @@ endif() # TRITON_PYTORCH_ENABLE_TORCHTRT
 
 if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
   set(LIBS_ARCH "aarch64")
+  set(CUSPARSE_ARCH "sbsa")
   set(LIBTORCH_LIBS
       "libopenblas.so.0"
       "libnvpl_blas_core.so.0"
@@ -175,6 +176,7 @@ if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
   )
 else()
   set(LIBS_ARCH "x86_64")
+  set(CUSPARSE_ARCH "x86_64")
   set(LIBTORCH_LIBS
     "libmkl_avx2.so.1"
     "libmkl_avx512.so.1"
@@ -230,7 +232,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_global_deps.so libtorch_global_deps.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so
     # TODO: Revisit when not needed by making it part of cuda base container.
-    COMMAND docker cp -L pytorch_backend_ptlib:/usr/local/cuda-12.5/targets/${LIBS_ARCH}-linux/lib/libcusparseLt.so libcusparseLt.so
+    COMMAND docker cp -L pytorch_backend_ptlib:/usr/local/cuda-12.5/targets/${CUSPARSE_ARCH}-linux/lib/libcusparseLt.so libcusparseLt.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/libtorchvision.so libtorchvision.so
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHTRT} = 'ON' ]; then docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/lib/libtorchtrt_runtime.so libtorchtrt_runtime.so; fi"
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/bin/torchtrtc torchtrtc || echo "error ignored..." || true

From ddfbe3accaa15d0b502a992d221e1a7873559b29 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com>
Date: Mon, 24 Jun 2024 13:05:50 -0700
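Subject: [PATCH 3/3] Fix location of the dependency library. (#134)

Copy libcusparseLt.so from /usr/local/cuda/lib64 instead of the versioned,
architecture-specific /usr/local/cuda-12.5/targets/<arch>-linux/lib path, and
drop the now-unused CUSPARSE_ARCH variable.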

---
 CMakeLists.txt | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 77f3965..07744ac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -160,7 +160,6 @@ endif() # TRITON_PYTORCH_ENABLE_TORCHTRT
 
 if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
   set(LIBS_ARCH "aarch64")
-  set(CUSPARSE_ARCH "sbsa")
   set(LIBTORCH_LIBS
       "libopenblas.so.0"
       "libnvpl_blas_core.so.0"
@@ -176,7 +175,6 @@ if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
   )
 else()
   set(LIBS_ARCH "x86_64")
-  set(CUSPARSE_ARCH "x86_64")
   set(LIBTORCH_LIBS
     "libmkl_avx2.so.1"
     "libmkl_avx512.so.1"
@@ -232,7 +230,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_global_deps.so libtorch_global_deps.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so
     # TODO: Revisit when not needed by making it part of cuda base container.
-    COMMAND docker cp -L pytorch_backend_ptlib:/usr/local/cuda-12.5/targets/${CUSPARSE_ARCH}-linux/lib/libcusparseLt.so libcusparseLt.so
+    COMMAND docker cp -L pytorch_backend_ptlib:/usr/local/cuda/lib64/libcusparseLt.so libcusparseLt.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/libtorchvision.so libtorchvision.so
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHTRT} = 'ON' ]; then docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/lib/libtorchtrt_runtime.so libtorchtrt_runtime.so; fi"
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/bin/torchtrtc torchtrtc || echo "error ignored..." || true