From c2d3921f009ea53e5775d6acf1dfb73052838c02 Mon Sep 17 00:00:00 2001 From: Huilin Qu Date: Mon, 14 Feb 2022 17:34:20 +0100 Subject: [PATCH] Add GPU support for ONNXRuntime. --- .../ONNXRuntime/interface/ONNXRuntime.h | 3 ++ PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc | 40 +++++++++++++++++-- PhysicsTools/ONNXRuntime/test/BuildFile.xml | 1 + .../ONNXRuntime/test/testONNXRuntime.cc | 28 ++++++++++--- 4 files changed, 63 insertions(+), 9 deletions(-) diff --git a/PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h b/PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h index f49ddcbd05b6a..a050b8d443137 100644 --- a/PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h +++ b/PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h @@ -21,6 +21,7 @@ namespace cms::Ort { typedef std::vector<std::vector<float>> FloatArrays; + enum GPUMode { no_gpu, auto_gpu, force_gpu }; class ONNXRuntime { public: @@ -29,6 +30,8 @@ namespace cms::Ort { ONNXRuntime& operator=(const ONNXRuntime&) = delete; ~ONNXRuntime(); + static ::Ort::SessionOptions defaultSessionOptions(GPUMode gpu_mode = no_gpu); + // Run inference and get outputs // input_names: list of the names of the input nodes. // input_values: list of input arrays for each input node. The order of `input_values` must match `input_names`. 
diff --git a/PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc b/PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc index 1845d0cc64d82..d27fad1f7cdab 100644 --- a/PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc +++ b/PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc @@ -15,6 +15,7 @@ #include #include #include +#include <cuda_runtime.h> namespace cms::Ort { @@ -25,11 +26,9 @@ namespace cms::Ort { ONNXRuntime::ONNXRuntime(const std::string& model_path, const SessionOptions* session_options) { // create session if (session_options) { - session_ = std::make_unique<Session>(env_, model_path.c_str(), *session_options); + session_.reset(new Session(env_, model_path.c_str(), *session_options)); } else { - SessionOptions sess_opts; - sess_opts.SetIntraOpNumThreads(1); - session_ = std::make_unique<Session>(env_, model_path.c_str(), sess_opts); + session_.reset(new Session(env_, model_path.c_str(), defaultSessionOptions())); } AllocatorWithDefaultOptions allocator; @@ -78,6 +77,35 @@ namespace cms::Ort { ONNXRuntime::~ONNXRuntime() {} + SessionOptions ONNXRuntime::defaultSessionOptions(GPUMode gpu_mode) { + SessionOptions sess_opts; + sess_opts.SetIntraOpNumThreads(1); + if (gpu_mode != no_gpu) { + // detect if there is GPU + int devices = 0; + auto status = cudaGetDeviceCount(&devices); + bool is_gpu_available = (status == cudaSuccess && devices > 0); + if (is_gpu_available) { + // https://www.onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html + OrtCUDAProviderOptions options; + options.device_id = 0; + options.arena_extend_strategy = 0; + options.cuda_mem_limit = std::numeric_limits<size_t>::max(); + options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearch::EXHAUSTIVE; + options.do_copy_in_default_stream = 1; + sess_opts.AppendExecutionProvider_CUDA(options); + } else { + // if GPU is not available + if (gpu_mode == force_gpu) { + throw cms::Exception("RuntimeError") << "No GPU detected, cannot run ONNXRuntime on GPU."; + } else { + std::cout << "[ONNXRuntime] No GPU detected, will run on CPU." 
<< std::endl; + } + } + } + return sess_opts; + } + FloatArrays ONNXRuntime::run(const std::vector<std::string>& input_names, FloatArrays& input_values, const std::vector<std::vector<int64_t>>& input_shapes, @@ -104,6 +132,10 @@ namespace cms::Ort { } else { input_dims = input_shapes[input_pos]; // rely on the given input_shapes to set the batch size + if (input_dims[0] != batch_size) { + throw cms::Exception("RuntimeError") << "The first element of `input_shapes` (" << input_dims[0] + << ") does not match the given `batch_size` (" << batch_size << ")"; + } } auto expected_len = std::accumulate(input_dims.begin(), input_dims.end(), 1, std::multiplies<int64_t>()); if (expected_len != (int64_t)value->size()) { diff --git a/PhysicsTools/ONNXRuntime/test/BuildFile.xml b/PhysicsTools/ONNXRuntime/test/BuildFile.xml index b8af87ffa32de..cb02c30d2f9c9 100644 --- a/PhysicsTools/ONNXRuntime/test/BuildFile.xml +++ b/PhysicsTools/ONNXRuntime/test/BuildFile.xml @@ -4,5 +4,6 @@ + diff --git a/PhysicsTools/ONNXRuntime/test/testONNXRuntime.cc b/PhysicsTools/ONNXRuntime/test/testONNXRuntime.cc index 5de1da9b9aa44..22a59b2d91009 100644 --- a/PhysicsTools/ONNXRuntime/test/testONNXRuntime.cc +++ b/PhysicsTools/ONNXRuntime/test/testONNXRuntime.cc @@ -2,26 +2,34 @@ #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h" #include "FWCore/ParameterSet/interface/FileInPath.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include #include using namespace cms::Ort; class testONNXRuntime : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(testONNXRuntime); - CPPUNIT_TEST(checkAll); + CPPUNIT_TEST(checkCPU); + CPPUNIT_TEST(checkGPU); + CPPUNIT_TEST(checkAuto); CPPUNIT_TEST_SUITE_END(); +private: + void test(GPUMode gpu_mode = no_gpu); + public: - void checkAll(); + void checkCPU(); + void checkGPU(); + void checkAuto(); }; CPPUNIT_TEST_SUITE_REGISTRATION(testONNXRuntime); -void testONNXRuntime::checkAll() { +void testONNXRuntime::test(GPUMode gpu_mode) { std::string model_path = 
edm::FileInPath("PhysicsTools/ONNXRuntime/test/data/model.onnx").fullPath(); - ONNXRuntime rt(model_path); + auto session_options = ONNXRuntime::defaultSessionOptions(gpu_mode); + ONNXRuntime rt(model_path, &session_options); for (const unsigned batch_size : {1, 2, 4}) { FloatArrays input_values{ std::vector<float>(batch_size * 2, 1), @@ -35,3 +43,13 @@ void testONNXRuntime::checkAll() { } } } + +void testONNXRuntime::checkCPU() { test(); } + +void testONNXRuntime::checkGPU() { + if (cms::cudatest::testDevices()) { + test(force_gpu); + } +} + +void testONNXRuntime::checkAuto() { test(auto_gpu); }