From c2d3921f009ea53e5775d6acf1dfb73052838c02 Mon Sep 17 00:00:00 2001 From: Huilin Qu Date: Mon, 14 Feb 2022 17:34:20 +0100 Subject: [PATCH] Add GPU support for ONNXRuntime. --- .../ONNXRuntime/interface/ONNXRuntime.h | 3 ++ PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc | 40 +++++++++++++++++-- PhysicsTools/ONNXRuntime/test/BuildFile.xml | 1 + .../ONNXRuntime/test/testONNXRuntime.cc | 28 ++++++++++--- 4 files changed, 63 insertions(+), 9 deletions(-) diff --git a/PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h b/PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h index f49ddcbd05b6a..a050b8d443137 100644 --- a/PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h +++ b/PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h @@ -21,6 +21,7 @@ namespace cms::Ort { typedef std::vector<std::vector<float>> FloatArrays; + enum GPUMode { no_gpu, auto_gpu, force_gpu }; class ONNXRuntime { public: @@ -29,6 +30,8 @@ namespace cms::Ort { ONNXRuntime& operator=(const ONNXRuntime&) = delete; ~ONNXRuntime(); + static ::Ort::SessionOptions defaultSessionOptions(GPUMode gpu_mode = no_gpu); + // Run inference and get outputs // input_names: list of the names of the input nodes. // input_values: list of input arrays for each input node. The order of `input_values` must match `input_names`. 
diff --git a/PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc b/PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc index 1845d0cc64d82..d27fad1f7cdab 100644 --- a/PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc +++ b/PhysicsTools/ONNXRuntime/src/ONNXRuntime.cc @@ -15,6 +15,7 @@ #include #include #include +#include <cuda_runtime.h> namespace cms::Ort { @@ -25,11 +26,9 @@ namespace cms::Ort { ONNXRuntime::ONNXRuntime(const std::string& model_path, const SessionOptions* session_options) { // create session if (session_options) { - session_ = std::make_unique<Session>(env_, model_path.c_str(), *session_options); + session_.reset(new Session(env_, model_path.c_str(), *session_options)); } else { - SessionOptions sess_opts; - sess_opts.SetIntraOpNumThreads(1); - session_ = std::make_unique<Session>(env_, model_path.c_str(), sess_opts); + session_.reset(new Session(env_, model_path.c_str(), defaultSessionOptions())); } AllocatorWithDefaultOptions allocator; @@ -78,6 +77,35 @@ namespace cms::Ort { ONNXRuntime::~ONNXRuntime() {} + SessionOptions ONNXRuntime::defaultSessionOptions(GPUMode gpu_mode) { + SessionOptions sess_opts; + sess_opts.SetIntraOpNumThreads(1); + if (gpu_mode != no_gpu) { + // detect if there is GPU + int devices = 0; + auto status = cudaGetDeviceCount(&devices); + bool is_gpu_available = (status == cudaSuccess && devices > 0); + if (is_gpu_available) { + // https://www.onnxruntime.ai/docs/reference/execution-providers/CUDA-ExecutionProvider.html + OrtCUDAProviderOptions options; + options.device_id = 0; + options.arena_extend_strategy = 0; + options.cuda_mem_limit = std::numeric_limits<size_t>::max(); + options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearch::EXHAUSTIVE; + options.do_copy_in_default_stream = 1; + sess_opts.AppendExecutionProvider_CUDA(options); + } else { + // if GPU is not available + if (gpu_mode == force_gpu) { + throw cms::Exception("RuntimeError") << "No GPU detected, cannot run ONNXRuntime on GPU."; + } else { + std::cout << "[ONNXRuntime] No GPU detected, will run on CPU." 
<< std::endl; + } + } + } + return sess_opts; + } + FloatArrays ONNXRuntime::run(const std::vector<std::string>& input_names, FloatArrays& input_values, const std::vector<std::vector<int64_t>>& input_shapes, @@ -104,6 +132,10 @@ namespace cms::Ort { } else { input_dims = input_shapes[input_pos]; // rely on the given input_shapes to set the batch size + if (input_dims[0] != batch_size) { + throw cms::Exception("RuntimeError") << "The first element of `input_shapes` (" << input_dims[0] + << ") does not match the given `batch_size` (" << batch_size << ")"; + } } auto expected_len = std::accumulate(input_dims.begin(), input_dims.end(), 1, std::multiplies<int64_t>()); if (expected_len != (int64_t)value->size()) { diff --git a/PhysicsTools/ONNXRuntime/test/BuildFile.xml b/PhysicsTools/ONNXRuntime/test/BuildFile.xml index b8af87ffa32de..cb02c30d2f9c9 100644 --- a/PhysicsTools/ONNXRuntime/test/BuildFile.xml +++ b/PhysicsTools/ONNXRuntime/test/BuildFile.xml @@ -4,5 +4,6 @@ + diff --git a/PhysicsTools/ONNXRuntime/test/testONNXRuntime.cc b/PhysicsTools/ONNXRuntime/test/testONNXRuntime.cc index 5de1da9b9aa44..22a59b2d91009 100644 --- a/PhysicsTools/ONNXRuntime/test/testONNXRuntime.cc +++ b/PhysicsTools/ONNXRuntime/test/testONNXRuntime.cc @@ -2,26 +2,34 @@ #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h" #include "FWCore/ParameterSet/interface/FileInPath.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include #include using namespace cms::Ort; class testONNXRuntime : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(testONNXRuntime); - CPPUNIT_TEST(checkAll); + CPPUNIT_TEST(checkCPU); + CPPUNIT_TEST(checkGPU); + CPPUNIT_TEST(checkAuto); CPPUNIT_TEST_SUITE_END(); +private: + void test(GPUMode gpu_mode = no_gpu); + public: - void checkAll(); + void checkCPU(); + void checkGPU(); + void checkAuto(); }; CPPUNIT_TEST_SUITE_REGISTRATION(testONNXRuntime); -void testONNXRuntime::checkAll() { +void testONNXRuntime::test(GPUMode gpu_mode) { std::string model_path = 
edm::FileInPath("PhysicsTools/ONNXRuntime/test/data/model.onnx").fullPath(); - ONNXRuntime rt(model_path); + auto session_options = ONNXRuntime::defaultSessionOptions(gpu_mode); + ONNXRuntime rt(model_path, &session_options); for (const unsigned batch_size : {1, 2, 4}) { FloatArrays input_values{ std::vector<float>(batch_size * 2, 1), @@ -35,3 +43,13 @@ void testONNXRuntime::checkAll() { } } } + +void testONNXRuntime::checkCPU() { test(); } + +void testONNXRuntime::checkGPU() { + if (cms::cudatest::testDevices()) { + test(force_gpu); + } +} + +void testONNXRuntime::checkAuto() { test(auto_gpu); }