From b67c44189ef058a311d7be1b14b460dd15d2893b Mon Sep 17 00:00:00 2001
From: hyperfraise
Date: Tue, 23 Nov 2021 11:23:25 +0100
Subject: [PATCH 1/4] Implement stream priority feature

---
 doc/driver.rst               | 4 ++--
 src/cpp/cuda.hpp             | 4 ++--
 src/wrapper/wrap_cudadrv.cpp | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/driver.rst b/doc/driver.rst
index 86fa2e1b..288ed7cb 100644
--- a/doc/driver.rst
+++ b/doc/driver.rst
@@ -331,7 +331,7 @@ Constants
         CUDA 6.0 and above.
 
         .. versionadded:: 2014.1
-        
+
     .. attribute :: HOST_NATIVE_ATOMIC_SUPPORTED
         SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO
         PAGEABLE_MEMORY_ACCESS
@@ -813,7 +813,7 @@ Devices and Contexts
 Concurrency and Streams
 -----------------------
 
-.. class:: Stream(flags=0)
+.. class:: Stream(flags=0, priority=0)
 
     A handle for a queue of operations that will be carried out in order.
 
diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index 21cb219c..655f595b 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -997,8 +997,8 @@ namespace pycuda
       CUstream m_stream;
 
     public:
-      stream(unsigned int flags=0)
-      { CUDAPP_CALL_GUARDED(cuStreamCreate, (&m_stream, flags)); }
+      stream(unsigned int flags=0, int priority=0)
+      { CUDAPP_CALL_GUARDED(cuStreamCreateWithPriority, (&m_stream, flags, priority)); }
 
       ~stream()
       {
diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp
index 6d95edad..27b364ac 100644
--- a/src/wrapper/wrap_cudadrv.cpp
+++ b/src/wrapper/wrap_cudadrv.cpp
@@ -1199,7 +1199,7 @@ BOOST_PYTHON_MODULE(_driver)
   {
     typedef stream cl;
     py::class_<cl, boost::noncopyable, shared_ptr<cl> >
-      ("Stream", py::init<unsigned int>(py::arg("flags")=0))
+      ("Stream", py::init<unsigned int, int>(py::arg("flags")=0, py::arg("priority")=0))
       .DEF_SIMPLE_METHOD(synchronize)
       .DEF_SIMPLE_METHOD(is_done)
 #if CUDAPP_CUDA_VERSION >= 3020

From 6207b0456c64138d4c52130deed3b1502b1e8416 Mon Sep 17 00:00:00 2001
From: hyperfraise
Date: Tue, 26 Apr 2022 17:46:16 +0200
Subject: [PATCH 2/4] add stream priority range function

---
 doc/driver.rst               |  4 ++++
 src/cpp/cuda.hpp             | 11 ++++++++++-
src/wrapper/wrap_cudadrv.cpp |  3 +++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/doc/driver.rst b/doc/driver.rst
index 288ed7cb..528b5297 100644
--- a/doc/driver.rst
+++ b/doc/driver.rst
@@ -644,6 +644,10 @@ Devices and Contexts
 
     See also :mod:`pycuda.autoinit`.
 
+.. function:: get_stream_priority_range()
+
+    Return a tuple ``(least_priority, greatest_priority)`` for the current context; numerically lower values mean higher priority.
+
 .. class:: Device(number)
            Device(pci_bus_id)
 
diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index 655f595b..fc4481d6 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -531,7 +531,6 @@ namespace pycuda
    * to push contexts that are already active at a deeper stack level, so we
    * maintain all contexts floating other than the top one.
    */
-
   // for friend decl
   namespace gl {
     boost::shared_ptr<context>
@@ -862,6 +861,16 @@ namespace pycuda
     return result;
   }
 
+  inline
+  py::tuple get_stream_priority_range()
+  {
+    int leastPriority;
+    int greatestPriority;
+    CUDAPP_CALL_GUARDED(cuCtxGetStreamPriorityRange, (&leastPriority, &greatestPriority));
+    return py::make_tuple(leastPriority, greatestPriority);
+  }
+
+
 #if CUDAPP_CUDA_VERSION >= 7000
   inline
   boost::shared_ptr<context> device::retain_primary_context()
diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp
index 27b364ac..0e0b4627 100644
--- a/src/wrapper/wrap_cudadrv.cpp
+++ b/src/wrapper/wrap_cudadrv.cpp
@@ -1193,6 +1193,9 @@ BOOST_PYTHON_MODULE(_driver)
       .add_property("handle", &cl::handle_int)
       ;
   }
+
+  DEF_SIMPLE_FUNCTION(get_stream_priority_range);
+
   // }}}
 
   // {{{ stream

From c28127d77a2482707c3fe69b2c11eb05a4eaf883 Mon Sep 17 00:00:00 2001
From: hyperfraise
Date: Tue, 26 Apr 2022 17:46:20 +0200
Subject: [PATCH 3/4] add ci test

---
 test/test_driver.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/test/test_driver.py b/test/test_driver.py
index 98f3c8aa..6218f00a 100644
--- a/test/test_driver.py
+++ b/test/test_driver.py
@@ -935,6 +935,36 @@ def test_register_host_memory(self):
         drv.memcpy_htod_async(gpu_ary, a_pin, stream)
         drv.Context.synchronize()
 
+    @mark_cuda_test
+    def test_stream_priority_setting(self):
+        if drv.get_version() < (4,):
+            from py.test import skip
+
+            skip("register_host_memory only exists on CUDA 4.0 and later")
+
+        import sys
+
+        if sys.platform == "darwin":
+            from py.test import skip
+
+            skip("register_host_memory is not supported on OS X")
+
+        a = drv.aligned_empty((2 ** 20,), np.float64)
+        a_pin = drv.register_host_memory(a)
+
+        gpu_ary = drv.mem_alloc_like(a)
+
+        # Lower numbers mean higher priority, so the valid values run from
+        # greatest_priority up to least_priority (inclusive).
+        least_priority, greatest_priority = drv.get_stream_priority_range()
+        assert greatest_priority <= least_priority
+        for priority in range(greatest_priority, least_priority + 1):
+            stream = drv.Stream(priority=priority)
+            drv.memcpy_htod_async(gpu_ary, a_pin, stream)
+            stream.synchronize()
+
+        drv.Context.synchronize()
+
     @mark_cuda_test
     # https://github.com/inducer/pycuda/issues/45
     def test_recursive_launch(self):

From 258843615c311af1ffa71396f9cd3080c9e553ec Mon Sep 17 00:00:00 2001
From: hyperfraise
Date: Tue, 26 Apr 2022 17:46:27 +0200
Subject: [PATCH 4/4] account for old versions of cuda

Guard the driver calls inside the function bodies instead of around the
declarations: the Boost.Python wrapper registers Stream(flags, priority)
and get_stream_priority_range() unconditionally, so both have to exist
for every CUDA version.

---
 src/cpp/cuda.hpp | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index fc4481d6..c98f056f 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -864,8 +864,15 @@ namespace pycuda
   inline
   py::tuple get_stream_priority_range()
   {
+#if CUDAPP_CUDA_VERSION >= 7500
     int leastPriority;
     int greatestPriority;
     CUDAPP_CALL_GUARDED(cuCtxGetStreamPriorityRange, (&leastPriority, &greatestPriority));
     return py::make_tuple(leastPriority, greatestPriority);
+#else
+    // No priority support compiled in: 0 is the only value stream() accepts,
+    // so report a degenerate range instead of dropping the function (the
+    // wrapper exports it unconditionally).
+    return py::make_tuple(0, 0);
+#endif
   }
@@ -1008,6 +1015,17 @@ namespace pycuda
     public:
       stream(unsigned int flags=0, int priority=0)
-      { CUDAPP_CALL_GUARDED(cuStreamCreateWithPriority, (&m_stream, flags, priority)); }
+      {
+#if CUDAPP_CUDA_VERSION >= 7500
+        CUDAPP_CALL_GUARDED(cuStreamCreateWithPriority, (&m_stream, flags, priority));
+#else
+        // Keep the two-argument signature so the Python wrapper builds
+        // unchanged; only the default priority can be honored here.
+        if (priority != 0)
+          throw pycuda::error("stream", CUDA_ERROR_INVALID_VALUE,
+              "priority!=0 setting isn't supported for your CUDA version");
+        CUDAPP_CALL_GUARDED(cuStreamCreate, (&m_stream, flags));
+#endif
+      }
 
       ~stream()
       {