Implement stream priority feature #321

Open · wants to merge 4 commits into main
8 changes: 6 additions & 2 deletions doc/driver.rst
@@ -331,7 +331,7 @@ Constants
CUDA 6.0 and above.

.. versionadded:: 2014.1

.. attribute :: HOST_NATIVE_ATOMIC_SUPPORTED
SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO
PAGEABLE_MEMORY_ACCESS
@@ -644,6 +644,10 @@ Devices and Contexts

See also :mod:`pycuda.autoinit`.

.. function:: get_stream_priority_range()

Return a tuple ``(least, greatest)`` of numerical values that correspond to the least and greatest stream priorities.

.. class:: Device(number)
Device(pci_bus_id)

@@ -813,7 +817,7 @@ Devices and Contexts
Concurrency and Streams
-----------------------

-.. class:: Stream(flags=0)
+.. class:: Stream(flags=0, priority=0)

A handle for a queue of operations that will be carried out in order.

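Taken together, the documented additions would be used roughly as in the following minimal sketch (assuming the PR as written; note that in CUDA the "greatest" priority is the numerically smallest value, e.g. a range of `(0, -1)`):

    import pycuda.autoinit  # noqa: F401 -- creates a context on import
    import pycuda.driver as drv

    # Query the valid priority range for the current context.
    least, greatest = drv.get_stream_priority_range()

    default_stream = drv.Stream()                     # default priority (0)
    high_prio_stream = drv.Stream(priority=greatest)  # highest available priority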
26 changes: 23 additions & 3 deletions src/cpp/cuda.hpp
@@ -531,7 +531,6 @@ namespace pycuda
* to push contexts that are already active at a deeper stack level, so we
* maintain all contexts floating other than the top one.
*/

// for friend decl
namespace gl {
boost::shared_ptr<context>
@@ -862,6 +861,18 @@ namespace pycuda
return result;
}

#if CUDAPP_CUDA_VERSION >= 7500
inline
py::tuple get_stream_priority_range()
{
int leastPriority;
int greatestPriority;
CUDAPP_CALL_GUARDED(cuCtxGetStreamPriorityRange, (&leastPriority, &greatestPriority));
return py::make_tuple(leastPriority, greatestPriority);
}
#endif



#if CUDAPP_CUDA_VERSION >= 7000
inline boost::shared_ptr<context> device::retain_primary_context()
@@ -997,8 +1008,17 @@
CUstream m_stream;

public:
-stream(unsigned int flags=0)
-{ CUDAPP_CALL_GUARDED(cuStreamCreate, (&m_stream, flags)); }
+#if CUDAPP_CUDA_VERSION >= 7500
+stream(unsigned int flags=0, int priority=0)
+{ CUDAPP_CALL_GUARDED(cuStreamCreateWithPriority, (&m_stream, flags, priority)); }
+#else
+stream(unsigned int flags=0, int priority=0)
+{
+  // Nonzero priorities cannot be honored before CUDA 7.5; reject them
+  // rather than silently ignoring the argument.
+  if (priority != 0)
+    throw pycuda::error("stream", CUDA_ERROR_INVALID_HANDLE,
+        "priority!=0 setting isn't supported for your CUDA version");
+  CUDAPP_CALL_GUARDED(cuStreamCreate, (&m_stream, flags));
+}
+#endif

~stream()
{
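Because the priority range is context-dependent and numerically inverted, callers may want a small convenience wrapper. The helper below is a hypothetical sketch (`make_prioritized_stream` is not part of this PR) built only on the API added above:

    import pycuda.driver as drv

    def make_prioritized_stream(level=1.0):
        # Map a relative level in [0, 1] onto the context's priority range:
        # 0.0 -> least priority, 1.0 -> greatest (numerically smallest).
        least, greatest = drv.get_stream_priority_range()
        priority = int(round(least + level * (greatest - least)))
        return drv.Stream(priority=priority)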
5 changes: 4 additions & 1 deletion src/wrapper/wrap_cudadrv.cpp
@@ -1193,13 +1193,16 @@ BOOST_PYTHON_MODULE(_driver)
.add_property("handle", &cl::handle_int)
;
}

#if CUDAPP_CUDA_VERSION >= 7500
DEF_SIMPLE_FUNCTION(get_stream_priority_range);
#endif

// }}}

// {{{ stream
{
typedef stream cl;
py::class_<cl, boost::noncopyable, shared_ptr<cl> >
("Stream", py::init<unsigned int>(py::arg("flags")=0))
("Stream", py::init<unsigned int, int>(py::arg("flags")=0, py::arg("priority")=0))
.DEF_SIMPLE_METHOD(synchronize)
.DEF_SIMPLE_METHOD(is_done)
#if CUDAPP_CUDA_VERSION >= 3020
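With the `py::init` signature above, both constructor arguments become optional keyword arguments on the Python side; assuming the PR as written, all of the following spellings are valid:

    import pycuda.driver as drv

    s0 = drv.Stream()                      # flags=0, priority=0
    s1 = drv.Stream(priority=-1)           # override priority only
    s2 = drv.Stream(flags=0, priority=-1)  # fully explicit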
23 changes: 23 additions & 0 deletions test/test_driver.py
@@ -935,6 +935,29 @@ def test_register_host_memory(self):
drv.memcpy_htod_async(gpu_ary, a_pin, stream)
drv.Context.synchronize()

@mark_cuda_test
def test_stream_priority_setting(self):
    if drv.get_version() < (7, 5):
        from py.test import skip

        skip("stream priorities require CUDA 7.5 and later")

    import sys

    if sys.platform == "darwin":
        from py.test import skip

        skip("register_host_memory is not supported on OS X")

    a = drv.aligned_empty((2 ** 20,), np.float64)
    a_pin = drv.register_host_memory(a)

    gpu_ary = drv.mem_alloc_like(a)
    least_priority, greatest_priority = drv.get_stream_priority_range()
    # CUDA priorities are numerically inverted: the "greatest" priority
    # is the smallest number, so sort before sampling a valid value.
    lo, hi = sorted((least_priority, greatest_priority))
    stream = drv.Stream(priority=int(np.random.randint(lo, hi + 1)))
    drv.memcpy_htod_async(gpu_ary, a_pin, stream)
    drv.Context.synchronize()

@mark_cuda_test
# https://github.com/inducer/pycuda/issues/45
def test_recursive_launch(self):