diff --git a/source/cl/include/cl/device.h b/source/cl/include/cl/device.h index e538fcb4c..f83d52553 100644 --- a/source/cl/include/cl/device.h +++ b/source/cl/include/cl/device.h @@ -32,6 +32,8 @@ #include #include +#include +#include #include /// @addtogroup cl @@ -55,6 +57,25 @@ struct _cl_device_id final : public cl::base<_cl_device_id> { /// @brief Destructor. ~_cl_device_id(); + /// @brief Register a command queue with the device + /// This should usually be called on creation of the queue + void RegisterCommandQueue(cl_command_queue queue); + + /// @brief Deregister a command queue with the device + /// This should usually be called on deletion of the queue + void DeregisterCommandQueue(cl_command_queue queue); + + /// @brief Release any external queues that are still around. + /// This should only be called when we know that the application + /// is no longer in a position to do so e.g. at exit + /// @note This is to workaround an issue with dpc++ where temporary queues + /// can be left at exit if out of order queues are not supported. + /// This should be reviewed when https://github.com/intel/llvm/issues/11156 is + /// resolved. + void ReleaseAllExternalQueues(); + + std::set registered_queues; + /// @brief Platform the device belongs to. cl_platform_id platform; /// @brief Mux allocator info. @@ -352,6 +373,8 @@ struct _cl_device_id final : public cl::base<_cl_device_id> { /// TODO: Should probably be a core property, see CA-2717. 
size_t preferred_work_group_size_multiple;
 #endif
+  // Used to keep the registering of queues thread safe
+  std::mutex device_lock;
 };
 
 /// @}
diff --git a/source/cl/source/command_queue.cpp b/source/cl/source/command_queue.cpp
index ddccbd190..58482cac9 100644
--- a/source/cl/source/command_queue.cpp
+++ b/source/cl/source/command_queue.cpp
@@ -52,6 +52,7 @@ _cl_command_queue::_cl_command_queue(cl_context context, cl_device_id device,
       in_flush(false) {
   cl::retainInternal(context);
   cl::retainInternal(device);
+  { device->RegisterCommandQueue(this); }
 }
 
 _cl_command_queue::~_cl_command_queue() {
@@ -86,6 +87,7 @@ _cl_command_queue::~_cl_command_queue() {
     muxDestroyQueryPool(mux_queue, counter_queries, device->mux_allocator);
   }
 
+  device->DeregisterCommandQueue(this);
   cl::releaseInternal(device);
   cl::releaseInternal(context);
 }
diff --git a/source/cl/source/device.cpp b/source/cl/source/device.cpp
index d67792807..766aee1a4 100644
--- a/source/cl/source/device.cpp
+++ b/source/cl/source/device.cpp
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -330,6 +331,38 @@ _cl_device_id::_cl_device_id(cl_platform_id platform,
 #endif
 }
 
+void _cl_device_id::RegisterCommandQueue(cl_command_queue queue) {
+  std::lock_guard<std::mutex> lock(device_lock);
+  registered_queues.insert(queue);
+}
+
+void _cl_device_id::DeregisterCommandQueue(cl_command_queue queue) {
+  std::lock_guard<std::mutex> lock(device_lock);
+  registered_queues.erase(queue);
+}
+
+void _cl_device_id::ReleaseAllExternalQueues() {
+  // Snapshot the registry while holding device_lock so the copy cannot race
+  // with RegisterCommandQueue/DeregisterCommandQueue. The lock is dropped
+  // before releasing: destroying a queue calls DeregisterCommandQueue, which
+  // takes device_lock itself, and std::mutex is not recursive.
+  decltype(registered_queues) queues;
+  {
+    std::lock_guard<std::mutex> lock(device_lock);
+    queues = registered_queues;
+  }
+  for (auto q : queues) {
+    // Release the queue once per external reference count it reports.
+    auto external_count = q->refCountExternal();
+    for (cl_uint i = 0; i < external_count; i++) {
+      cl::ReleaseCommandQueue(q);
+    }
+  }
+  // Forget anything still registered, again under the lock.
+  std::lock_guard<std::mutex> lock(device_lock);
+  registered_queues.clear();
+}
+
 _cl_device_id::~_cl_device_id() {
   muxDestroyDevice(mux_device, mux_allocator);
   cl::releaseInternal(platform);
diff --git a/source/cl/source/platform.cpp b/source/cl/source/platform.cpp
index
16f4fd16e..4b89d3705 100644 --- a/source/cl/source/platform.cpp +++ b/source/cl/source/platform.cpp @@ -129,11 +129,12 @@ cargo::expected _cl_platform_id::getInstance() { #if !defined(CA_PLATFORM_WINDOWS) // Add an atexit handler to destroy the cl_platform_id. This is not done on - // Windows because DLL's which we rely on are not guarenteed to be loaded + // Windows because DLL's which we rely on are not guaranteed to be loaded // when atexit handlers are invoked, the advice given by Microsoft is not // to perform any tear down at all. atexit([]() { for (auto device : platform.value()->devices) { + device->ReleaseAllExternalQueues(); cl::releaseInternal(device); } cl::releaseInternal(platform.value());