Skip to content

Commit

Permalink
Release any queues left when atexit is called.
Browse files Browse the repository at this point in the history
This is due to an issue with dpc++ where it can leave temporary queues
unreleased. This can then have the effect on the closing down of the
device.

This is resolved by registering and deregistering the queues on creation
and deletion and then releasing any queues which still have an external
reference when `atexit` is called. This required an additional device
mutex to avoid threading issues.

This should be reviewed when intel/llvm#11156
is fixed.
  • Loading branch information
coldav committed Oct 9, 2023
1 parent fcd8a60 commit 720cab7
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 1 deletion.
23 changes: 23 additions & 0 deletions source/cl/include/cl/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include <compiler/info.h>
#include <mux/mux.h>

#include <mutex>
#include <set>
#include <string>

/// @addtogroup cl
Expand All @@ -55,6 +57,25 @@ struct _cl_device_id final : public cl::base<_cl_device_id> {
/// @brief Destructor.
~_cl_device_id();

/// @brief Register a command queue with the device
/// This should usually be called on creation of the queue
void RegisterCommandQueue(cl_command_queue queue);

/// @brief Deregister a command queue with the device
/// This should usually be called on deletion of the queue
void DeregisterCommandQueue(cl_command_queue queue);

/// @brief Release any external queues that are still around.
/// This should only be called when we know that the application
/// is no longer in a position to do so e.g. at exit
/// @note This is to workaround an issue with dpc++ where temporary queues
/// can be left at exit if out of order queues are not supported.
/// This should be reviewed when https://github.com/intel/llvm/issues/11156 is
/// resolved.
void ReleaseAllExternalQueues();

std::set<cl_command_queue> registered_queues;

/// @brief Platform the device belongs to.
cl_platform_id platform;
/// @brief Mux allocator info.
Expand Down Expand Up @@ -352,6 +373,8 @@ struct _cl_device_id final : public cl::base<_cl_device_id> {
/// TODO: Should probably be a core property, see CA-2717.
size_t preferred_work_group_size_multiple;
#endif
// Used to keep the registering of queues thread safe
std::mutex device_lock;
};

/// @}
Expand Down
2 changes: 2 additions & 0 deletions source/cl/source/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ _cl_command_queue::_cl_command_queue(cl_context context, cl_device_id device,
in_flush(false) {
cl::retainInternal(context);
cl::retainInternal(device);
{ device->RegisterCommandQueue(this); }
}

_cl_command_queue::~_cl_command_queue() {
Expand Down Expand Up @@ -86,6 +87,7 @@ _cl_command_queue::~_cl_command_queue() {
muxDestroyQueryPool(mux_queue, counter_queries, device->mux_allocator);
}

device->DeregisterCommandQueue(this);
cl::releaseInternal(device);
cl::releaseInternal(context);
}
Expand Down
23 changes: 23 additions & 0 deletions source/cl/source/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cargo/string_algorithm.h>
#include <cargo/string_view.h>
#include <cl/binary/binary.h>
#include <cl/command_queue.h>
#include <cl/config.h>
#include <cl/device.h>
#include <cl/limits.h>
Expand Down Expand Up @@ -330,6 +331,28 @@ _cl_device_id::_cl_device_id(cl_platform_id platform,
#endif
}

void _cl_device_id::RegisterCommandQueue(cl_command_queue queue) {
std::lock_guard<std::mutex> lock(device_lock);
registered_queues.insert(queue);
}

void _cl_device_id::DeregisterCommandQueue(cl_command_queue queue) {
std::lock_guard<std::mutex> lock(device_lock);
registered_queues.erase(queue);
}

void _cl_device_id::ReleaseAllExternalQueues() {
// Need to copy as it will deregister as it goes along
auto queues = registered_queues;
for (auto q : queues) {
auto external_count = q->refCountExternal();
for (cl_uint i = 0; i < external_count; i++) {
cl::ReleaseCommandQueue(q);
}
}
registered_queues.clear();
}

_cl_device_id::~_cl_device_id() {
muxDestroyDevice(mux_device, mux_allocator);
cl::releaseInternal(platform);
Expand Down
3 changes: 2 additions & 1 deletion source/cl/source/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,12 @@ cargo::expected<cl_platform_id, cl_int> _cl_platform_id::getInstance() {

#if !defined(CA_PLATFORM_WINDOWS)
// Add an atexit handler to destroy the cl_platform_id. This is not done on
// Windows because DLL's which we rely on are not guarenteed to be loaded
// Windows because DLL's which we rely on are not guaranteed to be loaded
// when atexit handlers are invoked, the advice given by Microsoft is not
// to perform any tear down at all.
atexit([]() {
for (auto device : platform.value()->devices) {
device->ReleaseAllExternalQueues();
cl::releaseInternal(device);
}
cl::releaseInternal(platform.value());
Expand Down

0 comments on commit 720cab7

Please sign in to comment.