From e689240184ef4425c5969de19376c082e78495cf Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 29 Oct 2024 13:06:07 -0400 Subject: [PATCH 1/2] PaRSEC: check for universal peer access Check whether PaRSEC thinks that all devices can access the memory of their peers. If so, we do not need to push modified data back to the host as PaRSEC will use D2D transfers instead of the host copy. We currently stick to the owner-computes model so data will always be modified on the same device and may only be read on a different device. Signed-off-by: Joseph Schuchart --- ttg/ttg/parsec/ttg.h | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/ttg/ttg/parsec/ttg.h b/ttg/ttg/parsec/ttg.h index 0a0ddefcb..335ad0327 100644 --- a/ttg/ttg/parsec/ttg.h +++ b/ttg/ttg/parsec/ttg.h @@ -228,6 +228,8 @@ namespace ttg_parsec { return im; } + inline bool all_devices_peer_access; + } // namespace detail class WorldImpl : public ttg::base::WorldImplBase { @@ -1075,6 +1077,21 @@ namespace ttg_parsec { detail::max_inline_size = inline_size; } } + + bool all_peer_access = true; + /* check whether all GPUs can access all peer GPUs */ + for (int i = 0; (i < parsec_nb_devices) && all_peer_access; ++i) { + parsec_device_module_t *device = parsec_mca_device_get(i); + if (PARSEC_DEV_IS_GPU(device->type)) { + parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t*)device; + for (int j = 0; (j < parsec_nb_devices) && all_peer_access; ++j) { + if (PARSEC_DEV_IS_GPU(device->type)) { + all_peer_access &= (gpu_device->peer_access_mask & j); + } + } + } + } + detail::all_devices_peer_access = all_peer_access; } inline void ttg_finalize() { // We need to notify the current taskpool of termination if we are in user termination detection mode @@ -3403,9 +3420,14 @@ namespace ttg_parsec { if constexpr (value_is_const) { if (caller->data_flags & detail::ttg_parsec_data_flags::IS_MODIFIED) { - /* The data has been modified previously. 
PaRSEC requires us to pushout - * data if we transition from a writer to one or more readers. */ - need_pushout = true; + /* The data has been modified previously. If not all devices can access + * their peers then we need to push out to the host so that all devices + * have the data available for reading. + * NOTE: we currently don't allow users to force the next writer to be + * on a different device. In that case PaRSEC would take the host-side + * copy. If we change our restriction we need to revisit this. + * Ideally, PaRSEC would take the device copy if the owner moves... */ + need_pushout = !detail::all_devices_peer_access; } /* check for multiple readers */ From e96cd59a88bfc295b5e62e20055b7c46c0dd2ed0 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 7 Nov 2024 14:50:03 -0500 Subject: [PATCH 2/2] Device check: use logical && instead of bitwise & Co-authored-by: Aurelien Bouteiller --- ttg/ttg/parsec/ttg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ttg/ttg/parsec/ttg.h b/ttg/ttg/parsec/ttg.h index 335ad0327..b9d095bb4 100644 --- a/ttg/ttg/parsec/ttg.h +++ b/ttg/ttg/parsec/ttg.h @@ -1086,7 +1086,7 @@ namespace ttg_parsec { parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t*)device; for (int j = 0; (j < parsec_nb_devices) && all_peer_access; ++j) { if (PARSEC_DEV_IS_GPU(device->type)) { - all_peer_access &= (gpu_device->peer_access_mask & j); + all_peer_access = all_peer_access && (gpu_device->peer_access_mask & (1<