From 92a63052d122588af30adc64a3ac417a27ae4429 Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 5 Aug 2022 22:01:49 +0200 Subject: [PATCH 1/3] Evolution of the Alpaka EDModule and ESModule API --- DataFormats/Common/interface/DeviceProduct.h | 76 +++++++++ .../interface/alpaka/PortableCollection.h | 17 ++ .../src/alpaka/classes_cuda.h | 1 + .../src/alpaka/classes_cuda_def.xml | 3 + HeterogeneousCore/AlpakaCore/BuildFile.xml | 1 + .../interface/alpaka/DeviceProductType.h | 24 +++ .../AlpakaCore/interface/alpaka/EDGetToken.h | 43 +++++ .../AlpakaCore/interface/alpaka/EDMetadata.h | 99 ++++++++++++ .../alpaka/EDMetadataAcquireSentry.h | 47 ++++++ .../interface/alpaka/EDMetadataSentry.h | 37 +++++ .../AlpakaCore/interface/alpaka/EDPutToken.h | 45 ++++++ .../interface/alpaka/ESDeviceProduct.h | 63 ++++++++ .../AlpakaCore/interface/alpaka/ESGetToken.h | 39 +++++ .../AlpakaCore/interface/alpaka/ESProducer.h | 91 +++++++++++ .../AlpakaCore/interface/alpaka/Event.h | 149 ++++++++++++++++++ .../AlpakaCore/interface/alpaka/EventSetup.h | 68 ++++++++ .../interface/alpaka/ModuleFactory.h | 12 ++ .../interface/alpaka/ProducerBase.h | 135 ++++++++++++++++ .../AlpakaCore/interface/alpaka/Record.h | 98 ++++++++++++ .../interface/alpaka/chooseDevice.h | 11 ++ .../interface/alpaka/global/EDProducer.h | 33 ++++ .../interface/alpaka/stream/EDProducer.h | 33 ++++ .../alpaka/stream/SynchronizingEDProducer.h | 51 ++++++ .../AlpakaCore/interface/alpaka/typelookup.h | 29 ++++ .../AlpakaCore/src/alpaka/EDMetadata.cc | 68 ++++++++ .../src/alpaka/EDMetadataAcquireSentry.cc | 31 ++++ .../AlpakaCore/src/alpaka/EDMetadataSentry.cc | 20 +++ .../AlpakaCore/src/alpaka/ESProducer.cc | 10 ++ .../AlpakaCore/src/alpaka/chooseDevice.cc | 37 +++++ .../interface/TransferToHost.h | 21 +++ HeterogeneousCore/AlpakaTest/BuildFile.xml | 7 + .../AlpakaTest/interface/AlpakaESTestData.h | 35 ++++ .../interface/AlpakaESTestRecords.h | 18 +++ .../AlpakaTest/interface/AlpakaESTestSoA.h | 17 ++ 
.../AlpakaTest/interface/ESTestData.h | 33 ++++ .../interface/alpaka/AlpakaESTestData.h | 38 +++++ .../AlpakaTest/plugins/BuildFile.xml | 3 + .../AlpakaTest/plugins/TestESProducers.cc | 36 +++++ .../plugins/alpaka/TestAlpakaESProducerA.cc | 88 +++++++++++ .../plugins/alpaka/TestAlpakaESProducerB.cc | 63 ++++++++ .../plugins/alpaka/TestAlpakaESProducerC.cc | 68 ++++++++ .../plugins/alpaka/TestAlpakaESProducerD.cc | 48 ++++++ .../alpaka/TestAlpakaGlobalProducer.cc | 55 +++++++ .../alpaka/TestAlpakaStreamProducer.cc | 63 ++++++++ .../TestAlpakaStreamSynchronizingProducer.cc | 52 ++++++ .../plugins/alpaka/TestHelperClass.cc | 18 +++ .../plugins/alpaka/TestHelperClass.h | 36 +++++ .../plugins/alpaka/testESAlgoAsync.dev.cc | 35 ++++ .../plugins/alpaka/testESAlgoAsync.h | 16 ++ .../AlpakaTest/src/AlpakaESTestRecords.cc | 7 + .../AlpakaTest/src/ES_AlpakaESTestData.cc | 9 ++ .../AlpakaTest/src/ES_ESTestData.cc | 6 + .../src/alpaka/ES_AlpakaESTestData.cc | 13 ++ .../AlpakaTest/test/BuildFile.xml | 6 + .../AlpakaTest/test/testAlpakaModules.sh | 31 ++++ .../AlpakaTest/test/testAlpakaModules_cfg.py | 113 +++++++++++++ 56 files changed, 2306 insertions(+) create mode 100644 DataFormats/Common/interface/DeviceProduct.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataAcquireSentry.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataSentry.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProduct.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h create mode 100644 
HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/ProducerBase.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/Record.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/chooseDevice.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/global/EDProducer.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h create mode 100644 HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h create mode 100644 HeterogeneousCore/AlpakaCore/src/alpaka/EDMetadata.cc create mode 100644 HeterogeneousCore/AlpakaCore/src/alpaka/EDMetadataAcquireSentry.cc create mode 100644 HeterogeneousCore/AlpakaCore/src/alpaka/EDMetadataSentry.cc create mode 100644 HeterogeneousCore/AlpakaCore/src/alpaka/ESProducer.cc create mode 100644 HeterogeneousCore/AlpakaCore/src/alpaka/chooseDevice.cc create mode 100644 HeterogeneousCore/AlpakaInterface/interface/TransferToHost.h create mode 100644 HeterogeneousCore/AlpakaTest/BuildFile.xml create mode 100644 HeterogeneousCore/AlpakaTest/interface/AlpakaESTestData.h create mode 100644 HeterogeneousCore/AlpakaTest/interface/AlpakaESTestRecords.h create mode 100644 HeterogeneousCore/AlpakaTest/interface/AlpakaESTestSoA.h create mode 100644 HeterogeneousCore/AlpakaTest/interface/ESTestData.h create mode 100644 HeterogeneousCore/AlpakaTest/interface/alpaka/AlpakaESTestData.h create mode 100644 HeterogeneousCore/AlpakaTest/plugins/TestESProducers.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerA.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerB.cc create mode 100644 
HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerC.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerD.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/testESAlgoAsync.dev.cc create mode 100644 HeterogeneousCore/AlpakaTest/plugins/alpaka/testESAlgoAsync.h create mode 100644 HeterogeneousCore/AlpakaTest/src/AlpakaESTestRecords.cc create mode 100644 HeterogeneousCore/AlpakaTest/src/ES_AlpakaESTestData.cc create mode 100644 HeterogeneousCore/AlpakaTest/src/ES_ESTestData.cc create mode 100644 HeterogeneousCore/AlpakaTest/src/alpaka/ES_AlpakaESTestData.cc create mode 100755 HeterogeneousCore/AlpakaTest/test/testAlpakaModules.sh create mode 100644 HeterogeneousCore/AlpakaTest/test/testAlpakaModules_cfg.py diff --git a/DataFormats/Common/interface/DeviceProduct.h b/DataFormats/Common/interface/DeviceProduct.h new file mode 100644 index 0000000000000..926d7ad16e262 --- /dev/null +++ b/DataFormats/Common/interface/DeviceProduct.h @@ -0,0 +1,76 @@ +#ifndef DataFormats_Common_interface_DeviceProduct_h +#define DataFormats_Common_interface_DeviceProduct_h + +#include +#include + +namespace edm { + class DeviceProductBase { + public: + DeviceProductBase() = default; + ~DeviceProductBase() = default; + + // TODO: in principle this function is an implementation detail + template + M const& metadata() const { + // TODO: I believe the assertion could be removed safely after + // the data dependence and scheduling systems would guarantee + // that an EDModule in a given 
execution space can access + // only to the EDProducts in a memory space compatible with the + // execution space. + // + // On the other hand, with Alpaka (likely with others) the + // getSynchronized() does additional checks so the added cost is + // probably not that much? + assert(typeid(M) == *metadataType_); + return *static_cast(metadata_.get()); + } + + protected: + template + explicit DeviceProductBase(std::shared_ptr metadata) + : metadata_(std::move(metadata)), metadataType_(&typeid(M)) {} + + private: + std::shared_ptr metadata_; + std::type_info const* metadataType_; + }; + + /** + * A wrapper for Event Data product in device memory accompanied + * with some device-specific metadata. Not intended to be used directly by + * developers (except in ROOT dictionary declarations in + * classes_def.xml similar to edm::Wrapper). + */ + template + class DeviceProduct : public DeviceProductBase { + public: + DeviceProduct() = default; + + template + explicit DeviceProduct(std::shared_ptr metadata, Args&&... args) + : DeviceProductBase(std::move(metadata)), data_(std::forward(args)...) {} + + DeviceProduct(const DeviceProduct&) = delete; + DeviceProduct& operator=(const DeviceProduct&) = delete; + DeviceProduct(DeviceProduct&&) = default; + DeviceProduct& operator=(DeviceProduct&&) = default; + + /** + * Get the actual data product after the metadata object has + * synchronized the access. The synchronization details depend on + * the metadata type, which the caller must know. All the + * arguments are passed to M::synchronize() function. + */ + template + T const& getSynchronized(Args&&... args) const { + auto const& md = metadata(); + md.synchronize(std::forward(args)...); + return data_; + } + + private: + T data_; //! 
+ }; +} // namespace edm +#endif diff --git a/DataFormats/Portable/interface/alpaka/PortableCollection.h b/DataFormats/Portable/interface/alpaka/PortableCollection.h index d9cfaf5c66bed..bed28cde953d2 100644 --- a/DataFormats/Portable/interface/alpaka/PortableCollection.h +++ b/DataFormats/Portable/interface/alpaka/PortableCollection.h @@ -9,6 +9,7 @@ #include "DataFormats/Portable/interface/PortableHostCollection.h" #include "DataFormats/Portable/interface/PortableDeviceCollection.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/TransferToHost.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { @@ -39,4 +40,20 @@ namespace traits { } // namespace traits +namespace cms::alpakatools { + // TODO: Is this the right place for the specialization? Or should it be in PortableDeviceProduct? + template + struct TransferToHost> { + using HostDataType = ::PortableHostCollection; + + template + static HostDataType transferAsync(TQueue& queue, + ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection const& deviceData) { + HostDataType hostData(deviceData->metadata().size(), queue); + alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer()); + return hostData; + } + }; +} // namespace cms::alpakatools + #endif // DataFormats_Portable_interface_alpaka_PortableDeviceCollection_h diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h index 3f2f0314bdf45..32d58300c3f78 100644 --- a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h @@ -1,3 +1,4 @@ +#include "DataFormats/Common/interface/DeviceProduct.h" #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Portable/interface/Product.h" #include "DataFormats/PortableTestObjects/interface/TestSoA.h" diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml 
b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml index 06cbdbde44100..4ed9893e3b20b 100644 --- a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml @@ -4,4 +4,7 @@ + + + diff --git a/HeterogeneousCore/AlpakaCore/BuildFile.xml b/HeterogeneousCore/AlpakaCore/BuildFile.xml index 786446de87dce..fc6f633654d6d 100644 --- a/HeterogeneousCore/AlpakaCore/BuildFile.xml +++ b/HeterogeneousCore/AlpakaCore/BuildFile.xml @@ -1,5 +1,6 @@ + diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h new file mode 100644 index 0000000000000..49092c3043529 --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h @@ -0,0 +1,24 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_DeviceProductType_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_DeviceProductType_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::detail { + /** + * This "trait" class abstracts the actual product type put in the + * edm::Event. 
+ */ + template + struct DeviceProductType { +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + // host synchronous backends can use TProduct directly + using type = TProduct; +#else + // all device and asynchronous backends need to be wrapped + using type = edm::DeviceProduct; +#endif + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::detail + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h new file mode 100644 index 0000000000000..8699cc2b8fdf7 --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h @@ -0,0 +1,43 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDGetToken_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDGetToken_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::device { + class Event; + /** + * The device::EDGetToken is similar to edm::EDGetTokenT, but is + * intended for Event data products in the device memory space + * defined by the backend (i.e. ALPAKA_ACCELERATOR_NAMESPACE). It + * can be used only to get data from a device::Event. + * + * A specific token class is motivated with + * - enforce stronger the type-deducing consumes(). Consumes() with + * explicit type will fail anyway in general, but succeeds on one + * of the backends. With a specific token type the explicit-type + * consumes() would fail always. 
+ *- to avoid using device::EDGetToken with edm::Event + */ + template + class EDGetToken { + using ProductType = typename detail::DeviceProductType::type; + + public: + constexpr EDGetToken() = default; + + template + constexpr EDGetToken(TAdapter&& iAdapter) : token_(std::forward(iAdapter)) {} + + private: + friend class Event; + + auto const& underlyingToken() const { return token_; } + + edm::EDGetTokenT token_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::device + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h new file mode 100644 index 0000000000000..ddfb9f706621b --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h @@ -0,0 +1,99 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadata_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadata_h + +#include +#include + +#include + +#include "FWCore/Concurrency/interface/WaitingTaskWithArenaHolder.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HostOnlyTask.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + /** + * The EDMetadata class provides the exact synchronization + * mechanisms for Event data products for backends with asynchronous + * Queue. These include + * - adding a notification for edm::WaitingTaskWithArenaHolder + * - recording an Event + * - synchronizing an Event data product and a consuming EDModule + * + * For synchronous backends the EDMetadata acts as an owner of the + * Queue object, as no further synchronization is needed. + * + * EDMetadata is used as the Metadata class for + * edm::DeviceProduct, and is an implementation detail (not + * visible to user code). + * + * TODO: What to do with device-synchronous backends? 
The data + * product needs to be wrapped into the edm::DeviceProduct, but the + * EDMetadata class used there does not need anything except "dummy" + * implementation of synchronize(). The question is clearly + * solvable, so maybe leave it to the time we would actually need + * one? + */ + +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + // Host backends with a synchronous queue + + class EDMetadata { + public: + EDMetadata(std::shared_ptr queue) : queue_(std::move(queue)) {} + + Device device() const { return alpaka::getDev(*queue_); } + + // Alpaka operations do not accept a temporary as an argument + // TODO: Returning non-const reference here is BAD + Queue& queue() const { return *queue_; } + + void recordEvent() {} + + private: + std::shared_ptr queue_; + }; + + // TODO: else if device backends with a synchronous queue + +#else + // All backends with an asynchronous queue + + class EDMetadata { + public: + EDMetadata(std::shared_ptr queue, std::shared_ptr event) + : queue_(std::move(queue)), event_(std::move(event)) {} + ~EDMetadata(); + + Device device() const { return alpaka::getDev(*queue_); } + + // Alpaka operations do not accept a temporary as an argument + // TODO: Returning non-const reference here is BAD + Queue& queue() const { return *queue_; } + + void enqueueCallback(edm::WaitingTaskWithArenaHolder holder); + + void recordEvent() { alpaka::enqueue(*queue_, *event_); } + + /** + * Synchronizes 'consumer' metadata wrt. 'this' in the event product + */ + void synchronize(EDMetadata& consumer, bool tryReuseQueue) const; + + private: + /** + * Returns a shared_ptr to the Queue if it can be reused, or a + * null shared_ptr if not + */ + std::shared_ptr tryReuseQueue_() const; + + std::shared_ptr queue_; + std::shared_ptr event_; + // This flag tells whether the Queue may be reused by a + // consumer or not. The goal is to have a "chain" of modules to + // queue their work to the same queue. 
+ mutable std::atomic mayReuseQueue_ = true; + }; +#endif +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataAcquireSentry.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataAcquireSentry.h new file mode 100644 index 0000000000000..6495d2127ff5e --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataAcquireSentry.h @@ -0,0 +1,47 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadataAcquireSentry_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadataAcquireSentry_h + +#include "FWCore/Concurrency/interface/WaitingTaskWithArenaHolder.h" +#include "FWCore/Utilities/interface/StreamID.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace detail { + /** + * Helper class to be used in acquire() + * + * TODO: not really a sentry as it doesn't do anything special in its destructor. Better name? 
+ */ + class EDMetadataAcquireSentry { + public: + // TODO: WaitingTaskWithArenaHolder not really needed for host synchronous case + // Constructor overload to be called from acquire() + EDMetadataAcquireSentry(edm::StreamID stream, edm::WaitingTaskWithArenaHolder holder); + + // Constructor overload to be called from registerTransformAsync() + EDMetadataAcquireSentry(Device const& device, edm::WaitingTaskWithArenaHolder holder); + + EDMetadataAcquireSentry(EDMetadataAcquireSentry const&) = delete; + EDMetadataAcquireSentry& operator=(EDMetadataAcquireSentry const&) = delete; + EDMetadataAcquireSentry(EDMetadataAcquireSentry&&) = delete; + EDMetadataAcquireSentry& operator=(EDMetadataAcquireSentry&&) = delete; + + std::shared_ptr metadata() { return metadata_; } + +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + // all synchronous backends + std::shared_ptr finish() { return std::move(metadata_); } +#else + // all asynchronous backends + std::shared_ptr finish(); +#endif + + private: + std::shared_ptr metadata_; + + edm::WaitingTaskWithArenaHolder waitingTaskHolder_; + }; + } // namespace detail +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataSentry.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataSentry.h new file mode 100644 index 0000000000000..4698f029a5b7a --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataSentry.h @@ -0,0 +1,37 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadataSentry_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadataSentry_h + +#include "FWCore/Utilities/interface/StreamID.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace detail { + /** + * Helper class to be used in produce() + * + * TODO: not really a sentry as it doesn't do anything special in its destructor. Better name? 
+ */ + class EDMetadataSentry { + public: + // For normal module + EDMetadataSentry(edm::StreamID stream); + + // For ExternalWork-module's produce() + EDMetadataSentry(std::shared_ptr metadata) : metadata_(std::move(metadata)) {} + + EDMetadataSentry(EDMetadataSentry const&) = delete; + EDMetadataSentry& operator=(EDMetadataSentry const&) = delete; + EDMetadataSentry(EDMetadataSentry&&) = delete; + EDMetadataSentry& operator=(EDMetadataSentry&&) = delete; + + std::shared_ptr metadata() { return metadata_; } + + void finish(); + + private: + std::shared_ptr metadata_; + }; + } // namespace detail +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h new file mode 100644 index 0000000000000..7c390e6e08d72 --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h @@ -0,0 +1,45 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDPutToken_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDPutToken_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "FWCore/Utilities/interface/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::device { + class Event; + + /** + * The device::EDPutToken is similar to edm::EDPutTokenT, but is + * intended for Event data products in the device memory space + * defined by the backend (i.e. ALPAKA_ACCELERATOR_NAMESPACE). 
It + * can be used only to put data into a device::Event + */ + template + class EDPutToken { + using ProductType = typename detail::DeviceProductType::type; + + public: + constexpr EDPutToken() noexcept = default; + + template + explicit EDPutToken(TAdapter&& adapter) : token_(adapter.template deviceProduces()) {} + + template + EDPutToken& operator=(TAdapter&& adapter) { + edm::EDPutTokenT tmp(adapter.template deviceProduces()); + token_ = tmp; + return *this; + } + + private: + friend class Event; + + auto const& underlyingToken() const { return token_; } + + edm::EDPutTokenT token_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::device + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProduct.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProduct.h new file mode 100644 index 0000000000000..dc6d7e4cd36d2 --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProduct.h @@ -0,0 +1,63 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_ESDeviceProduct_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_ESDeviceProduct_h + +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + /** + * The sole purpose of this wrapper class is to segregate the + * EventSetup products in the device memory from the host memory + * + * In contrast to the ED side, no synchronization is needed here as we + * mark the ES product done only after all the (in the future + * asynchronous) work has finished. 
+ */ + template + class ESDeviceProduct { + public: + virtual ~ESDeviceProduct() {} + + T const& get(Device const& dev) const { return *cache_[alpaka::getNativeHandle(dev)]; } + + protected: + explicit ESDeviceProduct(size_t ndevices) : cache_(ndevices, nullptr) {} + + void setCache(size_t idev, T const* data) { cache_[idev] = data; } + + private: + // trading memory to avoid virtual function + std::vector cache_; + }; + + namespace detail { + /** + * This class holds the actual storage (since EventSetup proxies + * are able to hold std::optional, std::unique_ptr, and + * std::shared_ptr()). The object of this class holds the + * storage, while the consumers of the ESProducts see only the + * base class. + */ + template + class ESDeviceProductWithStorage : public ESDeviceProduct { + using Base = ESDeviceProduct; + + public: + explicit ESDeviceProductWithStorage(size_t ndevices) : Base(ndevices), data_(ndevices) {} + + void insert(Device const& dev, TStorage data) { + auto const idev = alpaka::getNativeHandle(dev); + data_[idev] = std::move(data); + this->setCache(idev, &*data_[idev]); + } + + private: + std::vector data_; + }; + } // namespace detail +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h new file mode 100644 index 0000000000000..05ef451e70c8a --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h @@ -0,0 +1,39 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_ESGetToken_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_ESGetToken_h + +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProduct.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::device { + class EventSetup; + template + class Record; + + /** + * The device::ESGetToken is similar to 
edm::ESGetToken, but is + * intended for EventSetup data products in the device memory space + * defined by the backend (i.e. ALPAKA_ACCELERATOR_NAMESPACE). It + * can be used only to get data from a device::EventSetup and + * device::Record. + */ + template + class ESGetToken { + public: + constexpr ESGetToken() noexcept = default; + + template + constexpr ESGetToken(TAdapter&& iAdapter) : token_(std::forward(iAdapter)) {} + + private: + friend class EventSetup; + template + friend class Record; + + auto const& underlyingToken() const { return token_; } + + edm::ESGetToken, ESRecord> token_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::device + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h new file mode 100644 index 0000000000000..3b08a96b58322 --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h @@ -0,0 +1,91 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_ESProducer_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_ESProducer_h + +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/produce_helpers.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESDeviceProduct.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Record.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" + +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + /** + * The ESProducer is a base class for modules producing data into + * the host memory space and/or the device memory space defined by + * the backend (i.e. ALPAKA_ACCELERATOR_NAMESPACE). The interface + * looks similar to the normal edm::ESProducer. + * + * When producing a host product, the produce function should have + * the usual Record argument. For producing a device product, + * the produce function should have a device::Record argument. 
+ */ + class ESProducer : public edm::ESProducer { + using Base = edm::ESProducer; + + protected: + template + auto setWhatProduced(T* iThis, edm::es::Label const& label = {}) { + return setWhatProduced(iThis, &T::produce, label); + } + + template + auto setWhatProduced(T* iThis, TReturn (T ::*iMethod)(TRecord const&), edm::es::Label const& label = {}) { + return Base::setWhatProduced(iThis, iMethod, label); + } + + template + auto setWhatProduced(T* iThis, + TReturn (T ::*iMethod)(device::Record const&), + edm::es::Label const& label = {}) { + using TProduct = typename edm::eventsetup::produce::smart_pointer_traits::type; + using ProductType = ESDeviceProduct; + using ReturnType = detail::ESDeviceProductWithStorage; + return Base::setWhatProduced( + [iThis, iMethod](TRecord const& record) -> std::unique_ptr { + // TODO: move the multiple device support into EventSetup system itself + auto const& devices = cms::alpakatools::devices(); + std::vector> queues; + queues.reserve(devices.size()); + auto ret = std::make_unique(devices.size()); + bool allnull = true; + bool anynull = false; + for (auto const& dev : devices) { + device::Record const deviceRecord(record, dev); + auto prod = std::invoke(iMethod, iThis, deviceRecord); + if (prod) { + allnull = false; + ret->insert(dev, std::move(prod)); + } else { + anynull = true; + } + queues.push_back(deviceRecord.queuePtr()); + } + // TODO: to be changed asynchronous later + for (auto& queuePtr : queues) { + alpaka::wait(*queuePtr); + } + if (allnull) { + return nullptr; + } else if (anynull) { + // TODO: throwing an exception if the iMethod() returns + // null for some of the devices of one backend is + // suboptimal. On the other hand, in the near term + // multiple devices per backend is useful only for + // private tests (not production), and the plan is to + // make the EventSetup system itself aware of multiple + // devices (or memory spaces). I hope this exception + // would be good-enough until we get there. 
+ ESProducer::throwSomeNullException(); + } + return ret; + }, + label); + } + + private: + static void throwSomeNullException(); + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h new file mode 100644 index 0000000000000..289ee18927a14 --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h @@ -0,0 +1,149 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_Event_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_Event_h + +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::device { + /** + * The device::Event mimics edm::Event, and provides access to + * EDProducts in the host memory space, and in the device memory + * space defined by the backend (i.e. ALPAKA_ACCELERATOR_NAMESPACE). + * The device::Event also gives access to the Queue object the + * EDModule code should use to queue all the device operations. + * + * Access to device memory space products is synchronized properly. + * For backends with synchronous Queue this is trivial. For + * asynchronous Queue, either the Queue of the EDModule is taken + * from the first data product, or a wait is inserted into the + * EDModule's Queue to wait for the product's asynchronous + * production to finish. + * + * Note that the full interface of edm::Event is not replicated here. 
If + * something important is missing, that can be added. + */ + class Event { + public: + // To be called in produce() + explicit Event(edm::Event& ev, std::shared_ptr metadata) + : constEvent_(ev), event_(&ev), metadata_(std::move(metadata)) {} + + // To be called in acquire() + explicit Event(edm::Event const& ev, std::shared_ptr metadata) + : constEvent_(ev), metadata_(std::move(metadata)) {} + + Event(Event const&) = delete; + Event& operator=(Event const&) = delete; + Event(Event&&) = delete; + Event& operator=(Event&&) = delete; + + auto streamID() const { return constEvent_.streamID(); } + auto id() const { return constEvent_.id(); } + + Device device() const { return metadata_->device(); } + + // Alpaka operations do not accept a temporary as an argument + // TODO: Returning non-const reference here is BAD + Queue& queue() const { + queueUsed_ = true; + return metadata_->queue(); + } + + // get() + + template + T const& get(edm::EDGetTokenT const& token) const { + return constEvent_.get(token); + } + + template + T const& get(device::EDGetToken const& token) const { + auto const& deviceProduct = constEvent_.get(token.underlyingToken()); + if constexpr (std::is_same_v::type, T>) { + return deviceProduct; + } else { + // try to re-use queue from deviceProduct if our queue has not yet been used + T const& product = deviceProduct.template getSynchronized(*metadata_, not queueUsed_); + queueUsed_ = true; + return product; + } + } + + // getHandle() + + template + edm::Handle getHandle(edm::EDGetTokenT const& token) const { + return constEvent_.getHandle(token); + } + + template + edm::Handle getHandle(device::EDGetToken const& token) const { + auto deviceProductHandle = constEvent_.getHandle(token.underlyingToken()); + if constexpr (std::is_same_v::type, T>) { + return deviceProductHandle; + } else { + if (not deviceProductHandle) { + return edm::Handle(deviceProductHandle.whyFailedFactory()); + } + // try to re-use queue from deviceProduct if our queue has not 
yet been used + T const& product = deviceProductHandle->getSynchronized(*metadata_, not queueUsed_); + queueUsed_ = true; + return edm::Handle(&product, deviceProductHandle.provenance()); + } + } + + // emplace() + + template + edm::OrphanHandle emplace(edm::EDPutTokenT const& token, Args&&... args) { + return event_->emplace(token, std::forward(args)...); + } + + // TODO: what to do about the returned OrphanHandle object? + // The idea for Ref-like things in this domain differs from earlier Refs anyway + template + void emplace(device::EDPutToken const& token, Args&&... args) { + if constexpr (std::is_same_v::type, T>) { + event_->emplace(token.underlyingToken(), std::forward(args)...); + } else { + event_->emplace(token.underlyingToken(), metadata_, std::forward(args)...); + } + } + + // put() + + template + edm::OrphanHandle put(edm::EDPutTokenT const& token, std::unique_ptr product) { + return event_->put(token, std::move(product)); + } + + template + void put(device::EDPutToken const& token, std::unique_ptr product) { + if constexpr (std::is_same_v::type, T>) { + event_->emplace(token.underlyingToken(), std::move(*product)); + } else { + event_->emplace(token.underlyingToken(), metadata_, std::move(*product)); + } + } + + private: + // Having both const and non-const here in order to serve the + // clients with one device::Event class + edm::Event const& constEvent_; + edm::Event* event_ = nullptr; + + std::shared_ptr metadata_; + // device::Event is not supposed to be const-thread-safe, so no + // additional protection is needed. 
+ mutable bool queueUsed_ = false; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::device + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h new file mode 100644 index 0000000000000..5ec294fb11cae --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h @@ -0,0 +1,68 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EventSetup_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_EventSetup_h + +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::device { + /** + * The device::EventSetup mimics edm::EventSetup, and provides access + * to ESProducts in the host memory space, and in the device memory + * space defined by the backend (i.e. ALPAKA_ACCELERATOR_NAMESPACE). + * + * Access to device memory space products is synchronized properly. + * + * Note that not full interface of edm::EventSetup is replicated + * here. If something important is missing, that can be added. 
+ */ + class EventSetup { + public: + EventSetup(edm::EventSetup const& iSetup, Device const& dev) : setup_(iSetup), device_(dev) {} + + // getData() + + template + T const& getData(edm::ESGetToken const& iToken) const { + return setup_.getData(iToken); + } + + template + T const& getData(device::ESGetToken const& iToken) const { + auto const& product = setup_.getData(iToken.underlyingToken()); + return product.get(device_); + } + + // getHandle() + + template + edm::ESHandle getHandle(edm::ESGetToken const& iToken) const { + return setup_.getHandle(iToken); + } + + template + edm::ESHandle getHandle(device::ESGetToken const& iToken) const { + auto handle = setup_.getHandle(iToken.underlyingToken()); + if (not handle) { + return edm::ESHandle(handle.whyFailedFactory()); + } + return edm::ESHandle(&handle->get(device_), handle.description()); + } + + // getTransientHandle() is intentionally omitted for now. It makes + // little sense for event transitions, and for now + // device::EventSetup is available only for those. If + // device::EventSetup ever gets added for run or lumi transitions, + // getTransientHandle() will be straightforward to add + + private: + edm::EventSetup const& setup_; + // Taking a copy because alpaka::getDev() returns a temporary.
To + // be removed after a proper treatment of multiple devices per + // backend is implemented in EventSetup + Device const device_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::device + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h new file mode 100644 index 0000000000000..8cd6277f7d61d --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h @@ -0,0 +1,12 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_ModuleFactory_h +#define HeterogeneousCore_AlpakaCore_interface_alpaka_ModuleFactory_h + +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +// force expanding ALPAKA_ACCELERATOR_NAMESPACE before stringification inside DEFINE_FWK_EVENTSETUP_MODULE +#define DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE2(type) DEFINE_FWK_EVENTSETUP_MODULE(type) +#define DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(type) \ + DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE2(ALPAKA_ACCELERATOR_NAMESPACE::type) + +#endif diff --git a/HeterogeneousCore/AlpakaCore/interface/alpaka/ProducerBase.h b/HeterogeneousCore/AlpakaCore/interface/alpaka/ProducerBase.h new file mode 100644 index 0000000000000..8d3272c69ea5c --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/alpaka/ProducerBase.h @@ -0,0 +1,135 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_ProducerBase_h +#define HeterogeneousCore_AlpakaCore_interface_ProducerBase_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "FWCore/Framework/interface/moduleAbilities.h" +#include "FWCore/Utilities/interface/EDPutToken.h" +#include "FWCore/Utilities/interface/Transition.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataAcquireSentry.h" +#include "HeterogeneousCore/AlpakaCore/interface/EventCache.h" +#include "HeterogeneousCore/AlpakaCore/interface/QueueCache.h" +#include
"HeterogeneousCore/AlpakaInterface/interface/TransferToHost.h" + +#include +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class ProducerBaseAdaptor; + + /** + * The ProducerBase acts as a common base class for all Alpaka + * EDProducers. The main benefit is to have a single place for the + * definition of produces() functions. + * + * The produces() functions return a custom ProducerBaseAdaptor in + * order to call the deviceProduces(). For device or asynchronous + * backends the deviceProduces() registers the automatic transfer to + * host and a transformation from edm::DeviceProduct to U, where + * U is the host-equivalent of T. The transformation from T to U is + * done by a specialization of cms::alpakatools::TransferToHost + * template, that should be provided in the same file where T is + * defined. + * + * TODO: add "override" for labelsForToken() + */ + template