From bc4392bf599aac0f2fd56cd206db56f2c0a15ccf Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 12 Oct 2021 11:01:04 +0200 Subject: [PATCH 01/50] [cudadev] Downgraded the requirement for device/pinned host memory to trivially copyable. The requirement for trivially constructible prevented creating structures on the host side and then memory copying it to the device side. --- src/cudadev/CUDACore/device_unique_ptr.h | 8 ++++---- src/cudadev/CUDACore/host_unique_ptr.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cudadev/CUDACore/device_unique_ptr.h b/src/cudadev/CUDACore/device_unique_ptr.h index ab5d6bc25..6e67d6cc7 100644 --- a/src/cudadev/CUDACore/device_unique_ptr.h +++ b/src/cudadev/CUDACore/device_unique_ptr.h @@ -49,8 +49,8 @@ namespace cms { template typename device::impl::make_device_unique_selector::non_array make_device_unique(cudaStream_t stream) { - static_assert(std::is_trivially_constructible::value, - "Allocating with non-trivial constructor on the device memory is not supported"); + static_assert(std::is_trivially_copyable::value, + "Allocating with non-trivial copy on the device memory is not supported"); void *mem = allocate_device(sizeof(T), stream); return typename device::impl::make_device_unique_selector::non_array{reinterpret_cast(mem), device::impl::DeviceDeleter{stream}}; @@ -60,8 +60,8 @@ namespace cms { typename device::impl::make_device_unique_selector::unbounded_array make_device_unique(size_t n, cudaStream_t stream) { using element_type = typename std::remove_extent::type; - static_assert(std::is_trivially_constructible::value, - "Allocating with non-trivial constructor on the device memory is not supported"); + static_assert(std::is_trivially_copyable::value, + "Allocating with non-trivial copy on the device memory is not supported"); void *mem = allocate_device(n * sizeof(element_type), stream); return typename device::impl::make_device_unique_selector::unbounded_array{ reinterpret_cast(mem), 
device::impl::DeviceDeleter{stream}}; diff --git a/src/cudadev/CUDACore/host_unique_ptr.h b/src/cudadev/CUDACore/host_unique_ptr.h index f34798da3..ba5369bbe 100644 --- a/src/cudadev/CUDACore/host_unique_ptr.h +++ b/src/cudadev/CUDACore/host_unique_ptr.h @@ -39,8 +39,8 @@ namespace cms { // Allocate pinned host memory template typename host::impl::make_host_unique_selector::non_array make_host_unique(cudaStream_t stream) { - static_assert(std::is_trivially_constructible::value, - "Allocating with non-trivial constructor on the pinned host memory is not supported"); + static_assert(std::is_trivially_copyable::value, + "Allocating with non-trivial copy on the pinned host memory is not supported"); void *mem = allocate_host(sizeof(T), stream); return typename host::impl::make_host_unique_selector::non_array{reinterpret_cast(mem)}; } @@ -48,8 +48,8 @@ namespace cms { template typename host::impl::make_host_unique_selector::unbounded_array make_host_unique(size_t n, cudaStream_t stream) { using element_type = typename std::remove_extent::type; - static_assert(std::is_trivially_constructible::value, - "Allocating with non-trivial constructor on the pinned host memory is not supported"); + static_assert(std::is_trivially_copyable::value, + "Allocating with non-trivial copy on the pinned host memory is not supported"); void *mem = allocate_host(n * sizeof(element_type), stream); return typename host::impl::make_host_unique_selector::unbounded_array{reinterpret_cast(mem)}; } From de1f3ed9ec1c537958d2d7765613e4167e88e632 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 12 Oct 2021 11:26:12 +0200 Subject: [PATCH 02/50] [cudadev] Added support for buffer-store-view SoA model. The SoA store divides a user pre-allocated buffer into adjacent columns of values. The columns are byte-aligned to a settable alignment and their length is determined at run time. The SoA views allow grouping columns from multiple stores in a logical entity. 
For example when some columns are used on device only and others are transmitted to host. The two groups are on two different stores, but joined together in a logical view. Views can also provide access to a subset of a store. --- src/cudadev/DataFormats/SoACommon.h | 181 ++++++++ src/cudadev/DataFormats/SoAStore.h | 642 ++++++++++++++++++++++++++++ src/cudadev/DataFormats/SoAView.h | 351 +++++++++++++++ 3 files changed, 1174 insertions(+) create mode 100644 src/cudadev/DataFormats/SoACommon.h create mode 100644 src/cudadev/DataFormats/SoAStore.h create mode 100644 src/cudadev/DataFormats/SoAView.h diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h new file mode 100644 index 000000000..912a92413 --- /dev/null +++ b/src/cudadev/DataFormats/SoACommon.h @@ -0,0 +1,181 @@ +/* + * Definitions of SoA common parameters for SoA class generators + */ + +#ifndef DataStrcutures_SoACommon_h +#define DataStrcutures_SoACommon_h + +#include "boost/preprocessor.hpp" +#include + +// CUDA attributes +#ifdef __CUDACC__ +#define SOA_HOST_ONLY __host__ +#define SOA_DEVICE_ONLY __device__ +#define SOA_HOST_DEVICE __host__ __device__ +#define SOA_HOST_DEVICE_INLINE __host__ __device__ __forceinline__ +#define SOA_DEVICE_RESTRICT __restrict__ +#else +#define SOA_HOST_ONLY +#define SOA_DEVICE_ONLY +#define SOA_HOST_DEVICE +#define SOA_HOST_DEVICE_INLINE inline +#define SOA_DEVICE_RESTRICT +#endif + +#if defined(__CUDACC__) && defined(__CUDA_ARCH__) +// Read a pointer content via read-only (non coherent) cache. 
+#define LOAD_INCOHERENT(A) __ldg(A) +#define LOAD_STREAMED(A) __ldcs(A) +#define STORE_STREAMED(A, V) __stcs(A, V) +#else +#define LOAD_INCOHERENT(A) *(A) +#define LOAD_STREAMED(A) *(A) +#define STORE_STREAMED(A, V) *(A) = (V) +#endif + +// compile-time sized SoA + +// Helper template managing a (mutable) value within its column +template +class SoAValue { +public: + SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): idx_(i), col_(col) {} + /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ + SOA_HOST_DEVICE_INLINE T& operator() () { return col_[idx_]; } + SOA_HOST_DEVICE_INLINE T operator() () const { return LOAD_INCOHERENT(col_ + idx_); } + SOA_HOST_DEVICE_INLINE T* operator& () { return &col_[idx_]; } + SOA_HOST_DEVICE_INLINE const T* operator& () const { return &col_[idx_]; } + template + SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return col_[idx_] = v; } + typedef T valueType; + static constexpr auto valueSize = sizeof(T); +private: + size_t idx_; + T *col_; +}; + +// Helper template managing a read-only value within its column +template +class SoAConstValue { +public: + SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, const T * col): idx_(i), col_(col) {} + /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ + SOA_HOST_DEVICE_INLINE T operator() () const { return LOAD_INCOHERENT(col_ + idx_); } + SOA_HOST_DEVICE_INLINE const T* operator& () const { return &col_[idx_]; } + typedef T valueType; + static constexpr auto valueSize = sizeof(T); +private: + size_t idx_; + const T *col_; +}; + + +// Helper template managing an Eigen value (accessed through strided Eigen::Map objects) within its column +template +class SoAEigenValue { +public: + typedef C Type; + typedef Eigen::Map> MapType; + typedef Eigen::Map> CMapType; + SOA_HOST_DEVICE_INLINE SoAEigenValue(size_t i, typename C::Scalar * col, size_t stride): + val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, + Eigen::InnerStride(stride)), + crCol_(col), + cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, + 
Eigen::InnerStride(stride)), + stride_(stride) {} + SOA_HOST_DEVICE_INLINE MapType& operator() () { return val_; } + SOA_HOST_DEVICE_INLINE const CMapType& operator() () const { return cVal_; } + SOA_HOST_DEVICE_INLINE operator C() { return val_; } + SOA_HOST_DEVICE_INLINE operator const C() const { return cVal_; } + SOA_HOST_DEVICE_INLINE C* operator& () { return &val_; } + SOA_HOST_DEVICE_INLINE const C* operator& () const { return &cVal_; } + template + SOA_HOST_DEVICE_INLINE MapType& operator= (const C2& v) { return val_ = v; } + typedef typename C::Scalar ValueType; + static constexpr auto valueSize = sizeof(C::Scalar); + SOA_HOST_DEVICE_INLINE size_t stride() { return stride_; } + template + typename Eigen::MatrixBase::template cross_product_return_type::type + SOA_HOST_DEVICE_INLINE cross(const Eigen::MatrixBase& other) const { return cVal_.cross(other); } + + template + typename Eigen::MatrixBase::template cross_product_return_type::type + SOA_HOST_DEVICE_INLINE cross(const OtherType& other) const { return cVal_.cross(other.cVal_); } + +private: + MapType val_; + const typename C::Scalar * __restrict__ crCol_; + CMapType cVal_; + size_t stride_; +}; + +// Helper template to avoid commas in macro +template +struct EigenConstMapMaker { + typedef Eigen::Map> Type; + class DataHolder { + public: + DataHolder(const typename C::Scalar * data): data_(data) {} + EigenConstMapMaker::Type withStride(size_t stride) { + return EigenConstMapMaker::Type(data_, C::RowsAtCompileTime, C::ColsAtCompileTime, + Eigen::InnerStride(stride)); + } + private: + const typename C::Scalar * const data_; + }; + static DataHolder withData(const typename C::Scalar * data) { + return DataHolder(data); + } +}; + +// Helper function to compute aligned size +inline size_t alignSize(size_t size, size_t alignment = 128) { + if (size) + return ((size - 1) / alignment + 1) * alignment; + else + return 0; +} + +/* declare "scalars" (one value shared across the whole SoA) and "columns" (one value 
per element) */ +#define _VALUE_TYPE_SCALAR 0 +#define _VALUE_TYPE_COLUMN 1 +#define _VALUE_TYPE_EIGEN_COLUMN 2 +#define _VALUE_TYPE_FUNDAMENTAL_COLUMN 3 + +#define SoA_scalar(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) +#define SoA_column(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) +#define SoA_eigenColumn(TYPE, NAME) (_VALUE_TYPE_EIGEN_COLUMN, TYPE, NAME) +#define SoA_FundamentalTypeColumn(TYPE, NAME) (_VALUE_TYPE_FUNDAMENTAL_COLUMN, TYPE, NAME) + +/* Iterate on the macro MACRO and return the result as a comma separated list */ +#define _ITERATE_ON_ALL_COMMA(MACRO, DATA, ...) \ + BOOST_PP_TUPLE_ENUM( \ + BOOST_PP_SEQ_TO_TUPLE( \ + _ITERATE_ON_ALL(MACRO, DATA, __VA_ARGS__) \ + ) \ + ) +/* Iterate MACRO on all elements */ +#define _ITERATE_ON_ALL(MACRO, DATA, ...) \ + BOOST_PP_SEQ_FOR_EACH(MACRO, DATA, \ + BOOST_PP_VARIADIC_TO_SEQ(__VA_ARGS__) \ + ) + +/* Switch on macros depending on scalar / column type */ +#define _SWITCH_ON_TYPE(VALUE_TYPE, IF_SCALAR, IF_COLUMN, IF_EIGEN_COLUMN, IF_FUNDAMENTAL_COLUMN) \ + BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_SCALAR), \ + IF_SCALAR, \ + BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_COLUMN), \ + IF_COLUMN, \ + BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_EIGEN_COLUMN), \ + IF_EIGEN_COLUMN, \ + BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_FUNDAMENTAL_COLUMN), \ + IF_FUNDAMENTAL_COLUMN, \ + BOOST_PP_EMPTY() \ + ) \ + ) \ + ) \ + ) + +#endif // ndef DataStrcutures_SoACommon_h diff --git a/src/cudadev/DataFormats/SoAStore.h b/src/cudadev/DataFormats/SoAStore.h new file mode 100644 index 000000000..fdd4cc987 --- /dev/null +++ b/src/cudadev/DataFormats/SoAStore.h @@ -0,0 +1,642 @@ +/* + * Structure-of-Arrays template with "columns" and "scalars", defined through preprocessor macros, + * with compile-time size and alignment, and accessors to the "rows" and "columns". 
+ */ + +#ifndef DataStrcutures_SoAStore_h +#define DataStrcutures_SoAStore_h + +#include "SoACommon.h" + +#include +#include + + +/* dump SoA fields information; these should expand to, for columns: + * Example: + * declare_SoA_template(SoA, + * // predefined static scalars + * // size_t size; + * // size_t alignment; + * + * // columns: one value per element + * SoA_FundamentalTypeColumn(double, x), + * SoA_FundamentalTypeColumn(double, y), + * SoA_FundamentalTypeColumn(double, z), + * SoA_eigenColumn(Eigen::Vector3d, a), + * SoA_eigenColumn(Eigen::Vector3d, b), + * SoA_eigenColumn(Eigen::Vector3d, r), + * SoA_column(uint16_t, colour), + * SoA_column(int32_t, value), + * SoA_column(double *, py), + * SoA_FundamentalTypeColumn(uint32_t, count), + * SoA_FundamentalTypeColumn(uint32_t, anotherCount), + * + * // scalars: one value for the whole structure + * SoA_scalar(const char *, description), + * SoA_scalar(uint32_t, someNumber) + * ); + * + * dumps as: + * SoA(32, 64): + * sizeof(SoA): 152 + * Column x_ at offset 0 has size 256 and padding 0 + * Column y_ at offset 256 has size 256 and padding 0 + * Column z_ at offset 512 has size 256 and padding 0 + * Eigen value a_ at offset 768 has dimension (3 x 1) and per column size 256 and padding 0 + * Eigen value b_ at offset 1536 has dimension (3 x 1) and per column size 256 and padding 0 + * Eigen value r_ at offset 2304 has dimension (3 x 1) and per column size 256 and padding 0 + * Column colour_ at offset 3072 has size 64 and padding 0 + * Column value_ at offset 3136 has size 128 and padding 0 + * Column py_ at offset 3264 has size 256 and padding 0 + * Column count_ at offset 3520 has size 128 and padding 0 + * Column anotherCount_ at offset 3648 has size 128 and padding 0 + * Scalar description_ at offset 3776 has size 8 and padding 56 + * Scalar someNumber_ at offset 3840 has size 4 and padding 60 + * Final offset = 3904 computeDataSize(...): 3904 + * + */ + +#define _DECLARE_SOA_DUMP_INFO_IMPL(VALUE_TYPE, 
CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Dump scalar */ \ + std::cout << " Scalar " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset \ + << " has size " << sizeof(CPP_TYPE) << " and padding " \ + << ((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment - sizeof(CPP_TYPE) \ + << std::endl; \ + offset+=((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment; \ + , \ + /* Dump column */ \ + std::cout << " Column " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset \ + << " has size " << sizeof(CPP_TYPE) * nElements << " and padding " \ + << (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment - (sizeof(CPP_TYPE) * nElements) \ + << std::endl; \ + offset+=(((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , \ + /* Dump Eigen column */ \ + std::cout << " Eigen value " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset \ + << " has dimension (" << CPP_TYPE::RowsAtCompileTime << " x " << CPP_TYPE::ColsAtCompileTime << ")" \ + << " and per column size " << sizeof(CPP_TYPE::Scalar) * nElements << " and padding " \ + << (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment \ + - (sizeof(CPP_TYPE::Scalar) * nElements) \ + << std::endl; \ + offset+=(((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment \ + * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + , \ + /* Dump fundamental type column */ \ + std::cout << " Column " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset \ + << " has size " << sizeof(CPP_TYPE) * nElements << " and padding " \ + << (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment - (sizeof(CPP_TYPE) * nElements) \ + << std::endl; \ + offset+=(((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ +) + +#define _DECLARE_SOA_DUMP_INFO(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_SOA_DUMP_INFO_IMPL TYPE_NAME) + + +/** + * SoAMetadata member computing column pitch + 
*/ +#define _COMPUTE_SOA_COLUMN_PITCH_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((sizeof(CPP_TYPE) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_; \ + } \ + , \ + /* Column */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_; \ + } \ + , \ + /* Eigen column */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_ \ + * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + } \ + , \ + /* Fundamental type column */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_; \ + } \ + ) + +#define _COMPUTE_SOA_COLUMN_PITCH(R, DATA, TYPE_NAME) \ + _COMPUTE_SOA_COLUMN_PITCH_IMPL TYPE_NAME + +/** + * SoAMetadata type alias (TypeOf_NAME) giving the C++ type of each scalar or column + */ +#define _DEFINE_SOA_COLUMN_TYPES_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + , \ + /* Column */ \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + , \ + /* Eigen column */ \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + , \ + /* Fundamental type column */ \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + ) + +#define _DEFINE_SOA_COLUMN_TYPES(R, DATA, TYPE_NAME) \ + _DEFINE_SOA_COLUMN_TYPES_IMPL TYPE_NAME + +/** + * Member assignment for trivial constructor + */ +#define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ + , \ + /* Column */ \ + ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ + , \ + /* Eigen column */ \ + ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ + ( BOOST_PP_CAT(NAME, 
Stride_) (0) ) \ + , \ + /* Fundamental type column */ \ + ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ +) + +#define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_MEMBER_TRIVIAL_CONSTRUCTION_IMPL TYPE_NAME) +/** + * Computation of the column or scalar pointer location in the memory layout (at SoA construction time) + */ +#define _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += (((sizeof(CPP_TYPE) - 1) / byteAlignment_) + 1) * byteAlignment_; \ + , \ + /* Column */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += (((nElements_ * sizeof(CPP_TYPE) - 1) / byteAlignment_) + 1) * byteAlignment_; \ + , \ + /* Eigen column */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment_) + 1) * byteAlignment_ \ + * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + BOOST_PP_CAT(NAME, Stride_) = (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment_) + 1) \ + * byteAlignment_ / sizeof(CPP_TYPE::Scalar); \ + , \ + /* Fundamental type column */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += (((nElements_ * sizeof(CPP_TYPE) - 1) / byteAlignment_) + 1) * byteAlignment_; \ + ) + +#define _ASSIGN_SOA_COLUMN_OR_SCALAR(R, DATA, TYPE_NAME) \ + _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL TYPE_NAME + +/** + * Computation of the column or scalar size for SoA size computation + */ +#define _ACCUMULATE_SOA_ELEMENT_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + ret += (((sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , \ + /* Column */ \ + ret += (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , \ + /* Eigen column */ \ + ret += (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment \ + * 
CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + , \ + /* Fundamental type column */ \ + ret += (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + ) + +#define _ACCUMULATE_SOA_ELEMENT(R, DATA, TYPE_NAME) \ + _ACCUMULATE_SOA_ELEMENT_IMPL TYPE_NAME + +/** + * Value accessor of the const_element subclass. + */ +#define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + SOA_HOST_DEVICE_INLINE \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + CPP_TYPE const & NAME() { return soa_. NAME (); } \ + , \ + /* Column */ \ + CPP_TYPE const & NAME() { return * (soa_. NAME () + index_); } \ + , \ + /* Eigen column */ \ + /* Ugly hack with a helper template to avoid having commas inside the macro parameter */ \ + EigenConstMapMaker::Type const NAME() { \ + return EigenConstMapMaker::withData(soa_. NAME () + index_).withStride(soa_.BOOST_PP_CAT(NAME, Stride)()); \ + } \ + , \ + /* Fundamental type column */ \ + CPP_TYPE const & NAME() { return * (soa_. NAME () + index_ + 0 + 0); } \ + ) + +#define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) \ + _DECLARE_SOA_CONST_ELEMENT_ACCESSOR_IMPL TYPE_NAME + +/** + * Generator of parameters for (non-const) element subclass (expanded comma separated). 
+ */ +#define _DECLARE_ELEMENT_VALUE_ARG_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_EMPTY() \ + , \ + /* Column */ \ + (CPP_TYPE *NAME) \ + , \ + /* Eigen column */ \ + (CPP_TYPE::Scalar *NAME) (size_t BOOST_PP_CAT(NAME, Stride)) \ + , \ + /* Fundamental type column */ \ + (CPP_TYPE &NAME) \ + ) + +#define _DECLARE_ELEMENT_VALUE_ARG(R, DATA, TYPE_NAME) \ + _DECLARE_ELEMENT_VALUE_ARG_IMPL TYPE_NAME + +/** + * Generator of member initialization for constructor of element subclass + */ +#define _DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_EMPTY() \ + , \ + /* Column */ \ + (NAME (DATA, NAME)) \ + , \ + /* Eigen column */ \ + (NAME (DATA, NAME, BOOST_PP_CAT(NAME, Stride))) \ + , \ + /* Fundamental type column */ \ + (NAME (NAME)) \ + ) + +/* declare AoS-like element member initialisers for the constructor; these should expand for columns only */ +#define _DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) + +/** + * Generator of member initialization for constructor of const element subclass + */ +#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_EMPTY() \ + , \ + /* Column */ \ + (BOOST_PP_CAT(NAME, _) (DATA, NAME)) \ + , \ + /* Eigen column */ \ + (BOOST_PP_CAT(NAME, _) (DATA, NAME, BOOST_PP_CAT(NAME, Stride))) \ + , \ + /* Fundamental type column */ \ + (BOOST_PP_CAT(NAME, _) (NAME)) \ + ) + +/* declare AoS-like const element member initialisers for the constructor; these should expand for columns only */ +#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) +/** + * 
Generator of the member-by-member copy operator of the element subclass. + */ +#define _DECLARE_ELEMENT_VALUE_COPY_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_EMPTY() \ + , \ + /* Column */ \ + NAME() = other.NAME(); \ + , \ + /* Eigen column */ \ + static_cast(NAME) = static_cast::type &>(other.NAME); \ + , \ + /* Fundamental type column */ \ + NAME = static_cast::type &>(other.NAME); \ + ) + +#define _DECLARE_ELEMENT_VALUE_COPY(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_ELEMENT_VALUE_COPY_IMPL TYPE_NAME) + +/** + * Declaration of the private members of the const element subclass + */ +#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_EMPTY() \ + , \ + /* Column */ \ + const SoAValue BOOST_PP_CAT(NAME, _); \ + , \ + /* Eigen column */ \ + const SoAEigenValue BOOST_PP_CAT(NAME, _); \ + , \ + /* Fundamental type column */ \ + const CPP_TYPE & BOOST_PP_CAT(NAME, _); \ + ) + +#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) \ + _DECLARE_CONST_ELEMENT_VALUE_MEMBER_IMPL TYPE_NAME + +/** + * Declaration of the members accessors of the const element subclass + */ +#define _DECLARE_CONST_ELEMENT_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_EMPTY() \ + , \ + /* Column */ /* (LOAD_INCOHERENT already done inside NAME_() */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return BOOST_PP_CAT(NAME, _)(); } \ + , \ + /* Eigen column */ \ + SOA_HOST_DEVICE_INLINE const SoAEigenValue NAME() const { return BOOST_PP_CAT(NAME, _); } \ + , \ + /* Fundamental type column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return LOAD_INCOHERENT(& BOOST_PP_CAT(NAME, _) ); } \ + ) + +#define _DECLARE_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) \ + _DECLARE_CONST_ELEMENT_ACCESSOR_IMPL TYPE_NAME + +/** + * Declaration of the members of the element subclass + */ +#define 
_DECLARE_ELEMENT_VALUE_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_EMPTY() \ + , \ + /* Column */ \ + SoAValue NAME; \ + , \ + /* Eigen column */ \ + SoAEigenValue NAME; \ + , \ + /* Fundamental type column */ \ + CPP_TYPE & NAME; \ + ) + + +#define _DECLARE_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) \ + _DECLARE_ELEMENT_VALUE_MEMBER_IMPL TYPE_NAME + +/** + * Parameters passed to element subclass constructor in operator[] + */ +#define _DECLARE_ELEMENT_CONSTR_CALL_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_EMPTY() \ + , \ + /* Column */ \ + (BOOST_PP_CAT(NAME, _)) \ + , \ + /* Eigen column */ \ + (BOOST_PP_CAT(NAME, _)) (BOOST_PP_CAT(NAME, Stride_)) \ + , \ + /* Fundamental type column */ \ + (BOOST_PP_CAT(NAME, _[index])) \ + ) + +#define _DECLARE_ELEMENT_CONSTR_CALL(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_ELEMENT_CONSTR_CALL_IMPL TYPE_NAME) + +/** + * Direct access to column pointer and indexed access + */ +#define _DECLARE_SOA_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME() { return * BOOST_PP_CAT(NAME, _); } \ + , \ + /* Column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE* NAME() { return BOOST_PP_CAT(NAME, _); } \ + SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME(size_t index) { return BOOST_PP_CAT(NAME, _)[index]; } \ + , \ + /* Eigen column */ \ + /* Unsupported for the moment TODO */ \ + BOOST_PP_EMPTY() \ + , \ + /* Fundamental type column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE* NAME() { return BOOST_PP_CAT(NAME, _); } \ + SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME(size_t index) { return BOOST_PP_CAT(NAME, _)[index]; } \ + ) + +#define _DECLARE_SOA_ACCESSOR(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_SOA_ACCESSOR_IMPL TYPE_NAME) + +/** + * Direct access to column pointer (const) and indexed access. 
+ */ +#define _DECLARE_SOA_CONST_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return LOAD_INCOHERENT(BOOST_PP_CAT(NAME, _)); } \ + , \ + /* Column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) const { return LOAD_INCOHERENT(BOOST_PP_CAT(NAME, _) + index); } \ + , \ + /* Eigen column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE::Scalar const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ + SOA_HOST_DEVICE_INLINE size_t BOOST_PP_CAT(NAME,Stride)() { return BOOST_PP_CAT(NAME, Stride_); } \ + , \ + /* Fundamental type column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) const { return LOAD_INCOHERENT(BOOST_PP_CAT(NAME, _) + index); } \ + ) + +#define _DECLARE_SOA_CONST_ACCESSOR(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_SOA_CONST_ACCESSOR_IMPL TYPE_NAME) + +/** + * SoA class member declaration (column pointers). + */ +#define _DECLARE_SOA_DATA_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + CPP_TYPE * BOOST_PP_CAT(NAME, _) = nullptr; \ + , \ + /* Column */ \ + CPP_TYPE * BOOST_PP_CAT(NAME, _) = nullptr; \ + , \ + /* Eigen column */ \ + CPP_TYPE::Scalar * BOOST_PP_CAT(NAME, _) = nullptr; \ + size_t BOOST_PP_CAT(NAME, Stride_) = 0; \ + , \ + /* Fundamental type column */ \ + CPP_TYPE * BOOST_PP_CAT(NAME, _) = nullptr; \ + ) + +#define _DECLARE_SOA_DATA_MEMBER(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_SOA_DATA_MEMBER_IMPL TYPE_NAME) + +#ifdef DEBUG +#define _DO_RANGECHECK true +#else +#define _DO_RANGECHECK false +#endif + +/* + * A macro defining a SoA store (collection of scalars and columns of equal lengths + */ +#define generate_SoA_store(CLASS, ...) 
\ +struct CLASS { \ + \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CLASS; \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. \ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ + constexpr static size_t defaultAlignment = 128; \ + \ + /* dump the SoA internal structure */ \ + SOA_HOST_ONLY \ + static void dump(size_t nElements, size_t byteAlignment = defaultAlignment) { \ + std::cout << #CLASS "(" << nElements << ", " << byteAlignment << "): " << std::endl; \ + std::cout << " sizeof(" #CLASS "): " << sizeof(CLASS) << std::endl; \ + size_t offset=0; \ + _ITERATE_ON_ALL(_DECLARE_SOA_DUMP_INFO, ~, __VA_ARGS__) \ + std::cout << "Final offset = " << offset << " computeDataSize(...): " << computeDataSize(nElements, byteAlignment) << std::endl;\ + std::cout << std::endl; \ + } \ + /* Helper function used by caller to externally allocate the storage */ \ + static size_t computeDataSize(size_t nElements, size_t byteAlignment = defaultAlignment) { \ + size_t ret = 0; \ + _ITERATE_ON_ALL(_ACCUMULATE_SOA_ELEMENT, ~, __VA_ARGS__) \ + return ret; \ + } \ + \ + /** \ + * Helper/friend class allowing SoA introspection. 
\ + */ \ + struct SoAMetadata { \ + friend CLASS; \ + SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ + SOA_HOST_DEVICE_INLINE size_t byteSize() const { return parent_.byteSize_; } \ + SOA_HOST_DEVICE_INLINE size_t byteAlignment() const { return parent_.byteAlignment_; } \ + SOA_HOST_DEVICE_INLINE std::byte* data() const { return parent_.mem_; } \ + SOA_HOST_DEVICE_INLINE std::byte* nextByte() const { return parent_.mem_ + parent_.byteSize_; } \ + SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) { \ + return CLASS(addr, parent_.nElements_, parent_.byteAlignment_ ); \ + } \ + _ITERATE_ON_ALL(_COMPUTE_SOA_COLUMN_PITCH, ~, __VA_ARGS__) \ + _ITERATE_ON_ALL(_DEFINE_SOA_COLUMN_TYPES, ~, __VA_ARGS__) \ + \ + private: \ + SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent): parent_(parent) {} \ + const CLASS& parent_; \ + }; \ + friend SoAMetadata; \ + SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ + \ + /* Trivial constuctor */ \ + CLASS(): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_MEMBER_TRIVIAL_CONSTRUCTION, ~, __VA_ARGS__) \ + {} \ + \ + /* Constructor relying on user provided storage */ \ + SOA_HOST_ONLY CLASS(std::byte* mem, size_t nElements, size_t byteAlignment = defaultAlignment): \ + mem_(mem), nElements_(nElements), byteAlignment_(byteAlignment) { \ + auto curMem = mem_; \ + _ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ + /* Sanity check: we should have reached the computed size, only on host code */ \ + byteSize_ = computeDataSize(nElements_, byteAlignment_); \ + if(mem_ + byteSize_ != curMem) \ + throw std::out_of_range("In " #CLASS "::" #CLASS ": unexpected end pointer."); \ + } \ + \ + /* Constructor relying on user provided storage */ \ + SOA_DEVICE_ONLY CLASS(bool devConstructor, std::byte* mem, size_t nElements, size_t byteAlignment = defaultAlignment): \ + mem_(mem), nElements_(nElements), byteAlignment_(byteAlignment) { \ + auto curMem = mem_; \ + 
_ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ + } \ + \ + struct const_element { \ + SOA_HOST_DEVICE_INLINE \ + const_element(size_t index, \ + /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_ARG, index, __VA_ARGS__) \ + ): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION, index, __VA_ARGS__) \ + {} \ + _ITERATE_ON_ALL(_DECLARE_CONST_ELEMENT_ACCESSOR, ~, __VA_ARGS__) \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_CONST_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ + }; \ + \ + struct element { \ + SOA_HOST_DEVICE_INLINE \ + element(size_t index, \ + /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_ARG, index, __VA_ARGS__) \ + ): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION, index, __VA_ARGS__) \ + {} \ + SOA_HOST_DEVICE_INLINE \ + element& operator=(const element& other) { \ + _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_COPY, ~, __VA_ARGS__) \ + return *this; \ + } \ + _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ + }; \ + \ + /* AoS-like accessor (non-const) */ \ + SOA_HOST_DEVICE_INLINE \ + element operator[](size_t index) { \ + rangeCheck(index); \ + return element(index, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__) ); \ + } \ + \ + /* AoS-like accessor (const) */ \ + SOA_HOST_DEVICE_INLINE \ + const const_element operator[](size_t index) const { \ + rangeCheck(index); \ + return const_element(index, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__) ); \ + } \ + \ + /* accessors */ \ + _ITERATE_ON_ALL(_DECLARE_SOA_ACCESSOR, ~, __VA_ARGS__) \ + _ITERATE_ON_ALL(_DECLARE_SOA_CONST_ACCESSOR, ~, __VA_ARGS__) \ + \ + /* dump the SoA internal structure */ \ + template SOA_HOST_ONLY friend void dump(); \ + \ +private: \ + /* Range checker conditional to the macro _DO_RANGECHECK */ \ + SOA_HOST_DEVICE_INLINE \ + void rangeCheck(size_t index) const { \ + if constexpr (_DO_RANGECHECK) { \ + if 
(index >= nElements_) { \ + printf("In " #CLASS "::rangeCheck(): index out of range: %zu with nElements: %zu\n", index, nElements_); \ + assert(false); \ + } \ + } \ + } \ + \ + /* data members */ \ + std::byte* mem_; \ + size_t nElements_; \ + size_t byteSize_; \ + size_t byteAlignment_; \ + _ITERATE_ON_ALL(_DECLARE_SOA_DATA_MEMBER, ~, __VA_ARGS__) \ +} + +#endif // ndef DataStrcutures_SoAStore_h diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h new file mode 100644 index 000000000..6b26d42fb --- /dev/null +++ b/src/cudadev/DataFormats/SoAView.h @@ -0,0 +1,351 @@ +/* + * Structure-of-Arrays template allowing access to a selection of scalars and columns from one + * or multiple SoAStores. A SoAStore is already a view to its complete set of columns. + * This class will allow handling subsets of columns or set of columns from multiple SoAViews, possibly + * with varying columns lengths. + */ + +#ifndef DataStrcutures_SoAView_h +#define DataStrcutures_SoAView_h + +#include "SoACommon.h" + +#define SoA_view_store(TYPE, NAME) \ + (TYPE, NAME) + +#define SoA_view_store_list(...) \ + __VA_ARGS__ + +#define SoA_view_value(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + (STORE_NAME, STORE_MEMBER, LOCAL_NAME) + +#define SoA_view_value_list(...) 
\ + __VA_ARGS__ + +/* + * A macro defining a SoA view (collection of columns from multiple stores) + * + * Usage: + * generate_SoA_view(PixelXYView, + * SoA_view_store_list( + * SoA_view_store(PixelDigis, pixelDigis), + * SoA_view_store(PixelRecHitsStore, pixelsRecHit) + * ), + * SoA_view_value_list( + * SoA_view_value(pixelDigis, x, digisX), + * SoA_view_value(pixelDigis, y, digisY), + * SoA_view_value(pixelsRecHit, x, recHitsX), + * SoA_view_value(pixelsRecHit, y, recHitsY) + * ) + * ); + * + */ + +/* + * Member definition macros for views + */ + + +/** + * Store types aliasing for referencing by name + */ +#define _DECLARE_VIEW_STORE_TYPE_ALIAS_IMPL(TYPE, NAME) \ + typedef TYPE BOOST_PP_CAT(TypeOf_, NAME); + +#define _DECLARE_VIEW_STORE_TYPE_ALIAS(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_STORE_TYPE_ALIAS_IMPL TYPE_NAME) + +/** + * Member types aliasing for referencing by name + */ +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + typedef BOOST_PP_CAT(TypeOf_, STORE_NAME) :: SoAMetadata:: BOOST_PP_CAT(TypeOf_, STORE_MEMBER) BOOST_PP_CAT(TypeOf_, LOCAL_NAME); + +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL STORE_MEMBER_NAME) + +/** + * Member assignment for trivial constructor + */ +#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + ( BOOST_PP_CAT(LOCAL_NAME, _) (nullptr) ) + +#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL STORE_MEMBER_NAME) + +/** + * Generator of parameters (stores) for constructor. 
+ */ +#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL(STORE_TYPE, STORE_NAME, DATA) \ + ( DATA STORE_TYPE & STORE_NAME ) + +#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) + +/** + * Generator of member initialization from constructor. + */ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(STORE, MEMBER, NAME) \ + ( BOOST_PP_CAT(NAME, _) ( STORE . MEMBER () ) ) + +#define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL STORE_MEMBER_NAME) + +/** + * Generator of element members initializer. + */ +#define _DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL(STORE, MEMBER, LOCAL_NAME, DATA) \ + ( LOCAL_NAME (DATA, LOCAL_NAME) ) + +#define _DECLARE_VIEW_ELEM_MEMBER_INIT(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) + +/** + * Helper macro extracting the data type from a column accessor in a store + */ +#define _COLUMN_TYPE(STORE_NAME, STORE_MEMBER) \ + typename std::remove_pointer< decltype (BOOST_PP_CAT(STORE_NAME, Type)() :: STORE_MEMBER () ) >::type + +/** + * Generator of parameters for (non-const) element subclass (expanded comma separated). 
+ */ +#define _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + ( DATA BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME ) + +#define _DECLARE_VIEW_ELEMENT_VALUE_ARG(R, DATA, STORE_MEMBER_NAME) \ + _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK (STORE_MEMBER_NAME, DATA) + +/** + * Generator of member initialization for constructor of the const element subclass + */ +#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + (BOOST_PP_CAT(LOCAL_NAME, _) (DATA, LOCAL_NAME) ) + +/* declare AoS-like element value args for constructor; these should expand, for columns only */ +#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) + + +/** + * Declaration of the members accessors of the const element subclass + */ +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME() const { \ + return BOOST_PP_CAT(LOCAL_NAME, _)(); \ + } + +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ + _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL STORE_MEMBER_NAME + +/** + * Declaration of the private members of the const element subclass + */ +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + const SoAConstValue< BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) > BOOST_PP_CAT(LOCAL_NAME, _); \ + +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ + _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL STORE_MEMBER_NAME + +/** + * Generator of the member-by-member copy operator of the element subclass. 
+ */ +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + LOCAL_NAME() = other.LOCAL_NAME(); + +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL STORE_MEMBER_NAME) + +/** + * Declaration of the members of the (non-const) element subclass + */ +#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + SoAValue LOCAL_NAME; \ + +#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ + _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL STORE_MEMBER_NAME + +/** + * Parameters passed to element subclass constructor in operator[] + */ +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + (BOOST_PP_CAT(LOCAL_NAME, _)) + +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL STORE_MEMBER_NAME) + +/** + * Direct access to column pointer and indexed access + */ +#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + /* Column */ \ + SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME() { return BOOST_PP_CAT(LOCAL_NAME, _); } \ + SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) & LOCAL_NAME(size_t index) { \ + return BOOST_PP_CAT(LOCAL_NAME, _)[index]; \ + } + +#define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_ACCESSOR_IMPL STORE_MEMBER_NAME) + +/** + * Direct access to column pointer (const) and indexed access. 
+ */ +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + /* Column */ \ + SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) const * LOCAL_NAME() const { \ + return BOOST_PP_CAT(LOCAL_NAME, _); \ + } \ + SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME(size_t index) const { \ + return LOAD_INCOHERENT(BOOST_PP_CAT(LOCAL_NAME, _) + index); \ + } + +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL STORE_MEMBER_NAME) + +/** + * SoA class member declaration (column pointers). + */ +#define _DECLARE_VIEW_SOA_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + DATA BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _) = nullptr; + +#define _DECLARE_VIEW_SOA_MEMBER(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) + +#define generate_SoA_view(CLASS, STORES_LIST, VALUE_LIST) \ +struct CLASS { \ + \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CLASS; \ + \ + /** \ + * Helper/friend class allowing SoA introspection. 
\ + */ \ + struct SoAMetadata { \ + /* Alias store types to name-derived identifier to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ + \ + /* Alias member types to name-derived identifier to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, ~, VALUE_LIST) \ + }; \ + \ + /* Trivial constructor */ \ + CLASS(): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) \ + {} \ + \ + /* Constructor relying on user provided stores */ \ + SOA_HOST_ONLY CLASS ( _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), STORES_LIST) ): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ + \ + struct const_element { \ + SOA_HOST_DEVICE_INLINE \ + const_element(size_t index, \ + /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST) \ + ): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) \ + {} \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + struct element { \ + SOA_HOST_DEVICE_INLINE \ + element(size_t index, \ + /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, BOOST_PP_EMPTY(), VALUE_LIST) \ + ): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEM_MEMBER_INIT, index, VALUE_LIST) \ + {} \ + SOA_HOST_DEVICE_INLINE \ + element& operator=(const element& other) { \ + _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_COPY, ~, VALUE_LIST) \ + return *this; \ + } \ + _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + /* AoS-like accessor (non-const) */ \ + SOA_HOST_DEVICE_INLINE \ + element operator[](size_t index) { \ + return element(index, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST) ); \ + } \ + \ + /* AoS-like accessor 
(const) */ \ + SOA_HOST_DEVICE_INLINE \ + const const_element operator[](size_t index) const { \ + return const_element(index, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST) ); \ + } \ + \ + /* accessors */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_ACCESSOR, ~, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ + \ + /* dump the SoA internal structure */ \ + template SOA_HOST_ONLY friend void dump(); \ + \ +private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ +} + +#define generate_SoA_const_view(CLASS, STORES_LIST, VALUE_LIST) \ +struct CLASS { \ + \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CLASS; \ + \ + /** \ + * Helper/friend class allowing SoA introspection. \ + */ \ + struct SoAMetadata { \ + /* Alias store types to name-derived identifier to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ + \ + /* Alias member types to name-derived identifier to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, ~, VALUE_LIST) \ + }; \ + \ + /* Trivial constructor */ \ + CLASS(): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) \ + {} \ + \ + /* Constructor relying on user provided stores */ \ + SOA_HOST_ONLY CLASS ( _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, STORES_LIST) ): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ + \ + struct const_element { \ + SOA_HOST_DEVICE_INLINE \ + const_element(size_t index, \ + /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST) \ + ): \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) \ + {} \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ + \ + private: \ + 
_ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + /* AoS-like accessor (const) */ \ + SOA_HOST_DEVICE_INLINE \ + const const_element operator[](size_t index) const { \ + return const_element(index, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST) ); \ + } \ + \ + /* accessors */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ + \ + /* dump the SoA internal structure */ \ + template SOA_HOST_ONLY friend void dump(); \ + \ +private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ +} + +#endif // ndef DataStrcutures_SoAView_h \ No newline at end of file From d8db9e95a89a421b570814dd0d7b6559eeef2c81 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 12 Oct 2021 14:24:14 +0200 Subject: [PATCH 03/50] [cudadev] Converted SiPixelROCsStatusAndMapping to SoA. --- .../CondFormats/SiPixelROCsStatusAndMapping.h | 22 ++++----- .../SiPixelROCsStatusAndMappingWrapper.cc | 42 ++++++++--------- .../SiPixelROCsStatusAndMappingWrapper.h | 22 +++++---- ...elROCsStatusAndMappingWrapperESProducer.cc | 6 ++- .../SiPixelRawToClusterCUDA.cc | 2 +- .../SiPixelRawToClusterGPUKernel.cu | 45 +++++++++---------- .../SiPixelRawToClusterGPUKernel.h | 8 +--- 7 files changed, 71 insertions(+), 76 deletions(-) diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h index f7cd8dedc..f46f79da4 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h @@ -1,6 +1,8 @@ #ifndef CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h #define CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h +#include "DataFormats/SoAStore.h" + namespace pixelgpudetails { // Maximum fed for phase1 is 150 but not all of them are filled // Update the number FED based on maximum fed found in the cabling map @@ -11,15 +13,15 @@ namespace pixelgpudetails { 
constexpr unsigned int MAX_SIZE_BYTE_BOOL = MAX_SIZE * sizeof(unsigned char); } // namespace pixelgpudetails -struct SiPixelROCsStatusAndMapping { - alignas(128) unsigned int fed[pixelgpudetails::MAX_SIZE]; - alignas(128) unsigned int link[pixelgpudetails::MAX_SIZE]; - alignas(128) unsigned int roc[pixelgpudetails::MAX_SIZE]; - alignas(128) unsigned int rawId[pixelgpudetails::MAX_SIZE]; - alignas(128) unsigned int rocInDet[pixelgpudetails::MAX_SIZE]; - alignas(128) unsigned int moduleId[pixelgpudetails::MAX_SIZE]; - alignas(128) unsigned char badRocs[pixelgpudetails::MAX_SIZE]; - alignas(128) unsigned int size = 0; -}; +generate_SoA_store(SiPixelROCsStatusAndMapping, + SoA_column(unsigned int, fed), + SoA_column(unsigned int, link), + SoA_column(unsigned int, roc), + SoA_column(unsigned int, rawId), + SoA_column(unsigned int, rocInDet), + SoA_column(unsigned int, moduleId), + SoA_column(unsigned char, badRocs), + SoA_scalar(unsigned int, size) +); #endif // CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc index 9201903db..c3eafcaf3 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc @@ -13,44 +13,40 @@ #include "CUDACore/host_unique_ptr.h" #include "CUDADataFormats/gpuClusteringConstants.h" #include "CondFormats/SiPixelROCsStatusAndMappingWrapper.h" +#include "CUDACore/copyAsync.h" SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMapping const& cablingMap, std::vector modToUnp) : modToUnpDefault(modToUnp.size()), hasQuality_(true) { - cudaCheck(cudaMallocHost(&cablingMapHost, sizeof(SiPixelROCsStatusAndMapping))); - std::memcpy(cablingMapHost, &cablingMap, sizeof(SiPixelROCsStatusAndMapping)); - + // TODO: check if cudaStreamDefault is appropriate + auto cablingMapMetadata = 
cablingMap.soaMetadata(); + cablingMapHostBuffer = cms::cuda::make_host_unique(cablingMapMetadata.byteSize(), cudaStreamDefault); + std::memcpy(cablingMapHostBuffer.get(), cablingMapMetadata.data(), cablingMapMetadata.byteSize()); std::copy(modToUnp.begin(), modToUnp.end(), modToUnpDefault.begin()); } -SiPixelROCsStatusAndMappingWrapper::~SiPixelROCsStatusAndMappingWrapper() { cudaCheck(cudaFreeHost(cablingMapHost)); } - -const SiPixelROCsStatusAndMapping* SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync( - cudaStream_t cudaStream) const { - const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { - // allocate - cudaCheck(cudaMalloc(&data.cablingMapDevice, sizeof(SiPixelROCsStatusAndMapping))); - - // transfer - cudaCheck(cudaMemcpyAsync( - data.cablingMapDevice, this->cablingMapHost, sizeof(SiPixelROCsStatusAndMapping), cudaMemcpyDefault, stream)); - }); +const SiPixelROCsStatusAndMapping & SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync(cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, + [this](GPUData& data, cudaStream_t stream) { + // allocate + data.allocate(pixelgpudetails::MAX_SIZE, stream); + // transfer + cms::cuda::copyAsync(data.cablingMapDeviceBuffer, this->cablingMapHostBuffer, + data.cablingMapDevice.soaMetadata().byteSize(), stream); + } + ); return data.cablingMapDevice; } const unsigned char* SiPixelROCsStatusAndMappingWrapper::getModToUnpAllAsync(cudaStream_t cudaStream) const { const auto& data = modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cudaStream_t stream) { - cudaCheck(cudaMalloc((void**)&data.modToUnpDefault, pixelgpudetails::MAX_SIZE_BYTE_BOOL)); - cudaCheck(cudaMemcpyAsync(data.modToUnpDefault, + data.modToUnpDefault = cms::cuda::make_device_unique(pixelgpudetails::MAX_SIZE_BYTE_BOOL, stream); + cudaCheck(cudaMemcpyAsync(data.modToUnpDefault.get(), this->modToUnpDefault.data(), 
this->modToUnpDefault.size() * sizeof(unsigned char), cudaMemcpyDefault, stream)); }); - return data.modToUnpDefault; -} - -SiPixelROCsStatusAndMappingWrapper::GPUData::~GPUData() { cudaCheck(cudaFree(cablingMapDevice)); } - -SiPixelROCsStatusAndMappingWrapper::ModulesToUnpack::~ModulesToUnpack() { cudaCheck(cudaFree(modToUnpDefault)); } + return data.modToUnpDefault.get(); +} \ No newline at end of file diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h index 8917a35cc..3139a7b13 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h @@ -3,6 +3,7 @@ #include "CUDACore/ESProduct.h" #include "CUDACore/HostAllocator.h" +#include "CUDACore/host_unique_ptr.h" #include "CUDACore/device_unique_ptr.h" #include "CondFormats/SiPixelROCsStatusAndMapping.h" @@ -13,13 +14,12 @@ class SiPixelROCsStatusAndMappingWrapper { public: explicit SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMapping const &cablingMap, - std::vector modToUnp); - ~SiPixelROCsStatusAndMappingWrapper(); + std::vector modToUnp); bool hasQuality() const { return hasQuality_; } // returns pointer to GPU memory - const SiPixelROCsStatusAndMapping *getGPUProductAsync(cudaStream_t cudaStream) const; + const SiPixelROCsStatusAndMapping & getGPUProductAsync(cudaStream_t cudaStream) const; // returns pointer to GPU memory const unsigned char *getModToUnpAllAsync(cudaStream_t cudaStream) const; @@ -28,17 +28,23 @@ class SiPixelROCsStatusAndMappingWrapper { std::vector> modToUnpDefault; bool hasQuality_; - SiPixelROCsStatusAndMapping *cablingMapHost = nullptr; // pointer to struct in CPU + cms::cuda::host::unique_ptr cablingMapHostBuffer; // host pinned memory for cabling map. 
struct GPUData { - ~GPUData(); - SiPixelROCsStatusAndMapping *cablingMapDevice = nullptr; // pointer to struct in GPU + void allocate(size_t size, cudaStream_t stream) { + cablingMapDeviceBuffer = cms::cuda::make_device_unique( + SiPixelROCsStatusAndMapping::computeDataSize(size), stream); + // Explicit call to destructor before overwriting the object. + cablingMapDevice.~SiPixelROCsStatusAndMapping(); + new(&cablingMapDevice) SiPixelROCsStatusAndMapping(cablingMapDeviceBuffer.get(), size); + } + cms::cuda::device::unique_ptr cablingMapDeviceBuffer; + SiPixelROCsStatusAndMapping cablingMapDevice = SiPixelROCsStatusAndMapping(nullptr, 0); // map struct in GPU }; cms::cuda::ESProduct gpuData_; struct ModulesToUnpack { - ~ModulesToUnpack(); - unsigned char *modToUnpDefault = nullptr; // pointer to GPU + cms::cuda::device::unique_ptr modToUnpDefault; // pointer to GPU }; cms::cuda::ESProduct modToUnp_; }; diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc b/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc index c77e1ed35..e29ccbbcc 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc @@ -30,8 +30,10 @@ void SiPixelROCsStatusAndMappingWrapperESProducer::produce(edm::EventSetup& even { std::ifstream in(data_ / "cablingMap.bin", std::ios::binary); in.exceptions(std::ifstream::badbit | std::ifstream::failbit | std::ifstream::eofbit); - SiPixelROCsStatusAndMapping obj; - in.read(reinterpret_cast(&obj), sizeof(SiPixelROCsStatusAndMapping)); + // We use default alignment + auto objBuffer = std::make_unique(SiPixelROCsStatusAndMapping::computeDataSize(pixelgpudetails::MAX_SIZE)); + SiPixelROCsStatusAndMapping obj(objBuffer.get(), pixelgpudetails::MAX_SIZE); + in.read(reinterpret_cast(obj.soaMetadata().data()), obj.soaMetadata().byteSize()); unsigned int 
modToUnpDefSize; in.read(reinterpret_cast(&modToUnpDefSize), sizeof(unsigned int)); std::vector modToUnpDefault(modToUnpDefSize); diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterCUDA.cc b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterCUDA.cc index a5229b295..f59a1a9d6 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterCUDA.cc +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterCUDA.cc @@ -82,7 +82,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, ") differs the one from SiPixelROCsStatusAndMappingWrapper. Please fix your configuration."); } // get the GPU product already here so that the async transfer can begin - const auto* gpuMap = hgpuMap.getGPUProductAsync(ctx.stream()); + const auto & gpuMap = hgpuMap.getGPUProductAsync(ctx.stream()); const unsigned char* gpuModulesToUnpack = hgpuMap.getModToUnpAllAsync(ctx.stream()); auto const& hgains = iSetup.get(); diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu index aaa72c5e0..c894161df 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu @@ -54,19 +54,14 @@ namespace pixelgpudetails { //////////////////// + __device__ uint32_t cablingIndex(uint8_t fed, uint32_t link, uint32_t roc) { + return fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; + } + __device__ bool isBarrel(uint32_t rawId) { return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask)); } - __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelROCsStatusAndMapping *cablingMap, - uint8_t fed, - uint32_t link, - uint32_t roc) { - uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; - pixelgpudetails::DetIdGPU detId = { - cablingMap->rawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index]}; 
- return detId; - } //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 @@ -187,7 +182,7 @@ namespace pixelgpudetails { __device__ uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, - const SiPixelROCsStatusAndMapping *cablingMap, + const SiPixelROCsStatusAndMapping &cablingMap, bool debug = false) { uint8_t errorType = (errorWord >> sipixelconstants::ROC_shift) & sipixelconstants::ERROR_mask; if (errorType < 25) @@ -197,9 +192,9 @@ namespace pixelgpudetails { switch (errorType) { case (25): { errorFound = true; - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + 1; - if (index > 1 && index <= cablingMap->size) { - if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) + auto index = cablingIndex(fedId, link, 1); + if (index > 1 && index <= cablingMap.size()) { + if (!(link == cablingMap[index].link() && 1 == cablingMap[index].roc())) errorFound = false; } if (debug and errorFound) @@ -267,7 +262,7 @@ namespace pixelgpudetails { __device__ uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, - const SiPixelROCsStatusAndMapping *cablingMap, + const SiPixelROCsStatusAndMapping &cablingMap, bool debug = false) { uint32_t rID = 0xffffffff; @@ -279,7 +274,7 @@ namespace pixelgpudetails { case 40: { uint32_t roc = 1; uint32_t link = sipixelconstants::getLink(errWord); - uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + uint32_t rID_temp = cablingMap[cablingIndex(fedId, link, roc)].rawId(); if (rID_temp != gpuClustering::invalidModuleId) rID = rID_temp; break; @@ -312,7 +307,7 @@ namespace pixelgpudetails { uint32_t roc = 1; uint32_t link = chanNmbr; - uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + uint32_t rID_temp = cablingMap[cablingIndex(fedId, link, roc)].rawId(); if (rID_temp != 
gpuClustering::invalidModuleId) rID = rID_temp; break; @@ -321,7 +316,7 @@ namespace pixelgpudetails { case 38: { uint32_t roc = sipixelconstants::getROC(errWord); uint32_t link = sipixelconstants::getLink(errWord); - uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).rawId; + uint32_t rID_temp = cablingMap[cablingIndex(fedId, link, roc)].rawId(); if (rID_temp != gpuClustering::invalidModuleId) rID = rID_temp; break; @@ -334,7 +329,7 @@ namespace pixelgpudetails { } // Kernel to perform Raw to Digi conversion - __global__ void RawToDigi_kernel(const SiPixelROCsStatusAndMapping *cablingMap, + __global__ void RawToDigi_kernel(const SiPixelROCsStatusAndMapping cablingMap, const unsigned char *modToUnp, const uint32_t wordCounter, const uint32_t *word, @@ -374,7 +369,8 @@ namespace pixelgpudetails { uint32_t link = sipixelconstants::getLink(ww); // Extract link uint32_t roc = sipixelconstants::getROC(ww); // Extract Roc in link - pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); + auto index = cablingIndex(fedId, link, roc); + auto detId = cablingMap[index]; uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); skipROC = (roc < pixelgpudetails::maxROCIndex) ? 
false : (errorType != 0); @@ -384,13 +380,12 @@ namespace pixelgpudetails { continue; } - uint32_t rawId = detId.rawId; - uint32_t rocIdInDetUnit = detId.rocInDet; + auto rawId = detId.rawId(); + auto rocIdInDetUnit = detId.rocInDet(); bool barrel = isBarrel(rawId); - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; if (useQualityInfo) { - skipROC = cablingMap->badRocs[index]; + skipROC = cablingMap[index].badRocs(); if (skipROC) continue; } @@ -450,7 +445,7 @@ namespace pixelgpudetails { yy[gIndex] = globalPix.col; // origin shifting by 1 0-415 adc[gIndex] = sipixelconstants::getADC(ww); pdigi[gIndex] = pixelgpudetails::pack(globalPix.row, globalPix.col, adc[gIndex]); - moduleId[gIndex] = detId.moduleId; + moduleId[gIndex] = detId.moduleId(); rawIdArr[gIndex] = rawId; } // end of loop (gIndex < end) @@ -499,7 +494,7 @@ namespace pixelgpudetails { // Interface to outside void SiPixelRawToClusterGPUKernel::makeClustersAsync(bool isRun2, const SiPixelClusterThresholds clusterThresholds, - const SiPixelROCsStatusAndMapping *cablingMap, + const SiPixelROCsStatusAndMapping &cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const WordFedAppender &wordFed, diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h index 04e8b99b9..9de7f682a 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h @@ -44,12 +44,6 @@ namespace pixelgpudetails { const uint32_t MAX_WORD = 2000; - struct DetIdGPU { - uint32_t rawId; - uint32_t rocInDet; - uint32_t moduleId; - }; - struct Pixel { uint32_t row; uint32_t col; @@ -141,7 +135,7 @@ namespace pixelgpudetails { void makeClustersAsync(bool isRun2, const SiPixelClusterThresholds clusterThresholds, - const SiPixelROCsStatusAndMapping* cablingMap, + const SiPixelROCsStatusAndMapping 
&cablingMap, const unsigned char* modToUnp, const SiPixelGainForHLTonGPU* gains, const WordFedAppender& wordFed, From 7e0156248260ba6d2438dbd7ae82bb4b5ed33150 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 12 Oct 2021 17:05:03 +0200 Subject: [PATCH 04/50] [cudadev] Moved SiPixelClustersCUDA to SoA store. --- .../CUDADataFormats/SiPixelClustersCUDA.cc | 18 ++----- .../CUDADataFormats/SiPixelClustersCUDA.h | 54 ++++++++----------- .../PixelRecHitGPUKernel.cu | 2 +- .../plugin-SiPixelRecHits/gpuPixelRecHits.h | 14 ++--- src/cudadev/test/TrackingRecHit2DCUDA_t.cu | 4 +- 5 files changed, 37 insertions(+), 55 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc index a9feabb92..6d9472654 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc @@ -3,17 +3,9 @@ #include "CUDACore/host_unique_ptr.h" #include "CUDADataFormats/SiPixelClustersCUDA.h" -SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream) - : moduleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)), - clusInModule_d(cms::cuda::make_device_unique(maxModules, stream)), - moduleId_d(cms::cuda::make_device_unique(maxModules, stream)), - clusModuleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)) { - auto view = cms::cuda::make_host_unique(stream); - view->moduleStart_ = moduleStart_d.get(); - view->clusInModule_ = clusInModule_d.get(); - view->moduleId_ = moduleId_d.get(); - view->clusModuleStart_ = clusModuleStart_d.get(); +SiPixelClustersCUDA::SiPixelClustersCUDA(): data_d(), deviceStore_(data_d.get(), 0) {} - view_d = cms::cuda::make_device_unique(stream); - cms::cuda::copyAsync(view_d, view, stream); -} +SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream) /* moduleStart and clusModuleStart were allocated with maxModules + 1 entries before the SoA conversion; all columns share one length, 
so size the store with maxModules + 1 */ + : data_d(cms::cuda::make_device_unique(DeviceStore::computeDataSize(maxModules + 1), stream)), + deviceStore_(data_d.get(), maxModules + 1) 
+{} diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h index e93b742cf..4543c7551 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h @@ -4,12 +4,22 @@ #include "CUDACore/device_unique_ptr.h" #include "CUDACore/host_unique_ptr.h" #include "CUDACore/cudaCompat.h" +#include "DataFormats/SoAStore.h" #include class SiPixelClustersCUDA { public: - SiPixelClustersCUDA() = default; + generate_SoA_store(DeviceStore, + SoA_column(uint32_t, moduleStart), // index of the first pixel of each module + SoA_column(uint32_t, clusInModule), // number of clusters found in each module + SoA_column(uint32_t, moduleId), // module id of each module + + // originally from rechits + SoA_column(uint32_t, clusModuleStart) // index of the first cluster of each module + ); + + explicit SiPixelClustersCUDA(); explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream); ~SiPixelClustersCUDA() = default; @@ -22,41 +32,21 @@ class SiPixelClustersCUDA { uint32_t nClusters() const { return nClusters_h; } - uint32_t *moduleStart() { return moduleStart_d.get(); } - uint32_t *clusInModule() { return clusInModule_d.get(); } - uint32_t *moduleId() { return moduleId_d.get(); } - uint32_t *clusModuleStart() { return clusModuleStart_d.get(); } - - uint32_t const *moduleStart() const { return moduleStart_d.get(); } - uint32_t const *clusInModule() const { return clusInModule_d.get(); } - uint32_t const *moduleId() const { return moduleId_d.get(); } - uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); } - - class DeviceConstView { - public: - __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_ + i); } - __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_ + i); } - __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_ + i); } - __device__ 
__forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_ + i); } + uint32_t *moduleStart() { return deviceStore_.moduleStart(); } + uint32_t *clusInModule() { return deviceStore_.clusInModule(); } + uint32_t *moduleId() { return deviceStore_.moduleId(); } + uint32_t *clusModuleStart() { return deviceStore_.clusModuleStart(); } - uint32_t const *moduleStart_; - uint32_t const *clusInModule_; - uint32_t const *moduleId_; - uint32_t const *clusModuleStart_; - }; + uint32_t const *moduleStart() const { return deviceStore_.moduleStart(); } + uint32_t const *clusInModule() const { return deviceStore_.clusInModule(); } + uint32_t const *moduleId() const { return deviceStore_.moduleId(); } + uint32_t const *clusModuleStart() const { return deviceStore_.clusModuleStart(); } - DeviceConstView *view() const { return view_d.get(); } + const DeviceStore store() const { return deviceStore_; } private: - cms::cuda::device::unique_ptr moduleStart_d; // index of the first pixel of each module - cms::cuda::device::unique_ptr clusInModule_d; // number of clusters found in each module - cms::cuda::device::unique_ptr moduleId_d; // module id of each module - - // originally from rechits - cms::cuda::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module - - cms::cuda::device::unique_ptr view_d; // "me" pointer - + cms::cuda::device::unique_ptr data_d; // Single SoA storage + DeviceStore deviceStore_; uint32_t nClusters_h = 0; }; diff --git a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu index ba62da1b5..ca18e03f5 100644 --- a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu +++ b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu @@ -50,7 +50,7 @@ namespace pixelgpudetails { // protect from empty events if (blocks) { gpuPixelRecHits::getHits<<>>( - cpeParams, bs_d.data(), digis_d.view(), digis_d.nDigis(), clusters_d.view(), hits_d.view()); + 
cpeParams, bs_d.data(), digis_d.view(), digis_d.nDigis(), clusters_d.store(), hits_d.view()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaCheck(cudaDeviceSynchronize()); diff --git a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h index adddc8b83..405af8eb3 100644 --- a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h +++ b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h @@ -18,7 +18,7 @@ namespace gpuPixelRecHits { BeamSpotPOD const* __restrict__ bs, SiPixelDigisCUDA::DeviceConstView const* __restrict__ pdigis, int numElements, - SiPixelClustersCUDA::DeviceConstView const* __restrict__ pclusters, + SiPixelClustersCUDA::DeviceStore const pclusters, TrackingRecHit2DSOAView* phits) { // FIXME // the compiler seems NOT to optimize loads from views (even in a simple test case) @@ -31,7 +31,7 @@ namespace gpuPixelRecHits { auto& hits = *phits; auto const digis = *pdigis; // the copy is intentional! - auto const& clusters = *pclusters; + auto const& clusters = pclusters; // copy average geometry corrected by beamspot . FIXME (move it somewhere else???) if (0 == blockIdx.x) { @@ -62,8 +62,8 @@ namespace gpuPixelRecHits { // as usual one block per module __shared__ ClusParams clusParams; - auto me = clusters.moduleId(blockIdx.x); - int nclus = clusters.clusInModule(me); + auto me = clusters[blockIdx.x].moduleId(); + int nclus = clusters[me].clusInModule(); if (0 == nclus) return; @@ -108,7 +108,7 @@ namespace gpuPixelRecHits { __syncthreads(); // one thread per "digi" - auto first = clusters.moduleStart(1 + blockIdx.x) + threadIdx.x; + auto first = clusters[1 + blockIdx.x].moduleStart() + threadIdx.x; for (int i = first; i < numElements; i += blockDim.x) { auto id = digis.moduleInd(i); if (id == invalidModuleId) @@ -164,12 +164,12 @@ namespace gpuPixelRecHits { // next one cluster per thread... 
- first = clusters.clusModuleStart(me) + startClus; + first = clusters[me].clusModuleStart() + startClus; for (int ic = threadIdx.x; ic < nClusInIter; ic += blockDim.x) { auto h = first + ic; // output index in global memory assert(h < hits.nHits()); - assert(h < clusters.clusModuleStart(me + 1)); + assert(h < clusters[me + 1].clusModuleStart()); pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); diff --git a/src/cudadev/test/TrackingRecHit2DCUDA_t.cu b/src/cudadev/test/TrackingRecHit2DCUDA_t.cu index 5f3a26391..4572d8f42 100644 --- a/src/cudadev/test/TrackingRecHit2DCUDA_t.cu +++ b/src/cudadev/test/TrackingRecHit2DCUDA_t.cu @@ -6,7 +6,7 @@ namespace testTrackingRecHit2D { __global__ void fill(TrackingRecHit2DSOAView* phits) { assert(phits); - auto& hits = *phits; + [[maybe_unused]] auto& hits = *phits; assert(hits.nHits() == 200); int i = threadIdx.x; @@ -16,7 +16,7 @@ namespace testTrackingRecHit2D { __global__ void verify(TrackingRecHit2DSOAView const* phits) { assert(phits); - auto const& hits = *phits; + [[maybe_unused]] auto const& hits = *phits; assert(hits.nHits() == 200); int i = threadIdx.x; From d72582a8688a190db55f2f9c54e7bf1cbddec8bd Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 13 Oct 2021 10:11:26 +0200 Subject: [PATCH 05/50] [cudadev] Moved SiPixelDigisCUDA to SoA store and views. 
--- .../CUDADataFormats/SiPixelDigisCUDA.cc | 76 +++++----- .../CUDADataFormats/SiPixelDigisCUDA.h | 137 ++++++++++++------ .../SiPixelDigisSoAFromCUDA.cc | 18 +-- .../PixelRecHitGPUKernel.cu | 2 +- .../plugin-SiPixelRecHits/gpuPixelRecHits.h | 22 +-- 5 files changed, 149 insertions(+), 106 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc index fd87fee56..9bc8207ac 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc @@ -5,44 +5,54 @@ #include "CUDACore/host_unique_ptr.h" SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) - : xx_d(cms::cuda::make_device_unique(maxFedWords, stream)), - yy_d(cms::cuda::make_device_unique(maxFedWords, stream)), - adc_d(cms::cuda::make_device_unique(maxFedWords, stream)), - moduleInd_d(cms::cuda::make_device_unique(maxFedWords, stream)), - clus_d(cms::cuda::make_device_unique(maxFedWords, stream)), - view_d(cms::cuda::make_device_unique(stream)), - pdigi_d(cms::cuda::make_device_unique(maxFedWords, stream)), - rawIdArr_d(cms::cuda::make_device_unique(maxFedWords, stream)) { - auto view = cms::cuda::make_host_unique(stream); - view->xx_ = xx_d.get(); - view->yy_ = yy_d.get(); - view->adc_ = adc_d.get(); - view->moduleInd_ = moduleInd_d.get(); - view->clus_ = clus_d.get(); - - cms::cuda::copyAsync(view_d, view, stream); -} + : data_d(cms::cuda::make_device_unique( + DeviceOnlyStore::computeDataSize(maxFedWords) + + HostDeviceStore::computeDataSize(maxFedWords), + stream)), + deviceOnlyStore_d(data_d.get(), maxFedWords), + hostDeviceStore_d(deviceOnlyStore_d.soaMetadata().nextByte(), maxFedWords), + deviceFullView_(deviceOnlyStore_d, hostDeviceStore_d), + devicePixelView_(deviceFullView_) +{} -cms::cuda::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(nDigis(), stream); - cms::cuda::copyAsync(ret, adc_d, 
nDigis(), stream); - return ret; -} +SiPixelDigisCUDA::SiPixelDigisCUDA() + : data_d(),deviceOnlyStore_d(), hostDeviceStore_d(), deviceFullView_(), devicePixelView_() +{} -cms::cuda::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(nDigis(), stream); - cms::cuda::copyAsync(ret, clus_d, nDigis(), stream); - return ret; +SiPixelDigisCUDA::HostStoreAndBuffer::HostStoreAndBuffer() + : data_h(), hostStore_(nullptr, 0) +{} + +SiPixelDigisCUDA::HostStoreAndBuffer::HostStoreAndBuffer(size_t maxFedWords, cudaStream_t stream) + : data_h(cms::cuda::make_host_unique(SiPixelDigisCUDA::HostDeviceStore::computeDataSize(maxFedWords), stream)), + hostStore_(data_h.get(), maxFedWords) +{} + +void SiPixelDigisCUDA::HostStoreAndBuffer::reset() { + hostStore_.~HostDeviceStore(); + new(&hostStore_) HostDeviceStore(nullptr, 0); + data_h.reset(); } -cms::cuda::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(nDigis(), stream); - cms::cuda::copyAsync(ret, pdigi_d, nDigis(), stream); +cms::cuda::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + // TODO: this is downgraded from cms::cuda::copyAsync as we copy data from within a block but not the full block. + cudaCheck(cudaMemcpyAsync(ret.get(), deviceFullView_.adc(), nDigis() * sizeof(decltype(ret[0])), cudaMemcpyDeviceToHost, stream)); return ret; } -cms::cuda::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(nDigis(), stream); - cms::cuda::copyAsync(ret, rawIdArr_d, nDigis(), stream); +SiPixelDigisCUDA::HostStoreAndBuffer SiPixelDigisCUDA::dataToHostAsync(cudaStream_t stream) const { + // Allocate the needed space only and build the compact data in place in host memory (from the larger device memory). 
+ // Due to the compaction with the 2D copy, we need to know the precise geometry, and hence operate on the store (as opposed + // to the view, which is unaware of the column pitches. + HostStoreAndBuffer ret(nDigis(), stream); + cudaCheck(cudaMemcpyAsync(ret.hostStore_.adc(), hostDeviceStore_d.adc(), nDigis_h * sizeof(decltype(*deviceFullView_.adc())), + cudaMemcpyDeviceToHost, stream)); + // Copy the other columns, realigning the data in shorter arrays. clus is the first but all 3 columns (clus, pdigis, rawIdArr) have + // the same geometry. + cudaCheck(cudaMemcpy2DAsync(ret.hostStore_.clus(), ret.hostStore_.soaMetadata().clusPitch(), + hostDeviceStore_d.clus(), hostDeviceStore_d.soaMetadata().clusPitch(), + 3 /* rows */, + nDigis() * sizeof(decltype (*ret.hostStore_.clus())), cudaMemcpyDeviceToHost, stream)); return ret; -} +} \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index 03ae6639a..b68797bdf 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -6,10 +6,62 @@ #include "CUDACore/cudaCompat.h" #include "CUDACore/device_unique_ptr.h" #include "CUDACore/host_unique_ptr.h" +#include "DataFormats/SoAStore.h" +#include "DataFormats/SoAView.h" class SiPixelDigisCUDA { public: - SiPixelDigisCUDA() = default; + generate_SoA_store(DeviceOnlyStore, + /* These are consumed by downstream device code */ + SoA_column(uint16_t, xx), /* local coordinates of each pixel */ + SoA_column(uint16_t, yy), /* */ + SoA_column(uint16_t, moduleInd) /* module id of each pixel */ + ); + + generate_SoA_store(HostDeviceStore, + /* These are also transferred to host (see HostDataView) */ + SoA_column(uint16_t, adc), /* ADC of each pixel */ + SoA_column(int32_t, clus), /* cluster id of each pixel */ + /* These are for CPU output; should we (eventually) place them to a */ + /* separate product? 
*/ + SoA_column(uint32_t, pdigi), /* packed digi (row, col, adc) of each pixel */ + SoA_column(uint32_t, rawIdArr) /* DetId of each pixel */ + ); + + generate_SoA_view(DeviceFullView, + SoA_view_store_list( + SoA_view_store(DeviceOnlyStore, deviceOnly), + SoA_view_store(HostDeviceStore, hostDevice) + ), + SoA_view_value_list( + SoA_view_value(deviceOnly, xx, xx), /* local coordinates of each pixel */ + SoA_view_value(deviceOnly, yy, yy), /* */ + SoA_view_value(deviceOnly, moduleInd, moduleInd), /* module id of each pixel */ + SoA_view_value(hostDevice, adc, adc), /* ADC of each pixel */ + SoA_view_value(hostDevice, clus, clus),/* cluster id of each pixel */ + SoA_view_value(hostDevice, pdigi, pdigi), /* packed digi (row, col, adc) of each pixel */ + SoA_view_value(hostDevice, rawIdArr, rawIdArr) /* DetId of each pixel */ + /* TODO: simple, no rename interface */ + ) + ); + + /* Device pixel view: this is a second generation view (view from view) */ + generate_SoA_const_view(DevicePixelView, + /* We get out data from the DeviceFullStore */ + SoA_view_store_list( + SoA_view_store(DeviceFullView, deviceFullView) + ), + /* These are consumed by downstream device code */ + SoA_view_value_list( + SoA_view_value(deviceFullView, xx, xx), /* local coordinates of each pixel */ + SoA_view_value(deviceFullView, yy, yy), /* */ + SoA_view_value(deviceFullView, moduleInd, moduleInd), /* module id of each pixel */ + SoA_view_value(deviceFullView, adc, adc), /* ADC of each pixel */ + SoA_view_value(deviceFullView, clus, clus) /* cluster id of each pixel */ + ) + ); + + explicit SiPixelDigisCUDA(); explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream); ~SiPixelDigisCUDA() = default; @@ -26,60 +78,49 @@ class SiPixelDigisCUDA { uint32_t nModules() const { return nModules_h; } uint32_t nDigis() const { return nDigis_h; } - uint16_t *xx() { return xx_d.get(); } - uint16_t *yy() { return yy_d.get(); } - uint16_t *adc() { return adc_d.get(); } - uint16_t *moduleInd() { 
return moduleInd_d.get(); } - int32_t *clus() { return clus_d.get(); } - uint32_t *pdigi() { return pdigi_d.get(); } - uint32_t *rawIdArr() { return rawIdArr_d.get(); } - - uint16_t const *xx() const { return xx_d.get(); } - uint16_t const *yy() const { return yy_d.get(); } - uint16_t const *adc() const { return adc_d.get(); } - uint16_t const *moduleInd() const { return moduleInd_d.get(); } - int32_t const *clus() const { return clus_d.get(); } - uint32_t const *pdigi() const { return pdigi_d.get(); } - uint32_t const *rawIdArr() const { return rawIdArr_d.get(); } - - cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; - cms::cuda::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; - cms::cuda::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; - cms::cuda::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const; - - class DeviceConstView { + uint16_t *xx() { return deviceFullView_.xx(); } + uint16_t *yy() { return deviceFullView_.yy(); } + uint16_t *adc() { return deviceFullView_.adc(); } + uint16_t *moduleInd() { return deviceFullView_.moduleInd(); } + int32_t *clus() { return deviceFullView_.clus(); } + uint32_t *pdigi() { return deviceFullView_.pdigi(); } + uint32_t *rawIdArr() { return deviceFullView_.rawIdArr(); } + + uint16_t const *xx() const { return deviceFullView_.xx(); } + uint16_t const *yy() const { return deviceFullView_.yy(); } + uint16_t const *adc() const { return deviceFullView_.adc(); } + uint16_t const *moduleInd() const { return deviceFullView_.moduleInd(); } + int32_t const *clus() const { return deviceFullView_.clus(); } + uint32_t const *pdigi() const { return deviceFullView_.pdigi(); } + uint32_t const *rawIdArr() const { return deviceFullView_.rawIdArr(); } + + class HostStoreAndBuffer { + friend SiPixelDigisCUDA; public: - __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_ + i); } - __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_ + i); } - 
__device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_ + i); } - __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_ + i); } - __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_ + i); } - - uint16_t const *xx_; - uint16_t const *yy_; - uint16_t const *adc_; - uint16_t const *moduleInd_; - int32_t const *clus_; + HostStoreAndBuffer(); + const SiPixelDigisCUDA::HostDeviceStore store() { return hostStore_; } + void reset(); + private: + HostStoreAndBuffer(size_t maxFedWords, cudaStream_t stream); + cms::cuda::host::unique_ptr data_h; + HostDeviceStore hostStore_; }; + HostStoreAndBuffer dataToHostAsync(cudaStream_t stream) const; - const DeviceConstView *view() const { return view_d.get(); } + // Special copy for validation + cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; + + const DevicePixelView& pixelView() const { return devicePixelView_; } private: // These are consumed by downstream device code - cms::cuda::device::unique_ptr xx_d; // local coordinates of each pixel - cms::cuda::device::unique_ptr yy_d; // - cms::cuda::device::unique_ptr adc_d; // ADC of each pixel - cms::cuda::device::unique_ptr moduleInd_d; // module id of each pixel - cms::cuda::device::unique_ptr clus_d; // cluster id of each pixel - cms::cuda::device::unique_ptr view_d; // "me" pointer - - // These are for CPU output; should we (eventually) place them to a - // separate product? 
- cms::cuda::device::unique_ptr pdigi_d; // packed digi (row, col, adc) of each pixel - cms::cuda::device::unique_ptr rawIdArr_d; // DetId of each pixel - + cms::cuda::device::unique_ptr data_d; // Single SoA storage + DeviceOnlyStore deviceOnlyStore_d; + HostDeviceStore hostDeviceStore_d; + DeviceFullView deviceFullView_; + DevicePixelView devicePixelView_; uint32_t nModules_h = 0; uint32_t nDigis_h = 0; }; -#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h +#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h \ No newline at end of file diff --git a/src/cudadev/plugin-SiPixelRawToDigi/SiPixelDigisSoAFromCUDA.cc b/src/cudadev/plugin-SiPixelRawToDigi/SiPixelDigisSoAFromCUDA.cc index 448f4b797..1de1e1dda 100644 --- a/src/cudadev/plugin-SiPixelRawToDigi/SiPixelDigisSoAFromCUDA.cc +++ b/src/cudadev/plugin-SiPixelRawToDigi/SiPixelDigisSoAFromCUDA.cc @@ -22,10 +22,7 @@ class SiPixelDigisSoAFromCUDA : public edm::EDProducerExternalWork { edm::EDGetTokenT> digiGetToken_; edm::EDPutTokenT digiPutToken_; - cms::cuda::host::unique_ptr pdigi_; - cms::cuda::host::unique_ptr rawIdArr_; - cms::cuda::host::unique_ptr adc_; - cms::cuda::host::unique_ptr clus_; + SiPixelDigisCUDA::HostStoreAndBuffer digis_; size_t nDigis_; }; @@ -43,10 +40,7 @@ void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, const auto& gpuDigis = ctx.get(iEvent, digiGetToken_); nDigis_ = gpuDigis.nDigis(); - pdigi_ = gpuDigis.pdigiToHostAsync(ctx.stream()); - rawIdArr_ = gpuDigis.rawIdArrToHostAsync(ctx.stream()); - adc_ = gpuDigis.adcToHostAsync(ctx.stream()); - clus_ = gpuDigis.clusToHostAsync(ctx.stream()); + digis_ = gpuDigis.dataToHostAsync(ctx.stream()); } void SiPixelDigisSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { @@ -60,12 +54,10 @@ void SiPixelDigisSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& // host memory to be allocated without a CUDA stream // - What if a CPU algorithm would produce the same SoA? 
We can't // use cudaMallocHost without a GPU... - iEvent.emplace(digiPutToken_, nDigis_, pdigi_.get(), rawIdArr_.get(), adc_.get(), clus_.get()); + auto dv = digis_.store(); + iEvent.emplace(digiPutToken_, nDigis_, dv.pdigi(), dv.rawIdArr(), dv.adc(), dv.clus()); - pdigi_.reset(); - rawIdArr_.reset(); - adc_.reset(); - clus_.reset(); + digis_.reset(); } // define as framework plugin diff --git a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu index ca18e03f5..997e5b450 100644 --- a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu +++ b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu @@ -50,7 +50,7 @@ namespace pixelgpudetails { // protect from empty events if (blocks) { gpuPixelRecHits::getHits<<>>( - cpeParams, bs_d.data(), digis_d.view(), digis_d.nDigis(), clusters_d.store(), hits_d.view()); + cpeParams, bs_d.data(), digis_d.pixelView(), digis_d.nDigis(), clusters_d.store(), hits_d.view()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaCheck(cudaDeviceSynchronize()); diff --git a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h index 405af8eb3..5c556f92b 100644 --- a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h +++ b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h @@ -16,7 +16,7 @@ namespace gpuPixelRecHits { __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams, BeamSpotPOD const* __restrict__ bs, - SiPixelDigisCUDA::DeviceConstView const* __restrict__ pdigis, + SiPixelDigisCUDA::DevicePixelView pdigis, int numElements, SiPixelClustersCUDA::DeviceStore const pclusters, TrackingRecHit2DSOAView* phits) { @@ -30,7 +30,7 @@ namespace gpuPixelRecHits { auto& hits = *phits; - auto const digis = *pdigis; // the copy is intentional! + auto const digis = pdigis; // the copy is intentional! auto const& clusters = pclusters; // copy average geometry corrected by beamspot . 
FIXME (move it somewhere else???) @@ -110,19 +110,19 @@ namespace gpuPixelRecHits { // one thread per "digi" auto first = clusters[1 + blockIdx.x].moduleStart() + threadIdx.x; for (int i = first; i < numElements; i += blockDim.x) { - auto id = digis.moduleInd(i); + auto id = digis[i].moduleInd(); if (id == invalidModuleId) continue; // not valid if (id != me) break; // end of module - auto cl = digis.clus(i); + auto cl = digis[i].clus(); if (cl < startClus || cl >= lastClus) continue; cl -= startClus; assert(cl >= 0); assert(cl < MaxHitsInIter); - auto x = digis.xx(i); - auto y = digis.yy(i); + auto x = digis[i].xx(); + auto y = digis[i].yy(); atomicMin(&clusParams.minRow[cl], x); atomicMax(&clusParams.maxRow[cl], x); atomicMin(&clusParams.minCol[cl], y); @@ -135,20 +135,20 @@ namespace gpuPixelRecHits { //auto pixmx = cpeParams->detParams(me).pixmx; auto pixmx = std::numeric_limits::max(); for (int i = first; i < numElements; i += blockDim.x) { - auto id = digis.moduleInd(i); + auto id = digis[i].moduleInd(); if (id == invalidModuleId) continue; // not valid if (id != me) break; // end of module - auto cl = digis.clus(i); + auto cl = digis[i].clus(); if (cl < startClus || cl >= lastClus) continue; cl -= startClus; assert(cl >= 0); assert(cl < MaxHitsInIter); - auto x = digis.xx(i); - auto y = digis.yy(i); - auto ch = std::min(digis.adc(i), pixmx); + auto x = digis[i].xx(); + auto y = digis[i].yy(); + auto ch = std::min(digis[i].adc(), pixmx); atomicAdd(&clusParams.charge[cl], ch); if (clusParams.minRow[cl] == x) atomicAdd(&clusParams.q_f_X[cl], ch); From e8a7735e17554ad3c1fd0f7b36b9268b8feb631e Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 13 Oct 2021 11:21:24 +0200 Subject: [PATCH 06/50] [cudadev] Moved TrackingRecHit2D to SoA store and views. 
--- .../TrackingRecHit2DHeterogeneous.cc | 42 +----- .../TrackingRecHit2DHeterogeneous.h | 132 ++++++++--------- .../TrackingRecHit2DHostSOAView.cc | 15 ++ .../TrackingRecHit2DHostSOAView.h | 26 ++++ .../CUDADataFormats/TrackingRecHit2DSOAView.h | 137 +++++++++++------- .../plugin-PixelTriplets/BrokenLineFitOnGPU.h | 10 +- .../CAHitNtupletGeneratorKernels.cc | 18 +-- .../CAHitNtupletGeneratorKernels.cu | 18 +-- .../CAHitNtupletGeneratorKernels.h | 10 +- .../CAHitNtupletGeneratorKernelsImpl.h | 12 +- .../CAHitNtupletGeneratorOnGPU.cc | 14 +- .../CAHitNtupletGeneratorOnGPU.h | 6 +- src/cudadev/plugin-PixelTriplets/GPUCACell.h | 26 ++-- .../plugin-PixelTriplets/HelixFitOnGPU.h | 4 +- .../plugin-PixelTriplets/RiemannFitOnGPU.h | 10 +- .../plugin-PixelTriplets/gpuPixelDoublets.h | 4 +- .../gpuPixelDoubletsAlgos.h | 32 ++-- .../PixelRecHitGPUKernel.cu | 4 +- .../plugin-SiPixelRecHits/gpuPixelRecHits.h | 32 ++-- .../plugin-Validation/HistoValidator.cc | 40 ++--- src/cudadev/test/TrackingRecHit2DCUDA_t.cu | 12 +- 21 files changed, 323 insertions(+), 281 deletions(-) create mode 100644 src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc create mode 100644 src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.h diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc index 5c1aacaf4..80ddf44d8 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc @@ -3,42 +3,12 @@ #include "CUDACore/device_unique_ptr.h" #include "CUDACore/host_unique_ptr.h" #include "CUDADataFormats/TrackingRecHit2DHeterogeneous.h" +#include "CondFormats/SiPixelROCsStatusAndMapping.h" template <> -cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::localCoordToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(4 * nHits(), stream); - cms::cuda::copyAsync(ret, m_store32, 4 * nHits(), stream); 
+TrackingRecHit2DHostSOAView TrackingRecHit2DCUDA::hitsToHostAsync(cudaStream_t stream) const { + // copy xl, yl, xerr, yerr, xg, yg, zg,rg, charge, clusterSizeX, clusterSizeY. + TrackingRecHit2DHostSOAView ret(nHits(), stream); + cms::cuda::copyAsync(ret.hits_h, m_hitsSupportLayerStartStore, ret.hitsStore_.soaMetadata().byteSize(), stream); return ret; -} - -template <> -cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(gpuClustering::maxNumModules + 1, stream); - cudaCheck(cudaMemcpyAsync( - ret.get(), m_hitsModuleStart, sizeof(uint32_t) * (gpuClustering::maxNumModules + 1), cudaMemcpyDefault, stream)); - return ret; -} - -template <> -cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::globalCoordToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(4 * nHits(), stream); - cudaCheck(cudaMemcpyAsync( - ret.get(), m_store32.get() + 4 * nHits(), 4 * nHits() * sizeof(float), cudaMemcpyDefault, stream)); - return ret; -} - -template <> -cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::chargeToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(nHits(), stream); - cudaCheck( - cudaMemcpyAsync(ret.get(), m_store32.get() + 8 * nHits(), nHits() * sizeof(int32_t), cudaMemcpyDefault, stream)); - return ret; -} - -template <> -cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::sizeToHostAsync(cudaStream_t stream) const { - auto ret = cms::cuda::make_host_unique(2 * nHits(), stream); - cudaCheck(cudaMemcpyAsync( - ret.get(), m_store16.get() + 2 * nHits(), 2 * nHits() * sizeof(int16_t), cudaMemcpyDefault, stream)); - return ret; -} +} \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h index 7a19299a9..e7315cc9d 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h +++ 
b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h @@ -3,6 +3,7 @@ #include "CUDADataFormats/TrackingRecHit2DSOAView.h" #include "CUDADataFormats/HeterogeneousSoA.h" +#include "CUDADataFormats/TrackingRecHit2DHostSOAView.h" template class TrackingRecHit2DHeterogeneous { @@ -10,7 +11,7 @@ class TrackingRecHit2DHeterogeneous { template using unique_ptr = typename Traits::template unique_ptr; - using PhiBinner = TrackingRecHit2DSOAView::PhiBinner; + using PhiBinner = TrackingRecHit2DSOAStore::PhiBinner; TrackingRecHit2DHeterogeneous() = default; @@ -26,8 +27,8 @@ class TrackingRecHit2DHeterogeneous { TrackingRecHit2DHeterogeneous(TrackingRecHit2DHeterogeneous&&) = default; TrackingRecHit2DHeterogeneous& operator=(TrackingRecHit2DHeterogeneous&&) = default; - TrackingRecHit2DSOAView* view() { return m_view.get(); } - TrackingRecHit2DSOAView const* view() const { return m_view.get(); } + TrackingRecHit2DSOAStore* store() { return m_store.get(); } + TrackingRecHit2DSOAStore const* store() const { return m_store.get(); } auto nHits() const { return m_nHits; } @@ -37,29 +38,23 @@ class TrackingRecHit2DHeterogeneous { auto phiBinnerStorage() { return m_phiBinnerStorage; } auto iphi() { return m_iphi; } - // only the local coord and detector index - cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; - cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; - - // for validation - cms::cuda::host::unique_ptr globalCoordToHostAsync(cudaStream_t stream) const; - cms::cuda::host::unique_ptr chargeToHostAsync(cudaStream_t stream) const; - cms::cuda::host::unique_ptr sizeToHostAsync(cudaStream_t stream) const; + // Transfer the local and global coordinates, charge and size + TrackingRecHit2DHostSOAView hitsToHostAsync(cudaStream_t stream) const; + + // apparently unused + //cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; private: - static constexpr uint32_t n16 = 4; // number of 
elements in m_store16 - static constexpr uint32_t n32 = 10; // number of elements in m_store32 static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious + + unique_ptr m_PhiBinnerStore; //! + unique_ptr m_AverageGeometryStore; //! - unique_ptr m_store16; //! - unique_ptr m_store32; //! - - unique_ptr m_PhiBinnerStore; //! - unique_ptr m_AverageGeometryStore; //! - - unique_ptr m_view; //! + unique_ptr m_store; //! uint32_t m_nHits; + + unique_ptr m_hitsSupportLayerStartStore; //! uint32_t const* m_hitsModuleStart; // needed for legacy, this is on GPU! @@ -79,21 +74,21 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH uint32_t const* hitsModuleStart, cudaStream_t stream) : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) { - auto view = Traits::template make_host_unique(stream); + auto store = Traits::template make_host_unique(stream); - view->m_nHits = nHits; - m_view = Traits::template make_device_unique(stream); - m_AverageGeometryStore = Traits::template make_device_unique(stream); - view->m_averageGeometry = m_AverageGeometryStore.get(); - view->m_cpeParams = cpeParams; - view->m_hitsModuleStart = hitsModuleStart; + store->m_nHits = nHits; + m_store = Traits::template make_device_unique(stream); + m_AverageGeometryStore = Traits::template make_device_unique(stream); + store->m_averageGeometry = m_AverageGeometryStore.get(); + store->m_cpeParams = cpeParams; + store->m_hitsModuleStart = hitsModuleStart; - // if empy do not bother + // if empty do not bother if (0 == nHits) { if constexpr (std::is_same::value) { - cms::cuda::copyAsync(m_view, view, stream); + cms::cuda::copyAsync(m_store, store, stream); } else { - m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version + m_store.reset(store.release()); // NOLINT: std::move() breaks CUDA version } return; } @@ -103,46 +98,53 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH // if ordering is relevant they may have to be 
stored phi-ordered by layer or so // this will break 1to1 correspondence with cluster and module locality // so unless proven VERY inefficient we keep it ordered as generated - m_store16 = Traits::template make_device_unique(nHits * n16, stream); - m_store32 = - Traits::template make_device_unique(nHits * n32 + phase1PixelTopology::numberOfLayers + 1, stream); - m_PhiBinnerStore = Traits::template make_device_unique(stream); - - static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(float)); - static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(TrackingRecHit2DSOAView::PhiBinner::index_type)); + //m_store16 = Traits::template make_device_unique(nHits * n16, stream); + //m_store32 = + // Traits::template make_device_unique(nHits * n32 + phase1PixelTopology::numberOfLayers + 1, stream); + // We need to store all SoA rows for TrackingRecHit2DSOAView::HitsView(nHits) + + // (phase1PixelTopology::numberOfLayers + 1) TrackingRecHit2DSOAView::PhiBinner::index_type. + // As mentioned above, alignment is not important, yet we want to have 32 bits + // (TrackingRecHit2DSOAView::PhiBinner::index_type exactly) alignement for the second part. + // In order to simplify code, we align all to the minimum necessary size (sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type)). 
+ { + // Simplify a bit following computations + const size_t align = sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type); + const size_t phiBinnerByteSize = + (phase1PixelTopology::numberOfLayers + 1) * sizeof (TrackingRecHit2DSOAStore::PhiBinner::index_type); + // Allocate the buffer + m_hitsSupportLayerStartStore = Traits::template make_device_unique ( + TrackingRecHit2DSOAStore::HitsStore::computeDataSize(m_nHits, align) + + TrackingRecHit2DSOAStore::SupportObjectsStore::computeDataSize(m_nHits, align) + + phiBinnerByteSize, + stream); + // Split the buffer in stores and array + store->m_hitsStore.~HitsStore(); + new (&store->m_hitsStore) TrackingRecHit2DSOAStore::HitsStore(m_hitsSupportLayerStartStore.get(), nHits, align); + store->m_supportObjectsStore.~SupportObjectsStore(); + new (&store->m_supportObjectsStore) TrackingRecHit2DSOAStore::SupportObjectsStore(store->m_hitsStore.soaMetadata().nextByte(), nHits, 1); + m_hitsLayerStart = store->m_hitsLayerStart = reinterpret_cast (store->m_supportObjectsStore.soaMetadata().nextByte()); + // Record additional references + store->m_hitsAndSupportView.~HitsAndSupportView(); + new (&store->m_hitsAndSupportView) TrackingRecHit2DSOAStore::HitsAndSupportView( + store->m_hitsStore, + store->m_supportObjectsStore + ); + m_phiBinnerStorage = store->m_phiBinnerStorage = store->m_supportObjectsStore.phiBinnerStorage(); + m_iphi = store->m_supportObjectsStore.iphi(); + } + m_PhiBinnerStore = Traits::template make_device_unique(stream); - auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; - auto get32 = [&](int i) { return m_store32.get() + i * nHits; }; + static_assert(sizeof(TrackingRecHit2DSOAStore::hindex_type) == sizeof(float)); + static_assert(sizeof(TrackingRecHit2DSOAStore::hindex_type) == sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type)); // copy all the pointers - m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get(); - m_phiBinnerStorage = view->m_phiBinnerStorage = - 
reinterpret_cast(get32(9)); - - view->m_xl = get32(0); - view->m_yl = get32(1); - view->m_xerr = get32(2); - view->m_yerr = get32(3); - - view->m_xg = get32(4); - view->m_yg = get32(5); - view->m_zg = get32(6); - view->m_rg = get32(7); - - m_iphi = view->m_iphi = reinterpret_cast(get16(0)); - - view->m_charge = reinterpret_cast(get32(8)); - view->m_xsize = reinterpret_cast(get16(2)); - view->m_ysize = reinterpret_cast(get16(3)); - view->m_detInd = get16(1); - - m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); - + m_phiBinner = store->m_phiBinner = m_PhiBinnerStore.get(); + // transfer view if constexpr (std::is_same::value) { - cms::cuda::copyAsync(m_view, view, stream); + cms::cuda::copyAsync(m_store, store, stream); } else { - m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version + m_store.reset(store.release()); // NOLINT: std::move() breaks CUDA version } } @@ -151,4 +153,4 @@ using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous; using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous; -#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h +#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc new file mode 100644 index 000000000..7eb795951 --- /dev/null +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc @@ -0,0 +1,15 @@ +#include "CUDADataFormats/TrackingRecHit2DHostSOAView.h" + +TrackingRecHit2DHostSOAView::TrackingRecHit2DHostSOAView(): + hitsStore_(hits_h.get(), 0 /* size */, 1 /* byte alignment */) +{} + +void TrackingRecHit2DHostSOAView::reset() { + hits_h.reset(); + hitsStore_.~HitsStore(); +} + +TrackingRecHit2DHostSOAView::TrackingRecHit2DHostSOAView(size_t size, cudaStream_t stream): +
hits_h(cms::cuda::make_host_unique(TrackingRecHit2DSOAStore::HitsStore::computeDataSize(size), stream)), + hitsStore_(hits_h.get(), size, 1 /* byte alignment */) +{} diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.h new file mode 100644 index 000000000..f86f6f104 --- /dev/null +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.h @@ -0,0 +1,26 @@ + +#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostView_h +#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostView_h + +#include "CUDADataFormats/TrackingRecHit2DSOAView.h" +#include "CUDACore/host_unique_ptr.h" + +template +class TrackingRecHit2DHeterogeneous; + +struct TrackingRecHit2DHostSOAView { + template + friend class TrackingRecHit2DHeterogeneous; +public: + TrackingRecHit2DHostSOAView(); + void reset(); + __device__ __forceinline__ const auto operator[](size_t i) const { return hitsStore_[i]; } + __device__ __forceinline__ size_t size() { return hitsStore_.soaMetadata().size(); } +private: + TrackingRecHit2DHostSOAView(size_t size, cudaStream_t stream); + cms::cuda::host::unique_ptr hits_h; + TrackingRecHit2DSOAStore::HitsStore hitsStore_; +}; + + +#endif // ndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostView_h \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h index 5638fcd59..412e24dad 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h @@ -7,12 +7,14 @@ #include "CUDACore/HistoContainer.h" #include "CUDACore/cudaCompat.h" #include "Geometry/phase1PixelTopology.h" +#include "DataFormats/SoAStore.h" +#include "DataFormats/SoAView.h" namespace pixelCPEforGPU { struct ParamsOnGPU; } -class TrackingRecHit2DSOAView { +class TrackingRecHit2DSOAStore { public: using hindex_type = uint32_t;
// if above is <=2^32 @@ -22,40 +24,84 @@ class TrackingRecHit2DSOAView { template friend class TrackingRecHit2DHeterogeneous; - + __device__ __forceinline__ uint32_t nHits() const { return m_nHits; } - __device__ __forceinline__ float& xLocal(int i) { return m_xl[i]; } - __device__ __forceinline__ float xLocal(int i) const { return __ldg(m_xl + i); } - __device__ __forceinline__ float& yLocal(int i) { return m_yl[i]; } - __device__ __forceinline__ float yLocal(int i) const { return __ldg(m_yl + i); } - - __device__ __forceinline__ float& xerrLocal(int i) { return m_xerr[i]; } - __device__ __forceinline__ float xerrLocal(int i) const { return __ldg(m_xerr + i); } - __device__ __forceinline__ float& yerrLocal(int i) { return m_yerr[i]; } - __device__ __forceinline__ float yerrLocal(int i) const { return __ldg(m_yerr + i); } - - __device__ __forceinline__ float& xGlobal(int i) { return m_xg[i]; } - __device__ __forceinline__ float xGlobal(int i) const { return __ldg(m_xg + i); } - __device__ __forceinline__ float& yGlobal(int i) { return m_yg[i]; } - __device__ __forceinline__ float yGlobal(int i) const { return __ldg(m_yg + i); } - __device__ __forceinline__ float& zGlobal(int i) { return m_zg[i]; } - __device__ __forceinline__ float zGlobal(int i) const { return __ldg(m_zg + i); } - __device__ __forceinline__ float& rGlobal(int i) { return m_rg[i]; } - __device__ __forceinline__ float rGlobal(int i) const { return __ldg(m_rg + i); } - - __device__ __forceinline__ int16_t& iphi(int i) { return m_iphi[i]; } - __device__ __forceinline__ int16_t iphi(int i) const { return __ldg(m_iphi + i); } - - __device__ __forceinline__ int32_t& charge(int i) { return m_charge[i]; } - __device__ __forceinline__ int32_t charge(int i) const { return __ldg(m_charge + i); } - __device__ __forceinline__ int16_t& clusterSizeX(int i) { return m_xsize[i]; } - __device__ __forceinline__ int16_t clusterSizeX(int i) const { return __ldg(m_xsize + i); } - __device__ __forceinline__ int16_t& 
clusterSizeY(int i) { return m_ysize[i]; } - __device__ __forceinline__ int16_t clusterSizeY(int i) const { return __ldg(m_ysize + i); } - __device__ __forceinline__ uint16_t& detectorIndex(int i) { return m_detInd[i]; } - __device__ __forceinline__ uint16_t detectorIndex(int i) const { return __ldg(m_detInd + i); } - + // Our arrays do not require specific alignment as access will not be coalesced in the current implementation + // Still, we need the 32 bits integers to be aligned, so we simply declare the SoA with the 32 bits fields first + // and the 16 bits behind (as they have a looser alignment requirement). Then the SoA can be created with a byte + // alignment of 1. + generate_SoA_store(HitsStore, + // 32 bits section + // local coord + SoA_column(float, xLocal), + SoA_column(float, yLocal), + SoA_column(float, xerrLocal), + SoA_column(float, yerrLocal), + + // global coord + SoA_column(float, xGlobal), + SoA_column(float, yGlobal), + SoA_column(float, zGlobal), + SoA_column(float, rGlobal), + // global coordinates continue in the 16 bits section + + // cluster properties + SoA_column(int32_t, charge), + + // 16 bits section (and cluster properties immediately continued) + SoA_column(int16_t, clusterSizeX), + SoA_column(int16_t, clusterSizeY) + ); + + generate_SoA_store(SupportObjectsStore, + // This is the end of the data which is transferred to host.
The following columns are supporting + // objects, not transmitted + + // Supporting data (32 bits aligned) + SoA_column(TrackingRecHit2DSOAStore::PhiBinner::index_type, phiBinnerStorage), + + // global coordinates (not transmitted) + SoA_column(int16_t, iphi), + + // cluster properties (not transmitted) + SoA_column(uint16_t, detectorIndex) + ); + + generate_SoA_view(HitsAndSupportView, + SoA_view_store_list( + SoA_view_store(HitsStore, hitsStore), + SoA_view_store(SupportObjectsStore, supportObjectsStore) + ), + SoA_view_value_list( + SoA_view_value(hitsStore, xLocal, xLocal), + SoA_view_value(hitsStore, yLocal, yLocal), + SoA_view_value(hitsStore, xerrLocal, xerrLocal), + SoA_view_value(hitsStore, yerrLocal, yerrLocal), + + SoA_view_value(hitsStore, xGlobal, xGlobal), + SoA_view_value(hitsStore, yGlobal, yGlobal), + SoA_view_value(hitsStore, zGlobal, zGlobal), + SoA_view_value(hitsStore, rGlobal, rGlobal), + + SoA_view_value(hitsStore, charge, charge), + SoA_view_value(hitsStore, clusterSizeX, clusterSizeX), + SoA_view_value(hitsStore, clusterSizeY, clusterSizeY), + + SoA_view_value(supportObjectsStore, phiBinnerStorage, phiBinnerStorage), + SoA_view_value(supportObjectsStore, iphi, iphi), + SoA_view_value(supportObjectsStore, detectorIndex, detectorIndex) + ) + ); + + // Shortcut operator saving the explicit calls to view in usage. 
+ __device__ __forceinline__ HitsAndSupportView::element operator[] (size_t index) { + return m_hitsAndSupportView[index]; + } + __device__ __forceinline__ const HitsAndSupportView::const_element operator[] (size_t index) const { + return m_hitsAndSupportView[index]; + } + __device__ __forceinline__ pixelCPEforGPU::ParamsOnGPU const& cpeParams() const { return *m_cpeParams; } __device__ __forceinline__ uint32_t hitsModuleStart(int i) const { return __ldg(m_hitsModuleStart + i); } @@ -70,21 +116,14 @@ class TrackingRecHit2DSOAView { __device__ __forceinline__ AverageGeometry const& averageGeometry() const { return *m_averageGeometry; } private: - // local coord - float *m_xl, *m_yl; - float *m_xerr, *m_yerr; - - // global coord - float *m_xg, *m_yg, *m_zg, *m_rg; - int16_t* m_iphi; - - // cluster properties - int32_t* m_charge; - int16_t* m_xsize; - int16_t* m_ysize; - uint16_t* m_detInd; - - // supporting objects + // hits store + HitsStore m_hitsStore; + // supporting objects store + SupportObjectsStore m_supportObjectsStore; + // Global view simplifying usage + HitsAndSupportView m_hitsAndSupportView; + + // individually defined supporting objects // m_averageGeometry is corrected for beam spot, not sure where to host it otherwise AverageGeometry* m_averageGeometry; // owned by TrackingRecHit2DHeterogeneous pixelCPEforGPU::ParamsOnGPU const* m_cpeParams; // forwarded from setup, NOT owned @@ -98,4 +137,4 @@ class TrackingRecHit2DSOAView { uint32_t m_nHits; }; -#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h +#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/BrokenLineFitOnGPU.h b/src/cudadev/plugin-PixelTriplets/BrokenLineFitOnGPU.h index 67831af89..b7b5bfcc2 100644 --- a/src/cudadev/plugin-PixelTriplets/BrokenLineFitOnGPU.h +++ b/src/cudadev/plugin-PixelTriplets/BrokenLineFitOnGPU.h @@ -16,7 +16,7 @@ #include "BrokenLine.h" #include 
"HelixFitOnGPU.h" -using HitsOnGPU = TrackingRecHit2DSOAView; +using HitsOnGPU = TrackingRecHit2DSOAStore; using Tuples = pixelTrack::HitContainer; using OutputSoA = pixelTrack::TrackSoA; @@ -79,8 +79,8 @@ __global__ void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets, auto hit = hitId[i]; float ge[6]; hhp->cpeParams() - .detParams(hhp->detectorIndex(hit)) - .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge); + .detParams((*hhp)[hit].detectorIndex()) + .frame.toGlobal((*hhp)[hit].xerrLocal(), 0, (*hhp)[hit].yerrLocal(), ge); #ifdef BL_DUMP_HITS if (dump) { printf("Hit global: %d: %d hits.col(%d) << %f,%f,%f\n", @@ -102,7 +102,7 @@ __global__ void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets, ge[5]); } #endif - hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit); + hits.col(i) << (*hhp)[hit].xGlobal(), (*hhp)[hit].yGlobal(), (*hhp)[hit].zGlobal(); hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; } brokenline::fastFit(hits, fast_fit); @@ -181,4 +181,4 @@ __global__ void kernel_BLFit(caConstants::TupleMultiplicity const *__restrict__ line.cov(1, 1)); #endif } -} +} \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.cc b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.cc index f2805d018..ed7a4776c 100644 --- a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.cc +++ b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.cc @@ -63,7 +63,7 @@ void CAHitNtupletGeneratorKernelsCPU::buildDoublets(HitsOnCPU const &hh, cudaStr device_nCells_, device_theCellNeighbors_.get(), device_theCellTracks_.get(), - hh.view(), + hh.store(), device_isOuterHitOfCell_.get(), nActualPairs, params_.idealConditions_, @@ -94,7 +94,7 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA * kernel_connect(device_hitTuple_apc_, device_hitToTuple_apc_, // needed only to be reset, ready for next kernel - hh.view(), + 
hh.store(), device_theCells_.get(), device_nCells_, device_theCellNeighbors_.get(), @@ -108,10 +108,10 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA * if (nhits > 1 && params_.earlyFishbone_) { gpuPixelDoublets::fishbone( - hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, false); + hh.store(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, false); } - kernel_find_ntuplets(hh.view(), + kernel_find_ntuplets(hh.store(), device_theCells_.get(), device_nCells_, device_theCellTracks_.get(), @@ -120,7 +120,7 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA * quality_d, params_.minHitsPerNtuplet_); if (params_.doStats_) - kernel_mark_used(hh.view(), device_theCells_.get(), device_nCells_); + kernel_mark_used(hh.store(), device_theCells_.get(), device_nCells_); cms::cuda::finalizeBulk(device_hitTuple_apc_, tuples_d); @@ -133,7 +133,7 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA * if (nhits > 1 && params_.lateFishbone_) { gpuPixelDoublets::fishbone( - hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, true); + hh.store(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, true); } if (params_.doStats_) { @@ -178,7 +178,7 @@ void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA // remove duplicates (tracks that share a hit) if (params_.doSharedHitCut_) { kernel_sharedHitCleaner( - hh.view(), tuples_d, tracks_d, quality_d, params_.minHitsForSharingCut_, device_hitToTuple_.get()); + hh.store(), tuples_d, tracks_d, quality_d, params_.minHitsForSharingCut_, device_hitToTuple_.get()); } if (params_.doStats_) { @@ -190,6 +190,6 @@ void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA #ifdef DUMP_GPU_TK_TUPLES static std::atomic iev(0); ++iev; - kernel_print_found_ntuplets(hh.view(), 
tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 100, iev); + kernel_print_found_ntuplets(hh.store(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 100, iev); #endif -} +} \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.cu b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.cu index edc1eb49b..423caf558 100644 --- a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.cu +++ b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.cu @@ -50,7 +50,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA * kernel_connect<<>>( device_hitTuple_apc_, device_hitToTuple_apc_, // needed only to be reset, ready for next kernel - hh.view(), + hh.store(), device_theCells_.get(), device_nCells_, device_theCellNeighbors_.get(), @@ -71,13 +71,13 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA * dim3 blks(1, numberOfBlocks, 1); dim3 thrs(stride, blockSize, 1); gpuPixelDoublets::fishbone<<>>( - hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, false); + hh.store(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, false); cudaCheck(cudaGetLastError()); } blockSize = 64; numberOfBlocks = (3 * params_.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize; - kernel_find_ntuplets<<>>(hh.view(), + kernel_find_ntuplets<<>>(hh.store(), device_theCells_.get(), device_nCells_, device_theCellTracks_.get(), @@ -88,7 +88,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA * cudaCheck(cudaGetLastError()); if (params_.doStats_) - kernel_mark_used<<>>(hh.view(), device_theCells_.get(), device_nCells_); + kernel_mark_used<<>>(hh.store(), device_theCells_.get(), device_nCells_); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG @@ -123,7 +123,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA * dim3 blks(1, 
numberOfBlocks, 1); dim3 thrs(stride, blockSize, 1); gpuPixelDoublets::fishbone<<>>( - hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, true); + hh.store(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, true); cudaCheck(cudaGetLastError()); } @@ -205,7 +205,7 @@ void CAHitNtupletGeneratorKernelsGPU::buildDoublets(HitsOnCPU const &hh, cudaStr device_nCells_, device_theCellNeighbors_.get(), device_theCellTracks_.get(), - hh.view(), + hh.store(), device_isOuterHitOfCell_.get(), nActualPairs, params_.idealConditions_, @@ -275,7 +275,7 @@ void CAHitNtupletGeneratorKernelsGPU::classifyTuples(HitsOnCPU const &hh, TkSoA // remove duplicates (tracks that share a hit) numberOfBlocks = (hitToTupleView_.offSize + blockSize - 1) / blockSize; kernel_sharedHitCleaner<<>>( - hh.view(), tuples_d, tracks_d, quality_d, params_.minHitsForSharingCut_, device_hitToTuple_.get()); + hh.store(), tuples_d, tracks_d, quality_d, params_.minHitsForSharingCut_, device_hitToTuple_.get()); cudaCheck(cudaGetLastError()); } @@ -314,11 +314,11 @@ void CAHitNtupletGeneratorKernelsGPU::classifyTuples(HitsOnCPU const &hh, TkSoA static std::atomic iev(0); ++iev; kernel_print_found_ntuplets<<<1, 32, 0, cudaStream>>>( - hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 100, iev); + hh.store(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 100, iev); #endif } template <> void CAHitNtupletGeneratorKernelsGPU::printCounters(Counters const *counters) { kernel_printCounters<<<1, 1>>>(counters); -} +} \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.h b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.h index dd87597a4..6f5cdccb0 100644 --- a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.h +++ b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernels.h @@ -25,8 +25,8 @@ namespace cAHitNtupletGenerator { unsigned long long 
nZeroTrackCells; }; - using HitsView = TrackingRecHit2DSOAView; - using HitsOnGPU = TrackingRecHit2DSOAView; + using HitsView = TrackingRecHit2DSOAStore; + using HitsOnGPU = TrackingRecHit2DSOAStore; using HitToTuple = caConstants::HitToTuple; using TupleMultiplicity = caConstants::TupleMultiplicity; @@ -157,8 +157,8 @@ class CAHitNtupletGeneratorKernels { template using unique_ptr = typename Traits::template unique_ptr; - using HitsView = TrackingRecHit2DSOAView; - using HitsOnGPU = TrackingRecHit2DSOAView; + using HitsView = TrackingRecHit2DSOAStore; + using HitsOnGPU = TrackingRecHit2DSOAStore; using HitsOnCPU = TrackingRecHit2DHeterogeneous; using HitToTuple = caConstants::HitToTuple; @@ -232,4 +232,4 @@ class CAHitNtupletGeneratorKernels { using CAHitNtupletGeneratorKernelsGPU = CAHitNtupletGeneratorKernels; using CAHitNtupletGeneratorKernelsCPU = CAHitNtupletGeneratorKernels; -#endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h +#endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h index f14f5d8ea..032cf3a73 100644 --- a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h +++ b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h @@ -20,7 +20,7 @@ #include "gpuFishbone.h" #include "gpuPixelDoublets.h" -using HitsOnGPU = TrackingRecHit2DSOAView; +using HitsOnGPU = TrackingRecHit2DSOAStore; using HitsOnCPU = TrackingRecHit2DCUDA; using HitToTuple = caConstants::HitToTuple; @@ -449,7 +449,7 @@ __global__ void kernel_fillHitInTracks(HitContainer const *__restrict__ tuples, } __global__ void kernel_fillHitDetIndices(HitContainer const *__restrict__ tuples, - TrackingRecHit2DSOAView const *__restrict__ hhp, + TrackingRecHit2DSOAStore const *__restrict__ hhp, HitContainer *__restrict__ hitDetIndices) { 
int first = blockDim.x * blockIdx.x + threadIdx.x; // copy offsets @@ -461,7 +461,7 @@ __global__ void kernel_fillHitDetIndices(HitContainer const *__restrict__ tuples auto nhits = hh.nHits(); for (int idx = first, ntot = tuples->size(); idx < ntot; idx += gridDim.x * blockDim.x) { assert(tuples->content[idx] < nhits); - hitDetIndices->content[idx] = hh.detectorIndex(tuples->content[idx]); + hitDetIndices->content[idx] = hh[tuples->content[idx]].detectorIndex(); } } @@ -478,7 +478,7 @@ __global__ void kernel_doStatsForHitInTracks(CAHitNtupletGeneratorKernelsGPU::Hi } } -__global__ void kernel_sharedHitCleaner(TrackingRecHit2DSOAView const *__restrict__ hhp, +__global__ void kernel_sharedHitCleaner(TrackingRecHit2DSOAStore const *__restrict__ hhp, HitContainer const *__restrict__ ptuples, TkSoA const *__restrict__ ptracks, Quality *__restrict__ quality, @@ -540,7 +540,7 @@ __global__ void kernel_sharedHitCleaner(TrackingRecHit2DSOAView const *__restric } // loop over hits } -__global__ void kernel_print_found_ntuplets(TrackingRecHit2DSOAView const *__restrict__ hhp, +__global__ void kernel_print_found_ntuplets(TrackingRecHit2DSOAStore const *__restrict__ hhp, HitContainer const *__restrict__ ptuples, TkSoA const *__restrict__ ptracks, Quality const *__restrict__ quality, @@ -604,4 +604,4 @@ __global__ void kernel_printCounters(cAHitNtupletGenerator::Counters const *coun c.nKilledCells / double(c.nEvents), c.nEmptyCells / double(c.nCells), c.nZeroTrackCells / double(c.nCells)); -} +} \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorOnGPU.cc b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorOnGPU.cc index 714748cc1..490184d2b 100644 --- a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorOnGPU.cc +++ b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorOnGPU.cc @@ -124,14 +124,14 @@ PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecH kernels.buildDoublets(hits_d, stream); 
kernels.launchKernels(hits_d, soa, stream); - kernels.fillHitDetIndices(hits_d.view(), soa, stream); // in principle needed only if Hits not "available" + kernels.fillHitDetIndices(hits_d.store(), soa, stream); // in principle needed only if Hits not "available" HelixFitOnGPU fitter(bfield, m_params.fit5as4_); fitter.allocateOnGPU(&(soa->hitIndices), kernels.tupleMultiplicity(), soa); if (m_params.useRiemannFit_) { - fitter.launchRiemannKernels(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream); + fitter.launchRiemannKernels(hits_d.store(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream); } else { - fitter.launchBrokenLineKernels(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream); + fitter.launchBrokenLineKernels(hits_d.store(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream); } kernels.classifyTuples(hits_d, soa, stream); @@ -156,7 +156,7 @@ PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuples(TrackingRecHit2DC kernels.buildDoublets(hits_d, nullptr); kernels.launchKernels(hits_d, soa, nullptr); - kernels.fillHitDetIndices(hits_d.view(), soa, nullptr); // in principle needed only if Hits not "available" + kernels.fillHitDetIndices(hits_d.store(), soa, nullptr); // in principle needed only if Hits not "available" if (0 == hits_d.nHits()) return tracks; @@ -166,9 +166,9 @@ PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuples(TrackingRecHit2DC fitter.allocateOnGPU(&(soa->hitIndices), kernels.tupleMultiplicity(), soa); if (m_params.useRiemannFit_) { - fitter.launchRiemannKernelsOnCPU(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets); + fitter.launchRiemannKernelsOnCPU(hits_d.store(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets); } else { - fitter.launchBrokenLineKernelsOnCPU(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets); + fitter.launchBrokenLineKernelsOnCPU(hits_d.store(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets); 
} kernels.classifyTuples(hits_d, soa, nullptr); @@ -178,4 +178,4 @@ PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuples(TrackingRecHit2DC #endif return tracks; -} +} \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorOnGPU.h b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorOnGPU.h index f42bb301b..978b828a5 100644 --- a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorOnGPU.h +++ b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorOnGPU.h @@ -19,9 +19,9 @@ namespace edm { class CAHitNtupletGeneratorOnGPU { public: - using HitsOnGPU = TrackingRecHit2DSOAView; + using HitsOnGPU = TrackingRecHit2DSOAStore; using HitsOnCPU = TrackingRecHit2DCUDA; - using hindex_type = TrackingRecHit2DSOAView::hindex_type; + using hindex_type = TrackingRecHit2DSOAStore::hindex_type; using Quality = pixelTrack::Quality; using OutputSoA = pixelTrack::TrackSoA; @@ -53,4 +53,4 @@ class CAHitNtupletGeneratorOnGPU { Counters* m_counters = nullptr; }; -#endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorOnGPU_h +#endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorOnGPU_h \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/GPUCACell.h b/src/cudadev/plugin-PixelTriplets/GPUCACell.h index 58ef54ae8..84eab621e 100644 --- a/src/cudadev/plugin-PixelTriplets/GPUCACell.h +++ b/src/cudadev/plugin-PixelTriplets/GPUCACell.h @@ -29,7 +29,7 @@ class GPUCACell { using CellNeighborsVector = caConstants::CellNeighborsVector; using CellTracksVector = caConstants::CellTracksVector; - using Hits = TrackingRecHit2DSOAView; + using Hits = TrackingRecHit2DSOAStore; using hindex_type = Hits::hindex_type; using TmpTuple = cms::cuda::VecArray; @@ -54,8 +54,8 @@ class GPUCACell { theUsed_ = 0; // optimization that depends on access pattern - theInnerZ = hh.zGlobal(innerHitId); - theInnerR = hh.rGlobal(innerHitId); + theInnerZ = hh[innerHitId].zGlobal(); + theInnerR = 
hh[innerHitId].rGlobal(); // link to default empty theOuterNeighbors = &cellNeighbors[0]; @@ -109,22 +109,22 @@ class GPUCACell { __device__ __forceinline__ CellTracks const& tracks() const { return *theTracks; } __device__ __forceinline__ CellNeighbors& outerNeighbors() { return *theOuterNeighbors; } __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; } - __device__ __forceinline__ float inner_x(Hits const& hh) const { return hh.xGlobal(theInnerHitId); } - __device__ __forceinline__ float outer_x(Hits const& hh) const { return hh.xGlobal(theOuterHitId); } - __device__ __forceinline__ float inner_y(Hits const& hh) const { return hh.yGlobal(theInnerHitId); } - __device__ __forceinline__ float outer_y(Hits const& hh) const { return hh.yGlobal(theOuterHitId); } + __device__ __forceinline__ float inner_x(Hits const& hh) const { return hh[theInnerHitId].xGlobal(); } + __device__ __forceinline__ float outer_x(Hits const& hh) const { return hh[theOuterHitId].xGlobal(); } + __device__ __forceinline__ float inner_y(Hits const& hh) const { return hh[theInnerHitId].yGlobal(); } + __device__ __forceinline__ float outer_y(Hits const& hh) const { return hh[theOuterHitId].yGlobal(); } __device__ __forceinline__ float inner_z(Hits const& hh) const { return theInnerZ; } // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; } - __device__ __forceinline__ float outer_z(Hits const& hh) const { return hh.zGlobal(theOuterHitId); } + __device__ __forceinline__ float outer_z(Hits const& hh) const { return hh[theOuterHitId].zGlobal(); } __device__ __forceinline__ float inner_r(Hits const& hh) const { return theInnerR; } // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; } - __device__ __forceinline__ float outer_r(Hits const& hh) const { return hh.rGlobal(theOuterHitId); } + __device__ __forceinline__ float outer_r(Hits const& hh) const { return hh[theOuterHitId].rGlobal(); } - __device__ __forceinline__ auto 
inner_iphi(Hits const& hh) const { return hh.iphi(theInnerHitId); } - __device__ __forceinline__ auto outer_iphi(Hits const& hh) const { return hh.iphi(theOuterHitId); } + __device__ __forceinline__ auto inner_iphi(Hits const& hh) const { return hh[theInnerHitId].iphi(); } + __device__ __forceinline__ auto outer_iphi(Hits const& hh) const { return hh[theOuterHitId].iphi(); } - __device__ __forceinline__ float inner_detIndex(Hits const& hh) const { return hh.detectorIndex(theInnerHitId); } - __device__ __forceinline__ float outer_detIndex(Hits const& hh) const { return hh.detectorIndex(theOuterHitId); } + __device__ __forceinline__ float inner_detIndex(Hits const& hh) const { return hh[theInnerHitId].detectorIndex(); } + __device__ __forceinline__ float outer_detIndex(Hits const& hh) const { return hh[theOuterHitId].detectorIndex(); } constexpr unsigned int inner_hit_id() const { return theInnerHitId; } constexpr unsigned int outer_hit_id() const { return theOuterHitId; } diff --git a/src/cudadev/plugin-PixelTriplets/HelixFitOnGPU.h b/src/cudadev/plugin-PixelTriplets/HelixFitOnGPU.h index fee0f8dae..92f881f24 100644 --- a/src/cudadev/plugin-PixelTriplets/HelixFitOnGPU.h +++ b/src/cudadev/plugin-PixelTriplets/HelixFitOnGPU.h @@ -33,7 +33,7 @@ namespace riemannFit { class HelixFitOnGPU { public: - using HitsView = TrackingRecHit2DSOAView; + using HitsView = TrackingRecHit2DSOAStore; using Tuples = pixelTrack::HitContainer; using OutputSoA = pixelTrack::TrackSoA; @@ -65,4 +65,4 @@ class HelixFitOnGPU { const bool fit5as4_; }; -#endif // RecoPixelVertexing_PixelTriplets_plugins_HelixFitOnGPU_h +#endif // RecoPixelVertexing_PixelTriplets_plugins_HelixFitOnGPU_h \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/RiemannFitOnGPU.h b/src/cudadev/plugin-PixelTriplets/RiemannFitOnGPU.h index 12c9856fa..b82cf795f 100644 --- a/src/cudadev/plugin-PixelTriplets/RiemannFitOnGPU.h +++ b/src/cudadev/plugin-PixelTriplets/RiemannFitOnGPU.h @@ -14,7 +14,7 @@ 
#include "RiemannFit.h" #include "HelixFitOnGPU.h" -using HitsOnGPU = TrackingRecHit2DSOAView; +using HitsOnGPU = TrackingRecHit2DSOAStore; using Tuples = pixelTrack::HitContainer; using OutputSoA = pixelTrack::TrackSoA; @@ -66,11 +66,11 @@ __global__ void kernel_FastFit(Tuples const *__restrict__ foundNtuplets, // printf("Hit global: %f,%f,%f\n", hhp->xg_d[hit],hhp->yg_d[hit],hhp->zg_d[hit]); float ge[6]; hhp->cpeParams() - .detParams(hhp->detectorIndex(hit)) - .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge); + .detParams((*hhp)[hit].detectorIndex()) + .frame.toGlobal((*hhp)[hit].xerrLocal(), 0, (*hhp)[hit].yerrLocal(), ge); // printf("Error: %d: %f,%f,%f,%f,%f,%f\n",hhp->detInd_d[hit],ge[0],ge[1],ge[2],ge[3],ge[4],ge[5]); - hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit); + hits.col(i) << (*hhp)[hit].xGlobal(), (*hhp)[hit].yGlobal(), (*hhp)[hit].zGlobal(); hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; } riemannFit::fastFit(hits, fast_fit); @@ -184,4 +184,4 @@ __global__ void kernel_LineFit(caConstants::TupleMultiplicity const *__restrict_ line_fit.cov(1, 1)); #endif } -} +} \ No newline at end of file diff --git a/src/cudadev/plugin-PixelTriplets/gpuPixelDoublets.h b/src/cudadev/plugin-PixelTriplets/gpuPixelDoublets.h index cbb374698..4cbbf51c2 100644 --- a/src/cudadev/plugin-PixelTriplets/gpuPixelDoublets.h +++ b/src/cudadev/plugin-PixelTriplets/gpuPixelDoublets.h @@ -99,7 +99,7 @@ namespace gpuPixelDoublets { uint32_t* nCells, CellNeighborsVector* cellNeighbors, CellTracksVector* cellTracks, - TrackingRecHit2DSOAView const* __restrict__ hhp, + TrackingRecHit2DSOAStore const* __restrict__ hhp, GPUCACell::OuterHitOfCell* isOuterHitOfCell, int nActualPairs, bool ideal_cond, @@ -129,4 +129,4 @@ namespace gpuPixelDoublets { } // namespace gpuPixelDoublets -#endif // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoublets_h +#endif // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoublets_h \ No newline at 
end of file diff --git a/src/cudadev/plugin-PixelTriplets/gpuPixelDoubletsAlgos.h b/src/cudadev/plugin-PixelTriplets/gpuPixelDoubletsAlgos.h index 0dcc65203..308ce1f98 100644 --- a/src/cudadev/plugin-PixelTriplets/gpuPixelDoubletsAlgos.h +++ b/src/cudadev/plugin-PixelTriplets/gpuPixelDoubletsAlgos.h @@ -28,7 +28,7 @@ namespace gpuPixelDoublets { uint32_t* nCells, CellNeighborsVector* cellNeighbors, CellTracksVector* cellTracks, - TrackingRecHit2DSOAView const& __restrict__ hh, + TrackingRecHit2DSOAStore const& __restrict__ hh, GPUCACell::OuterHitOfCell* isOuterHitOfCell, int16_t const* __restrict__ phicuts, float const* __restrict__ minz, @@ -50,7 +50,7 @@ namespace gpuPixelDoublets { bool isOuterLadder = ideal_cond; - using PhiBinner = TrackingRecHit2DSOAView::PhiBinner; + using PhiBinner = TrackingRecHit2DSOAStore::PhiBinner; auto const& __restrict__ phiBinner = hh.phiBinner(); uint32_t const* __restrict__ offsets = hh.hitsLayerStart(); @@ -104,7 +104,7 @@ namespace gpuPixelDoublets { assert(i < offsets[inner + 1]); // found hit corresponding to our cuda thread, now do the job - auto mi = hh.detectorIndex(i); + auto mi = hh[i].detectorIndex(); if (mi > gpuClustering::maxNumModules) continue; // invalid @@ -114,7 +114,7 @@ namespace gpuPixelDoublets { if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue; */ - auto mez = hh.zGlobal(i); + auto mez = hh[i].zGlobal(); if (mez < minz[pairLayerId] || mez > maxz[pairLayerId]) continue; @@ -127,7 +127,7 @@ namespace gpuPixelDoublets { isOuterLadder = ideal_cond ? true : 0 == (mi / 8) % 2; // only for B1/B2/B3 B4 is opposite, FPIX:noclue... // in any case we always test mes>0 ... - mes = inner > 0 || isOuterLadder ? hh.clusterSizeY(i) : -1; + mes = inner > 0 || isOuterLadder ? 
hh[i].clusterSizeY() : -1; if (inner == 0 && outer > 3) // B1 and F1 if (mes > 0 && mes < minYsizeB1) @@ -136,8 +136,8 @@ namespace gpuPixelDoublets { if (mes > 0 && mes < minYsizeB2) continue; } - auto mep = hh.iphi(i); - auto mer = hh.rGlobal(i); + auto mep = hh[i].iphi(); + auto mer = hh[i].rGlobal(); // all cuts: true if fails constexpr float z0cut = 12.f; // cm @@ -148,26 +148,26 @@ namespace gpuPixelDoublets { auto ptcut = [&](int j, int16_t idphi) { auto r2t4 = minRadius2T4; auto ri = mer; - auto ro = hh.rGlobal(j); + auto ro = hh[j].rGlobal(); auto dphi = short2phi(idphi); return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri); }; auto z0cutoff = [&](int j) { - auto zo = hh.zGlobal(j); - auto ro = hh.rGlobal(j); + auto zo = hh[j].zGlobal(); + auto ro = hh[j].rGlobal(); auto dr = ro - mer; return dr > maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr; }; auto zsizeCut = [&](int j) { auto onlyBarrel = outer < 4; - auto so = hh.clusterSizeY(j); + auto so = hh[j].clusterSizeY(); auto dy = inner == 0 ? maxDYsize12 : maxDYsize; // in the barrel cut on difference in size // in the endcap on the prediction on the first layer (actually in the barrel only: happen to be safe for endcap as well) // FIXME move pred cut to z0cutoff to optmize loading of and computaiton ... - auto zo = hh.zGlobal(j); - auto ro = hh.rGlobal(j); + auto zo = hh[j].zGlobal(); + auto ro = hh[j].rGlobal(); return onlyBarrel ? 
mes > 0 && so > 0 && std::abs(so - mes) > dy : (inner < 4) && mes > 0 && std::abs(mes - int(std::abs((mez - zo) / (mer - ro)) * dzdrFact + 0.5f)) > maxDYPred; @@ -199,14 +199,14 @@ namespace gpuPixelDoublets { auto oi = __ldg(p); assert(oi >= offsets[outer]); assert(oi < offsets[outer + 1]); - auto mo = hh.detectorIndex(oi); + auto mo = hh[oi].detectorIndex(); if (mo > gpuClustering::maxNumModules) continue; // invalid if (doZ0Cut && z0cutoff(oi)) continue; - auto mop = hh.iphi(oi); + auto mop = hh[oi].iphi(); uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop))); if (idphi > iphicut) continue; @@ -240,4 +240,4 @@ namespace gpuPixelDoublets { } // namespace gpuPixelDoublets -#endif // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h +#endif // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h \ No newline at end of file diff --git a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu index 997e5b450..fcee79db8 100644 --- a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu +++ b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu @@ -50,7 +50,7 @@ namespace pixelgpudetails { // protect from empty events if (blocks) { gpuPixelRecHits::getHits<<>>( - cpeParams, bs_d.data(), digis_d.pixelView(), digis_d.nDigis(), clusters_d.store(), hits_d.view()); + cpeParams, bs_d.data(), digis_d.pixelView(), digis_d.nDigis(), clusters_d.store(), hits_d.store()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaCheck(cudaDeviceSynchronize()); @@ -74,4 +74,4 @@ namespace pixelgpudetails { return hits_d; } -} // namespace pixelgpudetails +} // namespace pixelgpudetails \ No newline at end of file diff --git a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h index 5c556f92b..2c3d7bff2 100644 --- a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h +++ 
b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h @@ -19,7 +19,7 @@ namespace gpuPixelRecHits { SiPixelDigisCUDA::DevicePixelView pdigis, int numElements, SiPixelClustersCUDA::DeviceStore const pclusters, - TrackingRecHit2DSOAView* phits) { + TrackingRecHit2DSOAStore* phits) { // FIXME // the compiler seems NOT to optimize loads from views (even in a simple test case) // The whole gimnastic here of copying or not is a pure heuristic exercise that seems to produce the fastest code with the above signature @@ -37,7 +37,7 @@ namespace gpuPixelRecHits { if (0 == blockIdx.x) { auto& agc = hits.averageGeometry(); auto const& ag = cpeParams->averageGeometry(); - for (int il = threadIdx.x, nl = TrackingRecHit2DSOAView::AverageGeometry::numberOfLaddersInBarrel; il < nl; + for (int il = threadIdx.x, nl = TrackingRecHit2DSOAStore::AverageGeometry::numberOfLaddersInBarrel; il < nl; il += blockDim.x) { agc.ladderZ[il] = ag.ladderZ[il] - bs->z; agc.ladderX[il] = ag.ladderX[il] - bs->x; @@ -175,18 +175,18 @@ namespace gpuPixelRecHits { pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); // store it - hits.charge(h) = clusParams.charge[ic]; - hits.detectorIndex(h) = me; + hits[h].charge = clusParams.charge[ic]; + hits[h].detectorIndex = me; float xl, yl; - hits.xLocal(h) = xl = clusParams.xpos[ic]; - hits.yLocal(h) = yl = clusParams.ypos[ic]; + hits[h].xLocal = xl = clusParams.xpos[ic]; + hits[h].yLocal = yl = clusParams.ypos[ic]; - hits.clusterSizeX(h) = clusParams.xsize[ic]; - hits.clusterSizeY(h) = clusParams.ysize[ic]; + hits[h].clusterSizeX = clusParams.xsize[ic]; + hits[h].clusterSizeY = clusParams.ysize[ic]; - hits.xerrLocal(h) = clusParams.xerr[ic] * clusParams.xerr[ic]; - hits.yerrLocal(h) = clusParams.yerr[ic] * clusParams.yerr[ic]; + hits[h].xerrLocal = clusParams.xerr[ic] * clusParams.xerr[ic]; + hits[h].yerrLocal = clusParams.yerr[ic] * clusParams.yerr[ic]; // keep it local for computations float xg, yg, zg; @@ -197,12 
+197,12 @@ namespace gpuPixelRecHits { yg -= bs->y; zg -= bs->z; - hits.xGlobal(h) = xg; - hits.yGlobal(h) = yg; - hits.zGlobal(h) = zg; + hits[h].xGlobal = xg; + hits[h].yGlobal = yg; + hits[h].zGlobal = zg; - hits.rGlobal(h) = std::sqrt(xg * xg + yg * yg); - hits.iphi(h) = unsafe_atan2s<7>(yg, xg); + hits[h].rGlobal = std::sqrt(xg * xg + yg * yg); + hits[h].iphi = unsafe_atan2s<7>(yg, xg); } __syncthreads(); } // end loop on batches @@ -210,4 +210,4 @@ namespace gpuPixelRecHits { } // namespace gpuPixelRecHits -#endif // RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h +#endif // RecoLocalTracker_SiPixelRecHits_plugins_gpuPixelRecHits_h \ No newline at end of file diff --git a/src/cudadev/plugin-Validation/HistoValidator.cc b/src/cudadev/plugin-Validation/HistoValidator.cc index 8a888666b..9f998de8d 100644 --- a/src/cudadev/plugin-Validation/HistoValidator.cc +++ b/src/cudadev/plugin-Validation/HistoValidator.cc @@ -38,10 +38,7 @@ class HistoValidator : public edm::EDProducerExternalWork { uint32_t nHits; cms::cuda::host::unique_ptr h_adc; cms::cuda::host::unique_ptr h_clusInModule; - cms::cuda::host::unique_ptr h_localCoord; - cms::cuda::host::unique_ptr h_globalCoord; - cms::cuda::host::unique_ptr h_charge; - cms::cuda::host::unique_ptr h_size; + TrackingRecHit2DHostSOAView h_hits; static std::map histos; }; @@ -107,10 +104,7 @@ void HistoValidator::acquire(const edm::Event& iEvent, h_clusInModule.get(), clusters.clusInModule(), sizeof(uint32_t) * nModules, cudaMemcpyDefault, ctx.stream())); nHits = hits.nHits(); - h_localCoord = hits.localCoordToHostAsync(ctx.stream()); - h_globalCoord = hits.globalCoordToHostAsync(ctx.stream()); - h_charge = hits.chargeToHostAsync(ctx.stream()); - h_size = hits.sizeToHostAsync(ctx.stream()); + h_hits = hits.hitsToHostAsync(ctx.stream()); } void HistoValidator::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { @@ -129,23 +123,19 @@ void HistoValidator::produce(edm::Event& iEvent, const edm::EventSetup& 
iSetup) histos["hit_n"].fill(nHits); for (uint32_t i = 0; i < nHits; ++i) { - histos["hit_lx"].fill(h_localCoord[i]); - histos["hit_ly"].fill(h_localCoord[i + nHits]); - histos["hit_lex"].fill(h_localCoord[i + 2 * nHits]); - histos["hit_ley"].fill(h_localCoord[i + 3 * nHits]); - histos["hit_gx"].fill(h_globalCoord[i]); - histos["hit_gy"].fill(h_globalCoord[i + nHits]); - histos["hit_gz"].fill(h_globalCoord[i + 2 * nHits]); - histos["hit_gr"].fill(h_globalCoord[i + 3 * nHits]); - histos["hit_charge"].fill(h_charge[i]); - histos["hit_sizex"].fill(h_size[i]); - histos["hit_sizey"].fill(h_size[i + nHits]); + histos["hit_lx"].fill(h_hits[i].xLocal()); + histos["hit_ly"].fill(h_hits[i].yLocal()); + histos["hit_lex"].fill(h_hits[i].xerrLocal()); + histos["hit_ley"].fill(h_hits[i].yerrLocal()); + histos["hit_gx"].fill(h_hits[i].xGlobal()); + histos["hit_gy"].fill(h_hits[i].yGlobal()); + histos["hit_gz"].fill(h_hits[i].zGlobal()); + histos["hit_gr"].fill(h_hits[i].rGlobal()); + histos["hit_charge"].fill(h_hits[i].charge()); + histos["hit_sizex"].fill(h_hits[i].clusterSizeX()); + histos["hit_sizey"].fill(h_hits[i].clusterSizeY()); } - h_localCoord.reset(); - h_globalCoord.reset(); - h_charge.reset(); - h_size.reset(); - + h_hits.reset(); { auto const& tracks = iEvent.get(trackToken_); @@ -189,4 +179,4 @@ void HistoValidator::endJob() { } } -DEFINE_FWK_MODULE(HistoValidator); +DEFINE_FWK_MODULE(HistoValidator); \ No newline at end of file diff --git a/src/cudadev/test/TrackingRecHit2DCUDA_t.cu b/src/cudadev/test/TrackingRecHit2DCUDA_t.cu index 4572d8f42..efd76fd70 100644 --- a/src/cudadev/test/TrackingRecHit2DCUDA_t.cu +++ b/src/cudadev/test/TrackingRecHit2DCUDA_t.cu @@ -4,7 +4,7 @@ namespace testTrackingRecHit2D { - __global__ void fill(TrackingRecHit2DSOAView* phits) { + __global__ void fill(TrackingRecHit2DSOAStore* phits) { assert(phits); [[maybe_unused]] auto& hits = *phits; assert(hits.nHits() == 200); @@ -14,7 +14,7 @@ namespace testTrackingRecHit2D { return; } - 
__global__ void verify(TrackingRecHit2DSOAView const* phits) { + __global__ void verify(TrackingRecHit2DSOAStore const* phits) { assert(phits); [[maybe_unused]] auto const& hits = *phits; assert(hits.nHits() == 200); @@ -24,7 +24,7 @@ namespace testTrackingRecHit2D { return; } - void runKernels(TrackingRecHit2DSOAView* hits) { + void runKernels(TrackingRecHit2DSOAStore* hits) { assert(hits); fill<<<1, 1024>>>(hits); verify<<<1, 1024>>>(hits); @@ -34,7 +34,7 @@ namespace testTrackingRecHit2D { namespace testTrackingRecHit2D { - void runKernels(TrackingRecHit2DSOAView* hits); + void runKernels(TrackingRecHit2DSOAStore* hits); } @@ -47,10 +47,10 @@ int main() { auto nHits = 200; TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream); - testTrackingRecHit2D::runKernels(tkhit.view()); + testTrackingRecHit2D::runKernels(tkhit.store()); } cudaCheck(cudaStreamDestroy(stream)); return 0; -} +} \ No newline at end of file From 2918050d25160763dbf8e677dd9ce6952d84eb1a Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 19 Oct 2021 10:25:40 +0200 Subject: [PATCH 07/50] [cudadev] Updated SoA views so they support scalars in addition to columns. Eigen columns, which are present in the stores, are explicitly not supported in views (enforced by a static assertion).
--- src/cudadev/DataFormats/SoACommon.h | 19 ++--- src/cudadev/DataFormats/SoAStore.h | 107 +++++----------------------- src/cudadev/DataFormats/SoAView.h | 42 ++++++++--- 3 files changed, 60 insertions(+), 108 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 912a92413..63f868fc4 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -43,7 +43,7 @@ class SoAValue { SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): idx_(i), col_(col) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ SOA_HOST_DEVICE_INLINE T& operator() () { return col_[idx_]; } - SOA_HOST_DEVICE_INLINE T operator() () const { return LOAD_INCOHERENT(col_ + idx_); } + SOA_HOST_DEVICE_INLINE T operator() () const { return *(col_ + idx_); } SOA_HOST_DEVICE_INLINE T* operator& () { return &col_[idx_]; } SOA_HOST_DEVICE_INLINE const T* operator& () const { return &col_[idx_]; } template @@ -61,7 +61,7 @@ class SoAConstValue { public: SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, const T * col): idx_(i), col_(col) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ - SOA_HOST_DEVICE_INLINE T operator() () const { return LOAD_INCOHERENT(col_ + idx_); } + SOA_HOST_DEVICE_INLINE T operator() () const { return *(col_ + idx_); } SOA_HOST_DEVICE_INLINE const T* operator& () const { return &col_[idx_]; } typedef T valueType; static constexpr auto valueSize = sizeof(T); @@ -142,12 +142,16 @@ inline size_t alignSize(size_t size, size_t alignment = 128) { #define _VALUE_TYPE_SCALAR 0 #define _VALUE_TYPE_COLUMN 1 #define _VALUE_TYPE_EIGEN_COLUMN 2 -#define _VALUE_TYPE_FUNDAMENTAL_COLUMN 3 + +enum class SoAColumnType { + scalar = _VALUE_TYPE_SCALAR, + column = _VALUE_TYPE_COLUMN, + eigen = _VALUE_TYPE_EIGEN_COLUMN +}; #define SoA_scalar(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) #define SoA_column(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) #define SoA_eigenColumn(TYPE, NAME) 
(_VALUE_TYPE_EIGEN_COLUMN, TYPE, NAME) -#define SoA_FundamentalTypeColumn(TYPE, NAME) (_VALUE_TYPE_FUNDAMENTAL_COLUMN, TYPE, NAME) /* Iterate on the macro MACRO and return the result as a comma separated list */ #define _ITERATE_ON_ALL_COMMA(MACRO, DATA, ...) \ @@ -163,17 +167,14 @@ inline size_t alignSize(size_t size, size_t alignment = 128) { ) /* Switch on macros depending on scalar / column type */ -#define _SWITCH_ON_TYPE(VALUE_TYPE, IF_SCALAR, IF_COLUMN, IF_EIGEN_COLUMN, IF_FUNDAMENTAL_COLUMN) \ +#define _SWITCH_ON_TYPE(VALUE_TYPE, IF_SCALAR, IF_COLUMN, IF_EIGEN_COLUMN) \ BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_SCALAR), \ IF_SCALAR, \ BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_COLUMN), \ IF_COLUMN, \ BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_EIGEN_COLUMN), \ IF_EIGEN_COLUMN, \ - BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_FUNDAMENTAL_COLUMN), \ - IF_FUNDAMENTAL_COLUMN, \ - BOOST_PP_EMPTY() \ - ) \ + BOOST_PP_EMPTY() \ ) \ ) \ ) diff --git a/src/cudadev/DataFormats/SoAStore.h b/src/cudadev/DataFormats/SoAStore.h index fdd4cc987..fdead868d 100644 --- a/src/cudadev/DataFormats/SoAStore.h +++ b/src/cudadev/DataFormats/SoAStore.h @@ -14,7 +14,7 @@ /* dump SoA fields information; these should expand to, for columns: * Example: - * declare_SoA_template(SoA, + * generate_SoA_store(SoA, * // predefined static scalars * // size_t size; * // size_t alignment; @@ -82,13 +82,6 @@ << std::endl; \ offset+=(((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment \ * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ - , \ - /* Dump fundamental type column */ \ - std::cout << " Column " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset \ - << " has size " << sizeof(CPP_TYPE) * nElements << " and padding " \ - << (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment - (sizeof(CPP_TYPE) * nElements) \ - << std::endl; \ - offset+=(((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) 
* byteAlignment; \ ) #define _DECLARE_SOA_DUMP_INFO(R, DATA, TYPE_NAME) \ @@ -98,53 +91,36 @@ /** * SoAMetadata member computing column pitch */ -#define _COMPUTE_SOA_COLUMN_PITCH_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ +#define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ _SWITCH_ON_TYPE(VALUE_TYPE, \ /* Scalar */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((sizeof(CPP_TYPE) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_; \ } \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::scalar; \ + CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ , \ /* Column */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_; \ } \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::column; \ + CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ , \ /* Eigen column */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_ \ * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ } \ - , \ - /* Fundamental type column */ \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_; \ - } \ - ) - -#define _COMPUTE_SOA_COLUMN_PITCH(R, DATA, TYPE_NAME) \ - _COMPUTE_SOA_COLUMN_PITCH_IMPL TYPE_NAME - -/** - * SoAMetadata member computing column pitch - */ -#define _DEFINE_SOA_COLUMN_TYPES_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - , \ - /* Column */ \ - typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - , \ - /* 
Eigen column */ \ - typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - , \ - /* Fundamental type column */ \ typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::eigen; \ + CPP_TYPE::Scalar * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ ) -#define _DEFINE_SOA_COLUMN_TYPES(R, DATA, TYPE_NAME) \ - _DEFINE_SOA_COLUMN_TYPES_IMPL TYPE_NAME +#define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) \ + _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME /** * Member assignment for trivial constructor @@ -160,9 +136,6 @@ /* Eigen column */ \ ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ ( BOOST_PP_CAT(NAME, Stride_) (0) ) \ - , \ - /* Fundamental type column */ \ - ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ ) #define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, TYPE_NAME) \ @@ -186,10 +159,6 @@ * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ BOOST_PP_CAT(NAME, Stride_) = (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment_) + 1) \ * byteAlignment_ / sizeof(CPP_TYPE::Scalar); \ - , \ - /* Fundamental type column */ \ - BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ - curMem += (((nElements_ * sizeof(CPP_TYPE) - 1) / byteAlignment_) + 1) * byteAlignment_; \ ) #define _ASSIGN_SOA_COLUMN_OR_SCALAR(R, DATA, TYPE_NAME) \ @@ -209,9 +178,6 @@ /* Eigen column */ \ ret += (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment \ * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ - , \ - /* Fundamental type column */ \ - ret += (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ ) #define _ACCUMULATE_SOA_ELEMENT(R, DATA, TYPE_NAME) \ @@ -234,9 +200,6 @@ EigenConstMapMaker::Type const NAME() { \ return EigenConstMapMaker::withData(soa_. NAME () + index_).withStride(soa_.BOOST_PP_CAT(NAME, Stride)()); \ } \ - , \ - /* Fundamental type column */ \ - CPP_TYPE const & NAME() { return * (soa_. 
NAME () + index_ + 0 + 0); } \ ) #define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) \ @@ -255,9 +218,6 @@ , \ /* Eigen column */ \ (CPP_TYPE::Scalar *NAME) (size_t BOOST_PP_CAT(NAME, Stride)) \ - , \ - /* Fundamental type column */ \ - (CPP_TYPE &NAME) \ ) #define _DECLARE_ELEMENT_VALUE_ARG(R, DATA, TYPE_NAME) \ @@ -276,9 +236,6 @@ , \ /* Eigen column */ \ (NAME (DATA, NAME, BOOST_PP_CAT(NAME, Stride))) \ - , \ - /* Fundamental type column */ \ - (NAME (NAME)) \ ) /* declare AoS-like element value args for contructor; these should expand,for columns only */ @@ -298,9 +255,6 @@ , \ /* Eigen column */ \ (BOOST_PP_CAT(NAME, _) (DATA, NAME, BOOST_PP_CAT(NAME, Stride))) \ - , \ - /* Fundamental type column */ \ - (BOOST_PP_CAT(NAME, _) (NAME)) \ ) /* declare AoS-like element value args for contructor; these should expand,for columns only */ @@ -319,9 +273,6 @@ , \ /* Eigen column */ \ static_cast(NAME) = static_cast::type &>(other.NAME); \ - , \ - /* Fundamental type column */ \ - NAME = static_cast::type &>(other.NAME); \ ) #define _DECLARE_ELEMENT_VALUE_COPY(R, DATA, TYPE_NAME) \ @@ -340,9 +291,6 @@ , \ /* Eigen column */ \ const SoAEigenValue BOOST_PP_CAT(NAME, _); \ - , \ - /* Fundamental type column */ \ - const CPP_TYPE & BOOST_PP_CAT(NAME, _); \ ) #define _DECLARE_CONST_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) \ @@ -356,14 +304,11 @@ /* Scalar */ \ BOOST_PP_EMPTY() \ , \ - /* Column */ /* (LOAD_INCOHERENT already done inside NAME_() */ \ + /* Column */ \ SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return BOOST_PP_CAT(NAME, _)(); } \ , \ /* Eigen column */ \ SOA_HOST_DEVICE_INLINE const SoAEigenValue NAME() const { return BOOST_PP_CAT(NAME, _); } \ - , \ - /* Fundamental type column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return LOAD_INCOHERENT(& BOOST_PP_CAT(NAME, _) ); } \ ) #define _DECLARE_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) \ @@ -382,9 +327,6 @@ , \ /* Eigen column */ \ SoAEigenValue NAME; \ - , \ - /* Fundamental type 
column */ \ - CPP_TYPE & NAME; \ ) @@ -404,9 +346,6 @@ , \ /* Eigen column */ \ (BOOST_PP_CAT(NAME, _)) (BOOST_PP_CAT(NAME, Stride_)) \ - , \ - /* Fundamental type column */ \ - (BOOST_PP_CAT(NAME, _[index])) \ ) #define _DECLARE_ELEMENT_CONSTR_CALL(R, DATA, TYPE_NAME) \ @@ -427,10 +366,6 @@ /* Eigen column */ \ /* Unsupported for the moment TODO */ \ BOOST_PP_EMPTY() \ - , \ - /* Fundamental type column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE* NAME() { return BOOST_PP_CAT(NAME, _); } \ - SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME(size_t index) { return BOOST_PP_CAT(NAME, _)[index]; } \ ) #define _DECLARE_SOA_ACCESSOR(R, DATA, TYPE_NAME) \ @@ -442,19 +377,15 @@ #define _DECLARE_SOA_CONST_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ _SWITCH_ON_TYPE(VALUE_TYPE, \ /* Scalar */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return LOAD_INCOHERENT(BOOST_PP_CAT(NAME, _)); } \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return *(BOOST_PP_CAT(NAME, _)); } \ , \ /* Column */ \ SOA_HOST_DEVICE_INLINE CPP_TYPE const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) const { return LOAD_INCOHERENT(BOOST_PP_CAT(NAME, _) + index); } \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) const { return *(BOOST_PP_CAT(NAME, _) + index); } \ , \ /* Eigen column */ \ SOA_HOST_DEVICE_INLINE CPP_TYPE::Scalar const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ - SOA_HOST_DEVICE_INLINE size_t BOOST_PP_CAT(NAME,Stride)() { return BOOST_PP_CAT(NAME, Stride_); } \ - , \ - /* Fundamental type column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) const { return LOAD_INCOHERENT(BOOST_PP_CAT(NAME, _) + index); } \ + SOA_HOST_DEVICE_INLINE size_t BOOST_PP_CAT(NAME,Stride)() { return BOOST_PP_CAT(NAME, Stride_); } \ ) #define _DECLARE_SOA_CONST_ACCESSOR(R, DATA, TYPE_NAME) \ @@ -474,9 +405,6 @@ /* Eigen column */ \ CPP_TYPE::Scalar * 
BOOST_PP_CAT(NAME, _) = nullptr; \ size_t BOOST_PP_CAT(NAME, Stride_) = 0; \ - , \ - /* Fundamental type column */ \ - CPP_TYPE * BOOST_PP_CAT(NAME, _) = nullptr; \ ) #define _DECLARE_SOA_DATA_MEMBER(R, DATA, TYPE_NAME) \ @@ -533,8 +461,7 @@ struct CLASS { SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) { \ return CLASS(addr, parent_.nElements_, parent_.byteAlignment_ ); \ } \ - _ITERATE_ON_ALL(_COMPUTE_SOA_COLUMN_PITCH, ~, __VA_ARGS__) \ - _ITERATE_ON_ALL(_DEFINE_SOA_COLUMN_TYPES, ~, __VA_ARGS__) \ + _ITERATE_ON_ALL(_DEFINE_METADATA_MEMBERS, ~, __VA_ARGS__) \ \ private: \ SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent): parent_(parent) {} \ diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 6b26d42fb..b84127aab 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -58,11 +58,17 @@ /** * Member types aliasing for referencing by name */ -#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - typedef BOOST_PP_CAT(TypeOf_, STORE_NAME) :: SoAMetadata:: BOOST_PP_CAT(TypeOf_, STORE_MEMBER) BOOST_PP_CAT(TypeOf_, LOCAL_NAME); +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + typedef BOOST_PP_CAT(TypeOf_, STORE_NAME) :: SoAMetadata:: BOOST_PP_CAT(TypeOf_, STORE_MEMBER) BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ + static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ + BOOST_PP_CAT(TypeOf_, STORE_NAME) :: SoAMetadata:: BOOST_PP_CAT(ColumnTypeOf_, STORE_MEMBER); \ + DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME) () const { \ + return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ + }; \ + static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != SoAColumnType::eigen, "Eigen columns not supported in views."); #define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL STORE_MEMBER_NAME) + 
BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) /** * Member assignment for trivial constructor @@ -76,7 +82,7 @@ /** * Generator of parameters (stores) for constructor. */ -#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL(STORE_TYPE, STORE_NAME, DATA) \ +#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL(STORE_TYPE, STORE_NAME, DATA) \ ( DATA STORE_TYPE & STORE_NAME ) #define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS(R, DATA, TYPE_NAME) \ @@ -84,9 +90,15 @@ /** * Generator of member initialization from constructor. + * We use a lambda with auto return type to handle multiple possible return types. */ #define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(STORE, MEMBER, NAME) \ - ( BOOST_PP_CAT(NAME, _) ( STORE . MEMBER () ) ) + ( BOOST_PP_CAT(NAME, _) ( [&]() -> auto { \ + static_assert ( BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != SoAColumnType::eigen, \ + "Eigen values not supported in views" ); \ + return STORE . soaMetadata() . BOOST_PP_CAT(addressOf_, MEMBER) (); \ + }() ) ) + #define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL STORE_MEMBER_NAME) @@ -195,7 +207,7 @@ return BOOST_PP_CAT(LOCAL_NAME, _); \ } \ SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME(size_t index) const { \ - return LOAD_INCOHERENT(BOOST_PP_CAT(LOCAL_NAME, _) + index); \ + return *(BOOST_PP_CAT(LOCAL_NAME, _) + index); \ } #define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ @@ -220,12 +232,18 @@ struct CLASS { * Helper/friend class allowing SoA introspection. 
\ */ \ struct SoAMetadata { \ + friend CLASS; \ /* Alias store types to name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ \ /* Alias member types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, ~, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, BOOST_PP_EMPTY(), VALUE_LIST) \ + private: \ + SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent): parent_(parent) {} \ + const CLASS& parent_; \ }; \ + friend SoAMetadata ; \ + SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ \ /* Trivial constuctor */ \ CLASS(): \ @@ -301,12 +319,18 @@ struct CLASS { * Helper/friend class allowing SoA introspection. \ */ \ struct SoAMetadata { \ + friend CLASS; \ /* Alias store types to name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ \ /* Alias member types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, ~, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, const, VALUE_LIST) \ + private: \ + SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent): parent_(parent) {} \ + const CLASS& parent_; \ }; \ + friend SoAMetadata ; \ + SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ \ /* Trivial constuctor */ \ CLASS(): \ @@ -345,7 +369,7 @@ struct CLASS { template SOA_HOST_ONLY friend void dump(); \ \ private: \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ } #endif // ndef DataStrcutures_SoAView_h \ No newline at end of file From c815fe20865292fef7f4eecca3426864fae32bd0 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 19 Oct 2021 10:26:17 +0200 Subject: [PATCH 08/50] [cudadev] Added unit test to validate SoA stores 
and view. --- src/cudadev/test/SoAStoreAndView_t.cu | 154 ++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 src/cudadev/test/SoAStoreAndView_t.cu diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu new file mode 100644 index 000000000..c3709f42f --- /dev/null +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -0,0 +1,154 @@ +#include "DataFormats/SoAStore.h" +#include "DataFormats/SoAView.h" +#include +#include +#include + +// Test SoA stores and view. +// Use cases +// Multiple stores in a buffer +// Scalars, Columns of scalars and of Eigen vectors +// View to each of them, from one and multiple stores. + +generate_SoA_store(SoA1, + // predefined static scalars + // size_t size; + // size_t alignment; + + // columns: one value per element + SoA_column(double, x), + SoA_column(double, y), + SoA_column(double, z), + SoA_eigenColumn(Eigen::Vector3d, a), + SoA_eigenColumn(Eigen::Vector3d, b), + SoA_eigenColumn(Eigen::Vector3d, r), + SoA_column(uint16_t, color), + SoA_column(int32_t, value), + SoA_column(double *, py), + SoA_column(uint32_t, count), + SoA_column(uint32_t, anotherCount), + + // scalars: one value for the whole structure + SoA_scalar(const char *, description), + SoA_scalar(uint32_t, someNumber) +); + +// A 1 to 1 view of the store (except for unsupported types). 
+generate_SoA_view(SoA1View, + SoA_view_store_list( + SoA_view_store(SoA1, soa1) + ), + SoA_view_value_list( + SoA_view_value(soa1, x, x), + SoA_view_value(soa1, y, y), + SoA_view_value(soa1, z, z), + SoA_view_value(soa1, color, color), + SoA_view_value(soa1, value, value), + SoA_view_value(soa1, py, py), + SoA_view_value(soa1, count, count), + SoA_view_value(soa1, anotherCount, anotherCount), + SoA_view_value(soa1, description, description), + SoA_view_value(soa1, someNumber, someNumber) + ) +); + +// A partial view (artificial mix of store and view) +generate_SoA_view(SoA1View2G, + SoA_view_store_list( + SoA_view_store(SoA1, soa1), + SoA_view_store(SoA1View, soa1v) + ), + SoA_view_value_list( + SoA_view_value(soa1, x, x), + SoA_view_value(soa1v, y, y), + SoA_view_value(soa1, color, color), + SoA_view_value(soa1v, value, value), + SoA_view_value(soa1v, count, count), + SoA_view_value(soa1, anotherCount, anotherCount), + SoA_view_value(soa1v, description, description), + SoA_view_value(soa1, someNumber, someNumber) + ) +); + + + +// Same partial view, yet const. +generate_SoA_const_view(SoA1View2Gconst, + SoA_view_store_list( + SoA_view_store(SoA1, soa1), + SoA_view_store(SoA1View, soa1v) + ), + SoA_view_value_list( + SoA_view_value(soa1, x, x), + SoA_view_value(soa1v, y, y), +/* Eigen columns are not supported in views. 
+ SoA_view_value(soa1, a, a), + SoA_view_value(soa1, b, b), + SoA_view_value(soa1, r, r), */ + SoA_view_value(soa1, color, color), + SoA_view_value(soa1v, value, value), + SoA_view_value(soa1v, count, count), + SoA_view_value(soa1, anotherCount, anotherCount), + SoA_view_value(soa1v, description, description), + SoA_view_value(soa1, someNumber, someNumber) + ) +); + +const size_t size=10000; + +int main() { + // Allocate buffer + std::unique_ptr buffer( + static_cast(std::aligned_alloc(SoA1::defaultAlignment, SoA1::computeDataSize(size))), + std::free); + SoA1 soa1(buffer.get(), size); + SoA1View soa1view (soa1); + SoA1View2G soa1v2g (soa1, soa1view); + SoA1View2Gconst soa1v2gconst (soa1, soa1view); + // Write to view + for (size_t i=0; i < size; i++) { + auto s = soa1[i]; + s.x = 1.0 * i; + s.y = 2.0 * i; + s.z = 3.0 * i; + s.color() = i; + s.a()(0) = 1.0 * i; + s.a()(1) = 2.0 * i; + s.a()(2) = 3.0 * i; + s.b()(0) = 3.0 * i; + s.b()(1) = 2.0 * i; + s.b()(2) = 1.0 * i; + s.r() = s.a().cross(s.b()); + } + // Check direct read back + for (size_t i=0; i < size; i++) { + auto s = soa1[i]; + assert(s.x() == 1.0 * i); + assert(s.y() == 2.0 * i); + assert(s.z() == 3.0 * i); + assert(s.color() == i); + assert(s.a()(0) == 1.0 * i); + assert(s.a()(1) == 2.0 * i); + assert(s.a()(2) == 3.0 * i); + assert(s.b()(0) == 3.0 * i); + assert(s.b()(1) == 2.0 * i); + assert(s.b()(2) == 1.0 * i); + assert(s.r() == s.a().cross(s.b())); + } + // Check readback through views + for (size_t i=0; i < size; i++) { + auto sv = soa1view[i]; + auto sv2g = soa1v2g[i]; + auto sv2gc = soa1v2gconst[i]; + assert(sv.x() == 1.0 * i); + assert(sv.y() == 2.0 * i); + assert(sv.z() == 3.0 * i); + assert(sv.color() == i); + assert(sv2g.x() == 1.0 * i); + assert(sv2g.y() == 2.0 * i); + assert(sv2g.color() == i); + assert(sv2gc.x() == 1.0 * i); + assert(sv2gc.y() == 2.0 * i); + assert(sv2gc.color() == i); + } +} \ No newline at end of file From 37184593e9761cd7bbc24a16227fdb00a7faa710 Mon Sep 17 00:00:00 2001 
From: Eric Cano Date: Tue, 16 Nov 2021 17:11:02 +0100 Subject: [PATCH 09/50] [cudadev] removal of unused make_device_unique_uninitialized --- src/cudadev/CUDACore/device_unique_ptr.h | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/src/cudadev/CUDACore/device_unique_ptr.h b/src/cudadev/CUDACore/device_unique_ptr.h index 6e67d6cc7..fb736cfe2 100644 --- a/src/cudadev/CUDACore/device_unique_ptr.h +++ b/src/cudadev/CUDACore/device_unique_ptr.h @@ -69,28 +69,6 @@ namespace cms { template typename device::impl::make_device_unique_selector::bounded_array make_device_unique(Args &&...) = delete; - - // No check for the trivial constructor, make it clear in the interface - template - typename device::impl::make_device_unique_selector::non_array make_device_unique_uninitialized( - cudaStream_t stream) { - void *mem = allocate_device(sizeof(T), stream); - return typename device::impl::make_device_unique_selector::non_array{reinterpret_cast(mem), - device::impl::DeviceDeleter{stream}}; - } - - template - typename device::impl::make_device_unique_selector::unbounded_array make_device_unique_uninitialized( - size_t n, cudaStream_t stream) { - using element_type = typename std::remove_extent::type; - void *mem = allocate_device(n * sizeof(element_type), stream); - return typename device::impl::make_device_unique_selector::unbounded_array{ - reinterpret_cast(mem), device::impl::DeviceDeleter{stream}}; - } - - template - typename device::impl::make_device_unique_selector::bounded_array make_device_unique_uninitialized(Args &&...) = - delete; } // namespace cuda } // namespace cms From be3fb71a7f0d1f52b9b5b8ee016f4529e080ea14 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 23 Nov 2021 13:37:49 +0100 Subject: [PATCH 10/50] [cudadev] Simplified resetting store and buffer. 
--- src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc index 9bc8207ac..403881ffc 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc @@ -29,8 +29,7 @@ SiPixelDigisCUDA::HostStoreAndBuffer::HostStoreAndBuffer(size_t maxFedWords, cud {} void SiPixelDigisCUDA::HostStoreAndBuffer::reset() { - hostStore_.~HostDeviceStore(); - new(&hostStore_) HostDeviceStore(nullptr, 0); + hostStore_ = HostDeviceStore(); data_h.reset(); } From aaadfb4f35b39ccdbb17dfd78be8b260593d9feb Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 23 Nov 2021 14:18:46 +0100 Subject: [PATCH 11/50] [cudadev] Reflected const nature of class in variables and function names. --- src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc | 4 ++-- src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h | 6 +++--- src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu | 2 +- src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc index 403881ffc..fce90e452 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc @@ -12,11 +12,11 @@ SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) deviceOnlyStore_d(data_d.get(), maxFedWords), hostDeviceStore_d(deviceOnlyStore_d.soaMetadata().nextByte(), maxFedWords), deviceFullView_(deviceOnlyStore_d, hostDeviceStore_d), - devicePixelView_(deviceFullView_) + devicePixelConstView_(deviceFullView_) {} SiPixelDigisCUDA::SiPixelDigisCUDA() - : data_d(),deviceOnlyStore_d(), hostDeviceStore_d(), deviceFullView_(), devicePixelView_() + : data_d(),deviceOnlyStore_d(), hostDeviceStore_d(), deviceFullView_(), devicePixelConstView_() {} 
SiPixelDigisCUDA::HostStoreAndBuffer::HostStoreAndBuffer() diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index b68797bdf..48231aad8 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -46,7 +46,7 @@ class SiPixelDigisCUDA { ); /* Device pixel view: this is a second generation view (view from view) */ - generate_SoA_const_view(DevicePixelView, + generate_SoA_const_view(DevicePixelConstView, /* We get out data from the DeviceFullStore */ SoA_view_store_list( SoA_view_store(DeviceFullView, deviceFullView) @@ -110,7 +110,7 @@ class SiPixelDigisCUDA { // Special copy for validation cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; - const DevicePixelView& pixelView() const { return devicePixelView_; } + const DevicePixelConstView& pixelConstView() const { return devicePixelConstView_; } private: // These are consumed by downstream device code @@ -118,7 +118,7 @@ class SiPixelDigisCUDA { DeviceOnlyStore deviceOnlyStore_d; HostDeviceStore hostDeviceStore_d; DeviceFullView deviceFullView_; - DevicePixelView devicePixelView_; + DevicePixelConstView devicePixelConstView_; uint32_t nModules_h = 0; uint32_t nDigis_h = 0; }; diff --git a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu index fcee79db8..b9d27cdda 100644 --- a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu +++ b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu @@ -50,7 +50,7 @@ namespace pixelgpudetails { // protect from empty events if (blocks) { gpuPixelRecHits::getHits<<>>( - cpeParams, bs_d.data(), digis_d.pixelView(), digis_d.nDigis(), clusters_d.store(), hits_d.store()); + cpeParams, bs_d.data(), digis_d.pixelConstView(), digis_d.nDigis(), clusters_d.store(), hits_d.store()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaCheck(cudaDeviceSynchronize()); diff --git 
a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h index 2c3d7bff2..bb5032eeb 100644 --- a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h +++ b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h @@ -16,7 +16,7 @@ namespace gpuPixelRecHits { __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams, BeamSpotPOD const* __restrict__ bs, - SiPixelDigisCUDA::DevicePixelView pdigis, + SiPixelDigisCUDA::DevicePixelConstView pdigis, int numElements, SiPixelClustersCUDA::DeviceStore const pclusters, TrackingRecHit2DSOAStore* phits) { From 7cf639e2f291e8c706422a9815cbc7bef1ab10e7 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 24 Nov 2021 12:01:47 +0100 Subject: [PATCH 12/50] [cudadev] Changed SiPixelClustersCUDA product access method from store to const view. Created a const view type, and renamed the access method return type and name. --- src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h | 15 ++++++++++++++- .../TrackingRecHit2DHostSOAView.cc | 2 +- .../plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu | 2 +- .../plugin-SiPixelRecHits/gpuPixelRecHits.h | 2 +- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h index 4543c7551..79914c045 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h @@ -5,6 +5,7 @@ #include "CUDACore/host_unique_ptr.h" #include "CUDACore/cudaCompat.h" #include "DataFormats/SoAStore.h" +#include "DataFormats/SoAView.h" #include @@ -19,6 +20,18 @@ class SiPixelClustersCUDA { SoA_column(uint32_t, clusModuleStart) // index of the first cluster of each module ); + generate_SoA_const_view(DeviceConstView, + SoA_view_store_list(SoA_view_store(DeviceStore, deviceStore)), + SoA_view_value_list( + SoA_view_value(deviceStore, moduleStart, moduleStart), // index of the first pixel of each module +
SoA_view_value(deviceStore, clusInModule, clusInModule), // number of clusters found in each module + SoA_view_value(deviceStore, moduleId, moduleId), // module id of each module + + // originally from rechits + SoA_view_value(deviceStore, clusModuleStart, clusModuleStart) // index of the first cluster of each module + ) + ); + explicit SiPixelClustersCUDA(); explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream); ~SiPixelClustersCUDA() = default; @@ -42,7 +55,7 @@ class SiPixelClustersCUDA { uint32_t const *moduleId() const { return deviceStore_.moduleId(); } uint32_t const *clusModuleStart() const { return deviceStore_.clusModuleStart(); } - const DeviceStore store() const { return deviceStore_; } + DeviceConstView view() const { return DeviceConstView(deviceStore_); } private: cms::cuda::device::unique_ptr data_d; // Single SoA storage diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc index 7eb795951..39c0379cf 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc @@ -6,7 +6,7 @@ TrackingRecHit2DHostSOAView::TrackingRecHit2DHostSOAView(): void TrackingRecHit2DHostSOAView::reset() { hits_h.reset(); - hitsStore_.~HitsStore(); + hitsStore_ = TrackingRecHit2DSOAStore::HitsStore(); } TrackingRecHit2DHostSOAView::TrackingRecHit2DHostSOAView(size_t size, cudaStream_t stream): diff --git a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu index b9d27cdda..d14d57a5e 100644 --- a/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu +++ b/src/cudadev/plugin-SiPixelRecHits/PixelRecHitGPUKernel.cu @@ -50,7 +50,7 @@ namespace pixelgpudetails { // protect from empty events if (blocks) { gpuPixelRecHits::getHits<<>>( - cpeParams, bs_d.data(), digis_d.pixelConstView(), digis_d.nDigis(), clusters_d.store(), hits_d.store()); + 
cpeParams, bs_d.data(), digis_d.pixelConstView(), digis_d.nDigis(), clusters_d.view(), hits_d.store()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaCheck(cudaDeviceSynchronize()); diff --git a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h index bb5032eeb..8a92dc98e 100644 --- a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h +++ b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h @@ -18,7 +18,7 @@ namespace gpuPixelRecHits { BeamSpotPOD const* __restrict__ bs, SiPixelDigisCUDA::DevicePixelConstView pdigis, int numElements, - SiPixelClustersCUDA::DeviceStore const pclusters, + SiPixelClustersCUDA::DeviceConstView const pclusters, TrackingRecHit2DSOAStore* phits) { // FIXME // the compiler seems NOT to optimize loads from views (even in a simple test case) From 851d711609fad7e53b027fb345f1d76746d3fb23 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 24 Nov 2021 15:51:12 +0100 Subject: [PATCH 13/50] [cudadev] Renamed TrackingRecHit2DHostSOAView to TrackingRecHit2DHostSOAStore. The rename reflects the fact that the buffers are owned by the class.
--- .../CUDADataFormats/TrackingRecHit2DHeterogeneous.cc | 4 ++-- .../CUDADataFormats/TrackingRecHit2DHeterogeneous.h | 4 ++-- ...ostSOAView.cc => TrackingRecHit2DHostSOAStore.cc} | 8 ++++---- ...DHostSOAView.h => TrackingRecHit2DHostSOAStore.h} | 12 ++++++------ src/cudadev/plugin-Validation/HistoValidator.cc | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) rename src/cudadev/CUDADataFormats/{TrackingRecHit2DHostSOAView.cc => TrackingRecHit2DHostSOAStore.cc} (55%) rename src/cudadev/CUDADataFormats/{TrackingRecHit2DHostSOAView.h => TrackingRecHit2DHostSOAStore.h} (79%) diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc index 80ddf44d8..41695adc9 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc @@ -6,9 +6,9 @@ #include "CondFormats/SiPixelROCsStatusAndMapping.h" template <> -TrackingRecHit2DHostSOAView TrackingRecHit2DCUDA::hitsToHostAsync(cudaStream_t stream) const { +TrackingRecHit2DHostSOAStore TrackingRecHit2DCUDA::hitsToHostAsync(cudaStream_t stream) const { // copy xl, yl, xerr, yerr, xg, yg, zg,rg, charge, clusterSizeX, clusterSizeY. 
- TrackingRecHit2DHostSOAView ret(nHits(), stream); + TrackingRecHit2DHostSOAStore ret(nHits(), stream); cms::cuda::copyAsync(ret.hits_h, m_hitsSupportLayerStartStore, ret.hitsStore_.soaMetadata().byteSize(), stream); return ret; } \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h index e7315cc9d..424105bfe 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h @@ -3,7 +3,7 @@ #include "CUDADataFormats/TrackingRecHit2DSOAView.h" #include "CUDADataFormats/HeterogeneousSoA.h" -#include "CUDADataFormats/TrackingRecHit2DHostSOAView.h" +#include "CUDADataFormats/TrackingRecHit2DHostSOAStore.h" template class TrackingRecHit2DHeterogeneous { @@ -39,7 +39,7 @@ class TrackingRecHit2DHeterogeneous { auto iphi() { return m_iphi; } // Transfer the local and global coordinates, charge and size - TrackingRecHit2DHostSOAView hitsToHostAsync(cudaStream_t stream) const; + TrackingRecHit2DHostSOAStore hitsToHostAsync(cudaStream_t stream) const; // apparently unused //cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc similarity index 55% rename from src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc rename to src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc index 39c0379cf..65b35f5e4 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.cc +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc @@ -1,15 +1,15 @@ -#include "CUDADataFormats/TrackingRecHit2DHostSOAView.h" +#include "CUDADataFormats/TrackingRecHit2DHostSOAStore.h" -TrackingRecHit2DHostSOAView::TrackingRecHit2DHostSOAView(): +TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore(): hitsStore_(hits_h.get(), 0 /* size 
*/, 1 /* byte alignement */) {} -void TrackingRecHit2DHostSOAView::reset() { +void TrackingRecHit2DHostSOAStore::reset() { hits_h.reset(); hitsStore_ = TrackingRecHit2DSOAStore::HitsStore(); } -TrackingRecHit2DHostSOAView::TrackingRecHit2DHostSOAView(size_t size, cudaStream_t stream): +TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore(size_t size, cudaStream_t stream): hits_h(cms::cuda::make_host_unique(TrackingRecHit2DSOAStore::HitsStore::computeDataSize(size), stream)), hitsStore_(hits_h.get(), size, 1 /* byte alignement */) {} diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h similarity index 79% rename from src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.h rename to src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h index f86f6f104..8c4bc15ec 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h @@ -1,6 +1,6 @@ -#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostView_h -#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostView_h +#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostStore_h +#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostStore_h #include "CUDADataFormats/TrackingRecHit2DSOAView.h" #include "CUDACore/host_unique_ptr.h" @@ -8,19 +8,19 @@ template class TrackingRecHit2DHeterogeneous; -struct TrackingRecHit2DHostSOAView { +struct TrackingRecHit2DHostSOAStore { template friend class TrackingRecHit2DHeterogeneous; public: - TrackingRecHit2DHostSOAView(); + TrackingRecHit2DHostSOAStore(); void reset(); __device__ __forceinline__ const auto operator[](size_t i) const { return hitsStore_[i]; } __device__ __forceinline__ size_t size() { return hitsStore_.soaMetadata().size(); } private: - TrackingRecHit2DHostSOAView(size_t size, cudaStream_t stream); + TrackingRecHit2DHostSOAStore(size_t 
size, cudaStream_t stream); cms::cuda::host::unique_ptr hits_h; TrackingRecHit2DSOAStore::HitsStore hitsStore_; }; -#endif // ndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostView_h \ No newline at end of file +#endif // ndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostStore_h \ No newline at end of file diff --git a/src/cudadev/plugin-Validation/HistoValidator.cc b/src/cudadev/plugin-Validation/HistoValidator.cc index 9f998de8d..2ab0c0122 100644 --- a/src/cudadev/plugin-Validation/HistoValidator.cc +++ b/src/cudadev/plugin-Validation/HistoValidator.cc @@ -38,7 +38,7 @@ class HistoValidator : public edm::EDProducerExternalWork { uint32_t nHits; cms::cuda::host::unique_ptr h_adc; cms::cuda::host::unique_ptr h_clusInModule; - TrackingRecHit2DHostSOAView h_hits; + TrackingRecHit2DHostSOAStore h_hits; static std::map histos; }; From 6122c367698d7c3895cc9b9430b24b7e8e508a1d Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 24 Nov 2021 16:10:12 +0100 Subject: [PATCH 14/50] [cudadev] Removed redundant const. --- src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h index 412e24dad..37315ef49 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h @@ -95,10 +95,10 @@ class TrackingRecHit2DSOAStore { ); // Shortcut operator saving the explicit calls to view in usage. 
- __device__ __forceinline__ HitsAndSupportView::element operator[] (size_t index) { + __device__ __forceinline__ HitsAndSupportView::element operator[] (size_t index) { return m_hitsAndSupportView[index]; } - __device__ __forceinline__ const HitsAndSupportView::const_element operator[] (size_t index) const { + __device__ __forceinline__ HitsAndSupportView::const_element operator[] (size_t index) const { return m_hitsAndSupportView[index]; } From ca7840e0f764561d636a464b044274c9154f7f52 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 24 Nov 2021 17:24:29 +0100 Subject: [PATCH 15/50] [cudadev] Simplified reassignment. --- src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h index 3139a7b13..2d9a15bba 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h @@ -34,9 +34,7 @@ class SiPixelROCsStatusAndMappingWrapper { void allocate(size_t size, cudaStream_t stream) { cablingMapDeviceBuffer = cms::cuda::make_device_unique( SiPixelROCsStatusAndMapping::computeDataSize(size), stream); - // Explicit call to destructor before overwriting the object. 
- cablingMapDevice.~SiPixelROCsStatusAndMapping(); - new(&cablingMapDevice) SiPixelROCsStatusAndMapping(cablingMapDeviceBuffer.get(), size); + cablingMapDevice = SiPixelROCsStatusAndMapping(cablingMapDeviceBuffer.get(), size); } cms::cuda::device::unique_ptr cablingMapDeviceBuffer; SiPixelROCsStatusAndMapping cablingMapDevice = SiPixelROCsStatusAndMapping(nullptr, 0); // map struct in GPU From 3a7b692749bffe123c642e59a2ab0bffd9426e5f Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 25 Nov 2021 15:29:55 +0100 Subject: [PATCH 16/50] [cudadev] Imported templated SoA structures from CMSSW The templating allows defining compile-time alignment choice, alignment enforcement, and alignment hinting for the compiler. --- src/cudadev/DataFormats/SoACommon.h | 156 +++--- src/cudadev/DataFormats/SoAStore.h | 782 +++++++++++++--------------- src/cudadev/DataFormats/SoAView.h | 535 ++++++++++--------- 3 files changed, 724 insertions(+), 749 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 63f868fc4..a3550f023 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -2,8 +2,8 @@ * Definitions of SoA common parameters for SoA class generators */ -#ifndef DataStrcutures_SoACommon_h -#define DataStrcutures_SoACommon_h +#ifndef DataStructures_SoACommon_h +#define DataStructures_SoACommon_h #include "boost/preprocessor.hpp" #include @@ -26,113 +26,122 @@ #if defined(__CUDACC__) && defined(__CUDA_ARCH__) // Read a pointer content via read-only (non coherent) cache.
#define LOAD_INCOHERENT(A) __ldg(A) -#define LOAD_STREAMED(A) __ldcs(A) -#define STORE_STREAMED(A, V) __stcs(A, V) +#define LOAD_STREAMED(A) __ldcs(A) +#define STORE_STREAMED(A, V) __stcs(A, V) #else #define LOAD_INCOHERENT(A) *(A) -#define LOAD_STREAMED(A) *(A) +#define LOAD_STREAMED(A) *(A) #define STORE_STREAMED(A, V) *(A) = (V) #endif // compile-time sized SoA // Helper template managing the value within it column -template +// The optional compile time alignment parameter enables informing the +// compiler of alignment (enforced by caller). +template class SoAValue { public: - SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): idx_(i), col_(col) {} + SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T* col) : idx_(i), col_(col) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ - SOA_HOST_DEVICE_INLINE T& operator() () { return col_[idx_]; } - SOA_HOST_DEVICE_INLINE T operator() () const { return *(col_ + idx_); } - SOA_HOST_DEVICE_INLINE T* operator& () { return &col_[idx_]; } - SOA_HOST_DEVICE_INLINE const T* operator& () const { return &col_[idx_]; } + SOA_HOST_DEVICE_INLINE T& operator()() { return alignedCol()[idx_]; } + SOA_HOST_DEVICE_INLINE T operator()() const { return *(alignedCol() + idx_); } + SOA_HOST_DEVICE_INLINE T* operator&() { return &alignedCol()[idx_]; } + SOA_HOST_DEVICE_INLINE const T* operator&() const { return &alignedCol()[idx_]; } template - SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return col_[idx_] = v; } + SOA_HOST_DEVICE_INLINE T& operator=(const T2& v) { + return alignedCol()[idx_] = v; + } typedef T valueType; static constexpr auto valueSize = sizeof(T); + private: + SOA_HOST_DEVICE_INLINE T* alignedCol() const { + if constexpr (ALIGNMENT) { + return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); + } + return col_; + } size_t idx_; - T *col_; + T* col_; }; // Helper template managing the value within it column -template +template class SoAConstValue { public: - SOA_HOST_DEVICE_INLINE 
SoAConstValue(size_t i, const T * col): idx_(i), col_(col) {} + SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, const T* col) : idx_(i), col_(col) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ - SOA_HOST_DEVICE_INLINE T operator() () const { return *(col_ + idx_); } - SOA_HOST_DEVICE_INLINE const T* operator& () const { return &col_[idx_]; } + SOA_HOST_DEVICE_INLINE T operator()() const { return *(alignedCol() + idx_); } + SOA_HOST_DEVICE_INLINE const T* operator&() const { return &alignedCol()[idx_]; } typedef T valueType; static constexpr auto valueSize = sizeof(T); + private: + SOA_HOST_DEVICE_INLINE const T* alignedCol() const { + if constexpr (ALIGNMENT) { + return __builtin_assume_aligned(col_, ALIGNMENT); + } + return col_; + } size_t idx_; - const T *col_; + const T* col_; }; - // Helper template managing the value within it column -template +template class SoAEigenValue { public: typedef C Type; typedef Eigen::Map> MapType; typedef Eigen::Map> CMapType; - SOA_HOST_DEVICE_INLINE SoAEigenValue(size_t i, typename C::Scalar * col, size_t stride): - val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, - Eigen::InnerStride(stride)), - crCol_(col), - cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, - Eigen::InnerStride(stride)), - stride_(stride) {} - SOA_HOST_DEVICE_INLINE MapType& operator() () { return val_; } - SOA_HOST_DEVICE_INLINE const CMapType& operator() () const { return cVal_; } + SOA_HOST_DEVICE_INLINE SoAEigenValue(size_t i, typename C::Scalar* col, size_t stride) + : val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + crCol_(col), + cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + stride_(stride) {} + SOA_HOST_DEVICE_INLINE MapType& operator()() { return val_; } + SOA_HOST_DEVICE_INLINE const CMapType& operator()() const { return cVal_; } SOA_HOST_DEVICE_INLINE operator C() { return val_; } SOA_HOST_DEVICE_INLINE operator const 
C() const { return cVal_; } - SOA_HOST_DEVICE_INLINE C* operator& () { return &val_; } - SOA_HOST_DEVICE_INLINE const C* operator& () const { return &cVal_; } + SOA_HOST_DEVICE_INLINE C* operator&() { return &val_; } + SOA_HOST_DEVICE_INLINE const C* operator&() const { return &cVal_; } template - SOA_HOST_DEVICE_INLINE MapType& operator= (const C2& v) { return val_ = v; } + SOA_HOST_DEVICE_INLINE MapType& operator=(const C2& v) { + return val_ = v; + } typedef typename C::Scalar ValueType; static constexpr auto valueSize = sizeof(C::Scalar); SOA_HOST_DEVICE_INLINE size_t stride() { return stride_; } - template - typename Eigen::MatrixBase::template cross_product_return_type::type - SOA_HOST_DEVICE_INLINE cross(const Eigen::MatrixBase& other) const { return cVal_.cross(other); } - - template - typename Eigen::MatrixBase::template cross_product_return_type::type - SOA_HOST_DEVICE_INLINE cross(const OtherType& other) const { return cVal_.cross(other.cVal_); } - + private: MapType val_; - const typename C::Scalar * __restrict__ crCol_; + const typename C::Scalar* __restrict__ crCol_; CMapType cVal_; size_t stride_; }; // Helper template to avoid commas in macro -template +template struct EigenConstMapMaker { typedef Eigen::Map> Type; class DataHolder { public: - DataHolder(const typename C::Scalar * data): data_(data) {} + DataHolder(const typename C::Scalar* data) : data_(data) {} EigenConstMapMaker::Type withStride(size_t stride) { - return EigenConstMapMaker::Type(data_, C::RowsAtCompileTime, C::ColsAtCompileTime, - Eigen::InnerStride(stride)); + return EigenConstMapMaker::Type( + data_, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)); } + private: - const typename C::Scalar * const data_; + const typename C::Scalar* const data_; }; - static DataHolder withData(const typename C::Scalar * data) { - return DataHolder(data); - } + static DataHolder withData(const typename C::Scalar* data) { return DataHolder(data); } }; // Helper function to 
compute aligned size inline size_t alignSize(size_t size, size_t alignment = 128) { - if (size) + if (size) return ((size - 1) / alignment + 1) * alignment; else return 0; @@ -143,40 +152,31 @@ inline size_t alignSize(size_t size, size_t alignment = 128) { #define _VALUE_TYPE_COLUMN 1 #define _VALUE_TYPE_EIGEN_COLUMN 2 -enum class SoAColumnType { - scalar = _VALUE_TYPE_SCALAR, - column = _VALUE_TYPE_COLUMN, - eigen = _VALUE_TYPE_EIGEN_COLUMN -}; +enum class SoAColumnType { scalar = _VALUE_TYPE_SCALAR, column = _VALUE_TYPE_COLUMN, eigen = _VALUE_TYPE_EIGEN_COLUMN }; #define SoA_scalar(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) #define SoA_column(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) #define SoA_eigenColumn(TYPE, NAME) (_VALUE_TYPE_EIGEN_COLUMN, TYPE, NAME) /* Iterate on the macro MACRO and return the result as a comma separated list */ -#define _ITERATE_ON_ALL_COMMA(MACRO, DATA, ...) \ - BOOST_PP_TUPLE_ENUM( \ - BOOST_PP_SEQ_TO_TUPLE( \ - _ITERATE_ON_ALL(MACRO, DATA, __VA_ARGS__) \ - ) \ - ) +#define _ITERATE_ON_ALL_COMMA(MACRO, DATA, ...) \ + BOOST_PP_TUPLE_ENUM(BOOST_PP_SEQ_TO_TUPLE(_ITERATE_ON_ALL(MACRO, DATA, __VA_ARGS__))) /* Iterate MACRO on all elements */ -#define _ITERATE_ON_ALL(MACRO, DATA, ...) \ - BOOST_PP_SEQ_FOR_EACH(MACRO, DATA, \ - BOOST_PP_VARIADIC_TO_SEQ(__VA_ARGS__) \ - ) +#define _ITERATE_ON_ALL(MACRO, DATA, ...) 
BOOST_PP_SEQ_FOR_EACH(MACRO, DATA, BOOST_PP_VARIADIC_TO_SEQ(__VA_ARGS__)) /* Switch on macros depending on scalar / column type */ -#define _SWITCH_ON_TYPE(VALUE_TYPE, IF_SCALAR, IF_COLUMN, IF_EIGEN_COLUMN) \ - BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_SCALAR), \ - IF_SCALAR, \ - BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_COLUMN), \ - IF_COLUMN, \ - BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_EIGEN_COLUMN), \ - IF_EIGEN_COLUMN, \ - BOOST_PP_EMPTY() \ - ) \ - ) \ - ) - -#endif // ndef DataStrcutures_SoACommon_h +#define _SWITCH_ON_TYPE(VALUE_TYPE, IF_SCALAR, IF_COLUMN, IF_EIGEN_COLUMN) \ + BOOST_PP_IF( \ + BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_SCALAR), \ + IF_SCALAR, \ + BOOST_PP_IF( \ + BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_COLUMN), \ + IF_COLUMN, \ + BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_EIGEN_COLUMN), IF_EIGEN_COLUMN, BOOST_PP_EMPTY()))) + +/* Enum parameters allowing templated control of store/view behaviors */ +/* Alignement enforcement verifies every column is aligned, and + * hints the compiler that it can expect column pointers to be aligned */ +enum class AlignmentEnforcement : bool { Relaxed, Enforced }; + +#endif // ndef DataStructures_SoACommon_h diff --git a/src/cudadev/DataFormats/SoAStore.h b/src/cudadev/DataFormats/SoAStore.h index fdead868d..ca1b4814a 100644 --- a/src/cudadev/DataFormats/SoAStore.h +++ b/src/cudadev/DataFormats/SoAStore.h @@ -3,15 +3,14 @@ * with compile-time size and alignment, and accessors to the "rows" and "columns". 
*/ -#ifndef DataStrcutures_SoAStore_h -#define DataStrcutures_SoAStore_h +#ifndef DataStructures_SoAStore_h +#define DataStructures_SoAStore_h #include "SoACommon.h" #include #include - /* dump SoA fields information; these should expand to, for columns: * Example: * generate_SoA_store(SoA, @@ -57,358 +56,268 @@ * */ -#define _DECLARE_SOA_DUMP_INFO_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Dump scalar */ \ - std::cout << " Scalar " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset \ - << " has size " << sizeof(CPP_TYPE) << " and padding " \ - << ((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment - sizeof(CPP_TYPE) \ - << std::endl; \ - offset+=((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment; \ - , \ - /* Dump column */ \ - std::cout << " Column " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset \ - << " has size " << sizeof(CPP_TYPE) * nElements << " and padding " \ - << (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment - (sizeof(CPP_TYPE) * nElements) \ - << std::endl; \ - offset+=(((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ - , \ - /* Dump Eigen column */ \ - std::cout << " Eigen value " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset \ - << " has dimension (" << CPP_TYPE::RowsAtCompileTime << " x " << CPP_TYPE::ColsAtCompileTime << ")" \ - << " and per column size " << sizeof(CPP_TYPE::Scalar) * nElements << " and padding " \ - << (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment \ - - (sizeof(CPP_TYPE::Scalar) * nElements) \ - << std::endl; \ - offset+=(((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment \ - * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ -) - -#define _DECLARE_SOA_DUMP_INFO(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_SOA_DUMP_INFO_IMPL TYPE_NAME) - +#define _DECLARE_SOA_DUMP_INFO_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, 
/* Dump scalar */ \ + std::cout << " Scalar " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset << " has size " << sizeof(CPP_TYPE) << " and padding " \ + << ((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment - sizeof(CPP_TYPE) \ + << std::endl; \ + offset += ((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment; \ + , /* Dump column */ \ + std::cout \ + << " Column " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset << " has size " << sizeof(CPP_TYPE) * nElements \ + << " and padding " \ + << (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * \ + byteAlignment - \ + (sizeof(CPP_TYPE) * nElements) \ + << std::endl; \ + offset += (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , /* Dump Eigen column */ \ + std::cout \ + << " Eigen value " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset << " has dimension (" << CPP_TYPE::RowsAtCompileTime << " x " \ + << CPP_TYPE::ColsAtCompileTime \ + << ")" \ + << " and per column size " \ + << sizeof(CPP_TYPE::Scalar) * nElements \ + << " and padding " \ + << (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * \ + byteAlignment - \ + (sizeof(CPP_TYPE::Scalar) * nElements) \ + << std::endl; \ + offset += (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment * \ + CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime;) + +#define _DECLARE_SOA_DUMP_INFO(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_DUMP_INFO_IMPL TYPE_NAME) /** * SoAMetadata member computing column pitch */ -#define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((sizeof(CPP_TYPE) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_; \ - } \ - typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::scalar; \ - CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const 
{ return parent_.BOOST_PP_CAT(NAME, _); } \ - , \ - /* Column */ \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_; \ - } \ - typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::column; \ - CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ - , \ - /* Eigen column */ \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / parent_.byteAlignment_) + 1) * parent_.byteAlignment_ \ - * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ - } \ - typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::eigen; \ - CPP_TYPE::Scalar * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ - ) - -#define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) \ - _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME +#define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, /* Scalar */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * ParentClass::byteAlignment; \ + } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::scalar; \ + CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }, /* Column */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ + ParentClass::byteAlignment; \ + } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::column; \ + CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return 
parent_.BOOST_PP_CAT(NAME, _); }, /* Eigen column */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ + ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::eigen; \ + CPP_TYPE::Scalar * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }) + +#define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME /** * Member assignment for trivial constructor */ -#define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ - , \ - /* Column */ \ - ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ - , \ - /* Eigen column */ \ - ( BOOST_PP_CAT(NAME, _) (nullptr) ) \ - ( BOOST_PP_CAT(NAME, Stride_) (0) ) \ -) - -#define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, TYPE_NAME) \ +#define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + (BOOST_PP_CAT(NAME, _)(nullptr)), /* Column */ \ + (BOOST_PP_CAT(NAME, _)(nullptr)), /* Eigen column */ \ + (BOOST_PP_CAT(NAME, _)(nullptr))(BOOST_PP_CAT(NAME, Stride_)(0))) + +#define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, TYPE_NAME) \ BOOST_PP_EXPAND(_DECLARE_MEMBER_TRIVIAL_CONSTRUCTION_IMPL TYPE_NAME) /** * Computation of the column or scalar pointer location in the memory layout (at SoA construction time) */ -#define _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ - curMem += (((sizeof(CPP_TYPE) - 1) / byteAlignment_) + 1) * byteAlignment_; \ - , \ - /* Column */ \ - BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ - curMem += 
(((nElements_ * sizeof(CPP_TYPE) - 1) / byteAlignment_) + 1) * byteAlignment_; \ - , \ - /* Eigen column */ \ - BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ - curMem += (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment_) + 1) * byteAlignment_ \ - * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ - BOOST_PP_CAT(NAME, Stride_) = (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment_) + 1) \ - * byteAlignment_ / sizeof(CPP_TYPE::Scalar); \ - ) - -#define _ASSIGN_SOA_COLUMN_OR_SCALAR(R, DATA, TYPE_NAME) \ - _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL TYPE_NAME +#define _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += (((sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , /* Column */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += (((nElements_ * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , /* Eigen column */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment * \ + CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + BOOST_PP_CAT(NAME, Stride_) = (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * \ + byteAlignment / sizeof(CPP_TYPE::Scalar);) \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (reinterpret_cast(BOOST_PP_CAT(NAME, _)) % byteAlignment) \ + throw std::out_of_range("In store constructor: misaligned column: " #NAME); + +#define _ASSIGN_SOA_COLUMN_OR_SCALAR(R, DATA, TYPE_NAME) _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL TYPE_NAME /** * Computation of the column or scalar size for SoA size computation */ -#define _ACCUMULATE_SOA_ELEMENT_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - ret += (((sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ - , \ - /* 
Column */ \ - ret += (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ - , \ - /* Eigen column */ \ - ret += (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment \ - * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ - ) - -#define _ACCUMULATE_SOA_ELEMENT(R, DATA, TYPE_NAME) \ - _ACCUMULATE_SOA_ELEMENT_IMPL TYPE_NAME +#define _ACCUMULATE_SOA_ELEMENT_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + ret += (((sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , /* Column */ \ + ret += (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , /* Eigen column */ \ + ret += (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment * \ + CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime;) + +#define _ACCUMULATE_SOA_ELEMENT(R, DATA, TYPE_NAME) _ACCUMULATE_SOA_ELEMENT_IMPL TYPE_NAME /** * Value accessor of the const_element subclass. */ -#define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - SOA_HOST_DEVICE_INLINE \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - CPP_TYPE const & NAME() { return soa_. NAME (); } \ - , \ - /* Column */ \ - CPP_TYPE const & NAME() { return * (soa_. NAME () + index_); } \ - , \ - /* Eigen column */ \ - /* Ugly hack with a helper template to avoid having commas inside the macro parameter */ \ - EigenConstMapMaker::Type const NAME() { \ - return EigenConstMapMaker::withData(soa_. 
NAME () + index_).withStride(soa_.BOOST_PP_CAT(NAME, Stride)()); \ - } \ - ) - -#define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) \ - _DECLARE_SOA_CONST_ELEMENT_ACCESSOR_IMPL TYPE_NAME +#define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + SOA_HOST_DEVICE_INLINE \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, /* Scalar */ \ + CPP_TYPE const& NAME() { return soa_.NAME(); }, /* Column */ \ + CPP_TYPE const& NAME() { return *(soa_.NAME() + index_); }, \ + /* Eigen column */ /* Ugly hack with a helper template to avoid having commas inside the macro parameter */ \ + EigenConstMapMaker::Type const NAME() { \ + return EigenConstMapMaker::withData(soa_.NAME() + index_) \ + .withStride(soa_.BOOST_PP_CAT(NAME, Stride)()); \ + }) + +#define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) _DECLARE_SOA_CONST_ELEMENT_ACCESSOR_IMPL TYPE_NAME /** * Generator of parameters for (non-const) element subclass (expanded comma separated). */ -#define _DECLARE_ELEMENT_VALUE_ARG_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_EMPTY() \ - , \ - /* Column */ \ - (CPP_TYPE *NAME) \ - , \ - /* Eigen column */ \ - (CPP_TYPE::Scalar *NAME) (size_t BOOST_PP_CAT(NAME, Stride)) \ - ) - -#define _DECLARE_ELEMENT_VALUE_ARG(R, DATA, TYPE_NAME) \ - _DECLARE_ELEMENT_VALUE_ARG_IMPL TYPE_NAME +#define _DECLARE_ELEMENT_VALUE_ARG_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + BOOST_PP_EMPTY(), /* Column */ \ + (CPP_TYPE * NAME), /* Eigen column */ \ + (CPP_TYPE::Scalar * NAME)(size_t BOOST_PP_CAT(NAME, Stride))) + +#define _DECLARE_ELEMENT_VALUE_ARG(R, DATA, TYPE_NAME) _DECLARE_ELEMENT_VALUE_ARG_IMPL TYPE_NAME /** * Generator of member initialization for constructor of element subclass */ -#define _DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_EMPTY() \ - , \ - /* Column */ \ - (NAME (DATA, 
NAME)) \ - , \ - /* Eigen column */ \ - (NAME (DATA, NAME, BOOST_PP_CAT(NAME, Stride))) \ - ) +#define _DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + BOOST_PP_EMPTY(), /* Column */ \ + (NAME(DATA, NAME)), /* Eigen column */ \ + (NAME(DATA, NAME, BOOST_PP_CAT(NAME, Stride)))) /* declare AoS-like element value args for contructor; these should expand,for columns only */ -#define _DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION(R, DATA, TYPE_NAME) \ +#define _DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION(R, DATA, TYPE_NAME) \ BOOST_PP_EXPAND(_DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) /** * Generator of member initialization for constructor of const element subclass */ -#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_EMPTY() \ - , \ - /* Column */ \ - (BOOST_PP_CAT(NAME, _) (DATA, NAME)) \ - , \ - /* Eigen column */ \ - (BOOST_PP_CAT(NAME, _) (DATA, NAME, BOOST_PP_CAT(NAME, Stride))) \ - ) +#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + BOOST_PP_EMPTY(), /* Column */ \ + (BOOST_PP_CAT(NAME, _)(DATA, NAME)), /* Eigen column */ \ + (BOOST_PP_CAT(NAME, _)(DATA, NAME, BOOST_PP_CAT(NAME, Stride)))) /* declare AoS-like element value args for contructor; these should expand,for columns only */ -#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION(R, DATA, TYPE_NAME) \ +#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION(R, DATA, TYPE_NAME) \ BOOST_PP_EXPAND(_DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) /** * Generator of the member-by-member copy operator of the element subclass. 
*/ -#define _DECLARE_ELEMENT_VALUE_COPY_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_EMPTY() \ - , \ - /* Column */ \ - NAME() = other.NAME(); \ - , \ - /* Eigen column */ \ - static_cast(NAME) = static_cast::type &>(other.NAME); \ - ) - -#define _DECLARE_ELEMENT_VALUE_COPY(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_ELEMENT_VALUE_COPY_IMPL TYPE_NAME) +#define _DECLARE_ELEMENT_VALUE_COPY_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + BOOST_PP_EMPTY(), /* Column */ \ + NAME() = other.NAME(); \ + , /* Eigen column */ \ + static_cast(NAME) = static_cast::type&>(other.NAME);) + +#define _DECLARE_ELEMENT_VALUE_COPY(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_ELEMENT_VALUE_COPY_IMPL TYPE_NAME) /** * Declaration of the private members of the const element subclass */ -#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_EMPTY() \ - , \ - /* Column */ \ - const SoAValue BOOST_PP_CAT(NAME, _); \ - , \ - /* Eigen column */ \ - const SoAEigenValue BOOST_PP_CAT(NAME, _); \ - ) - -#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) \ - _DECLARE_CONST_ELEMENT_VALUE_MEMBER_IMPL TYPE_NAME +#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + BOOST_PP_EMPTY(), /* Column */ \ + const SoAValueWithConf BOOST_PP_CAT(NAME, _); \ + , /* Eigen column */ \ + const SoAEigenValueWithConf BOOST_PP_CAT(NAME, _);) + +#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) _DECLARE_CONST_ELEMENT_VALUE_MEMBER_IMPL TYPE_NAME /** * Declaration of the members accessors of the const element subclass */ -#define _DECLARE_CONST_ELEMENT_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_EMPTY() \ - , \ - /* Column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return 
BOOST_PP_CAT(NAME, _)(); } \ - , \ - /* Eigen column */ \ - SOA_HOST_DEVICE_INLINE const SoAEigenValue NAME() const { return BOOST_PP_CAT(NAME, _); } \ - ) - -#define _DECLARE_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) \ - _DECLARE_CONST_ELEMENT_ACCESSOR_IMPL TYPE_NAME +#define _DECLARE_CONST_ELEMENT_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, /* Scalar */ \ + BOOST_PP_EMPTY(), /* Column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return BOOST_PP_CAT(NAME, _)(); }, /* Eigen column */ \ + SOA_HOST_DEVICE_INLINE const SoAEigenValueWithConf NAME() const { return BOOST_PP_CAT(NAME, _); }) + +#define _DECLARE_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) _DECLARE_CONST_ELEMENT_ACCESSOR_IMPL TYPE_NAME /** * Declaration of the members of the element subclass */ -#define _DECLARE_ELEMENT_VALUE_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_EMPTY() \ - , \ - /* Column */ \ - SoAValue NAME; \ - , \ - /* Eigen column */ \ - SoAEigenValue NAME; \ - ) - - -#define _DECLARE_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) \ - _DECLARE_ELEMENT_VALUE_MEMBER_IMPL TYPE_NAME +#define _DECLARE_ELEMENT_VALUE_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + BOOST_PP_EMPTY(), /* Column */ \ + SoAValueWithConf NAME; \ + , /* Eigen column */ \ + SoAEigenValueWithConf NAME;) + +#define _DECLARE_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) _DECLARE_ELEMENT_VALUE_MEMBER_IMPL TYPE_NAME /** * Parameters passed to element subclass constructor in operator[] */ -#define _DECLARE_ELEMENT_CONSTR_CALL_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - BOOST_PP_EMPTY() \ - , \ - /* Column */ \ - (BOOST_PP_CAT(NAME, _)) \ - , \ - /* Eigen column */ \ - (BOOST_PP_CAT(NAME, _)) (BOOST_PP_CAT(NAME, Stride_)) \ - ) - -#define _DECLARE_ELEMENT_CONSTR_CALL(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_ELEMENT_CONSTR_CALL_IMPL TYPE_NAME) +#define 
_DECLARE_ELEMENT_CONSTR_CALL_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + BOOST_PP_EMPTY(), /* Column */ \ + (BOOST_PP_CAT(NAME, _)), /* Eigen column */ \ + (BOOST_PP_CAT(NAME, _))(BOOST_PP_CAT(NAME, Stride_))) + +#define _DECLARE_ELEMENT_CONSTR_CALL(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_ELEMENT_CONSTR_CALL_IMPL TYPE_NAME) /** * Direct access to column pointer and indexed access */ -#define _DECLARE_SOA_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME() { return * BOOST_PP_CAT(NAME, _); } \ - , \ - /* Column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE* NAME() { return BOOST_PP_CAT(NAME, _); } \ - SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME(size_t index) { return BOOST_PP_CAT(NAME, _)[index]; } \ - , \ - /* Eigen column */ \ - /* Unsupported for the moment TODO */ \ - BOOST_PP_EMPTY() \ - ) - -#define _DECLARE_SOA_ACCESSOR(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_SOA_ACCESSOR_IMPL TYPE_NAME) +#define _DECLARE_SOA_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, /* Scalar */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME() { return *BOOST_PP_CAT(NAME, _); }, /* Column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE* NAME() { \ + return BOOST_PP_CAT(NAME, _); \ + } SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME(size_t index) { return BOOST_PP_CAT(NAME, _)[index]; }, \ + /* Eigen column */ /* Unsupported for the moment TODO */ \ + BOOST_PP_EMPTY()) + +#define _DECLARE_SOA_ACCESSOR(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_ACCESSOR_IMPL TYPE_NAME) /** * Direct access to column pointer (const) and indexed access. 
*/ -#define _DECLARE_SOA_CONST_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return *(BOOST_PP_CAT(NAME, _)); } \ - , \ - /* Column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) const { return *(BOOST_PP_CAT(NAME, _) + index); } \ - , \ - /* Eigen column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE::Scalar const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ - SOA_HOST_DEVICE_INLINE size_t BOOST_PP_CAT(NAME,Stride)() { return BOOST_PP_CAT(NAME, Stride_); } \ - ) - -#define _DECLARE_SOA_CONST_ACCESSOR(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_SOA_CONST_ACCESSOR_IMPL TYPE_NAME) +#define _DECLARE_SOA_CONST_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, /* Scalar */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return *(BOOST_PP_CAT(NAME, _)); }, /* Column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE const* NAME() \ + const { return BOOST_PP_CAT(NAME, _); } SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) \ + const { return *(BOOST_PP_CAT(NAME, _) + index); }, /* Eigen column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE::Scalar const* NAME() const { \ + return BOOST_PP_CAT(NAME, _); \ + } SOA_HOST_DEVICE_INLINE size_t BOOST_PP_CAT(NAME, Stride)() { return BOOST_PP_CAT(NAME, Stride_); }) + +#define _DECLARE_SOA_CONST_ACCESSOR(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_CONST_ACCESSOR_IMPL TYPE_NAME) /** * SoA class member declaration (column pointers). 
*/ -#define _DECLARE_SOA_DATA_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, \ - /* Scalar */ \ - CPP_TYPE * BOOST_PP_CAT(NAME, _) = nullptr; \ - , \ - /* Column */ \ - CPP_TYPE * BOOST_PP_CAT(NAME, _) = nullptr; \ - , \ - /* Eigen column */ \ - CPP_TYPE::Scalar * BOOST_PP_CAT(NAME, _) = nullptr; \ - size_t BOOST_PP_CAT(NAME, Stride_) = 0; \ - ) - -#define _DECLARE_SOA_DATA_MEMBER(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_SOA_DATA_MEMBER_IMPL TYPE_NAME) +#define _DECLARE_SOA_DATA_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ + CPP_TYPE* BOOST_PP_CAT(NAME, _) = nullptr; \ + , /* Column */ \ + CPP_TYPE * BOOST_PP_CAT(NAME, _) = nullptr; \ + , /* Eigen column */ \ + CPP_TYPE::Scalar * BOOST_PP_CAT(NAME, _) = nullptr; \ + size_t BOOST_PP_CAT(NAME, Stride_) = 0;) + +#define _DECLARE_SOA_DATA_MEMBER(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_DATA_MEMBER_IMPL TYPE_NAME) #ifdef DEBUG #define _DO_RANGECHECK true @@ -419,151 +328,158 @@ /* * A macro defining a SoA store (collection of scalars and columns of equal lengths */ -#define generate_SoA_store(CLASS, ...) \ -struct CLASS { \ - \ - /* these could be moved to an external type trait to free up the symbol names */ \ - using self_type = CLASS; \ - \ - /* For CUDA applications, we align to the 128 bytes of the cache lines. \ - * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ - * up to compute capability 8.X. 
\ - */ \ - constexpr static size_t defaultAlignment = 128; \ - \ - /* dump the SoA internal structure */ \ - SOA_HOST_ONLY \ - static void dump(size_t nElements, size_t byteAlignment = defaultAlignment) { \ - std::cout << #CLASS "(" << nElements << ", " << byteAlignment << "): " << std::endl; \ - std::cout << " sizeof(" #CLASS "): " << sizeof(CLASS) << std::endl; \ - size_t offset=0; \ - _ITERATE_ON_ALL(_DECLARE_SOA_DUMP_INFO, ~, __VA_ARGS__) \ - std::cout << "Final offset = " << offset << " computeDataSize(...): " << computeDataSize(nElements, byteAlignment) << std::endl;\ - std::cout << std::endl; \ - } \ - /* Helper function used by caller to externally allocate the storage */ \ - static size_t computeDataSize(size_t nElements, size_t byteAlignment = defaultAlignment) { \ - size_t ret = 0; \ - _ITERATE_ON_ALL(_ACCUMULATE_SOA_ELEMENT, ~, __VA_ARGS__) \ - return ret; \ - } \ - \ - /** \ +#define generate_SoA_store(CLASS, ...) \ + template \ + struct CLASS { \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CLASS; \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. \ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ + constexpr static size_t defaultAlignment = 128; \ + constexpr static size_t byteAlignment = ALIGNMENT; \ + constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ + constexpr static size_t conditionalAlignment = \ + alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ + /* Those typedefs avoid having commas in macros (which is problematic) */ \ + template \ + using SoAValueWithConf = SoAValue; \ + \ + template \ + using SoAConstValueWithConf = SoAConstValue; \ + \ + template \ + using SoAEigenValueWithConf = SoAEigenValue; \ + /* dump the SoA internal structure */ \ + SOA_HOST_ONLY \ + static void dump(size_t nElements) { \ + std::cout << #CLASS "(" << nElements << ", " << ALIGNMENT << "): " << std::endl; \ + std::cout << " sizeof(" #CLASS "): " << sizeof(CLASS) << std::endl; \ + size_t offset = 0; \ + _ITERATE_ON_ALL(_DECLARE_SOA_DUMP_INFO, ~, __VA_ARGS__) \ + std::cout << "Final offset = " << offset << " computeDataSize(...): " << computeDataSize(nElements) \ + << std::endl; \ + std::cout << std::endl; \ + } \ + /* Helper function used by caller to externally allocate the storage */ \ + static size_t computeDataSize(size_t nElements) { \ + size_t ret = 0; \ + _ITERATE_ON_ALL(_ACCUMULATE_SOA_ELEMENT, ~, __VA_ARGS__) \ + return ret; \ + } \ + \ + /** \ * Helper/friend class allowing SoA introspection. 
\ - */ \ - struct SoAMetadata { \ - friend CLASS; \ - SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ - SOA_HOST_DEVICE_INLINE size_t byteSize() const { return parent_.byteSize_; } \ - SOA_HOST_DEVICE_INLINE size_t byteAlignment() const { return parent_.byteAlignment_; } \ - SOA_HOST_DEVICE_INLINE std::byte* data() const { return parent_.mem_; } \ - SOA_HOST_DEVICE_INLINE std::byte* nextByte() const { return parent_.mem_ + parent_.byteSize_; } \ - SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) { \ - return CLASS(addr, parent_.nElements_, parent_.byteAlignment_ ); \ - } \ - _ITERATE_ON_ALL(_DEFINE_METADATA_MEMBERS, ~, __VA_ARGS__) \ - \ - private: \ - SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent): parent_(parent) {} \ - const CLASS& parent_; \ - }; \ - friend SoAMetadata; \ - SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ - \ - /* Trivial constuctor */ \ - CLASS(): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_MEMBER_TRIVIAL_CONSTRUCTION, ~, __VA_ARGS__) \ - {} \ - \ - /* Constructor relying on user provided storage */ \ - SOA_HOST_ONLY CLASS(std::byte* mem, size_t nElements, size_t byteAlignment = defaultAlignment): \ - mem_(mem), nElements_(nElements), byteAlignment_(byteAlignment) { \ - auto curMem = mem_; \ - _ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ - /* Sanity check: we should have reached the computed size, only on host code */ \ - byteSize_ = computeDataSize(nElements_, byteAlignment_); \ - if(mem_ + byteSize_ != curMem) \ - throw std::out_of_range("In " #CLASS "::" #CLASS ": unexpected end pointer."); \ - } \ - \ - /* Constructor relying on user provided storage */ \ - SOA_DEVICE_ONLY CLASS(bool devConstructor, std::byte* mem, size_t nElements, size_t byteAlignment = defaultAlignment): \ - mem_(mem), nElements_(nElements), byteAlignment_(byteAlignment) { \ - auto curMem = mem_; \ - _ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ - 
} \ - \ - struct const_element { \ - SOA_HOST_DEVICE_INLINE \ - const_element(size_t index, \ - /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_ARG, index, __VA_ARGS__) \ - ): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION, index, __VA_ARGS__) \ - {} \ - _ITERATE_ON_ALL(_DECLARE_CONST_ELEMENT_ACCESSOR, ~, __VA_ARGS__) \ - \ - private: \ - _ITERATE_ON_ALL(_DECLARE_CONST_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ - }; \ - \ - struct element { \ - SOA_HOST_DEVICE_INLINE \ - element(size_t index, \ - /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_ARG, index, __VA_ARGS__) \ - ): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION, index, __VA_ARGS__) \ - {} \ - SOA_HOST_DEVICE_INLINE \ - element& operator=(const element& other) { \ - _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_COPY, ~, __VA_ARGS__) \ - return *this; \ - } \ - _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ - }; \ - \ - /* AoS-like accessor (non-const) */ \ - SOA_HOST_DEVICE_INLINE \ - element operator[](size_t index) { \ - rangeCheck(index); \ - return element(index, \ - _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__) ); \ - } \ - \ - /* AoS-like accessor (const) */ \ - SOA_HOST_DEVICE_INLINE \ - const const_element operator[](size_t index) const { \ - rangeCheck(index); \ - return const_element(index, \ - _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__) ); \ - } \ - \ - /* accessors */ \ - _ITERATE_ON_ALL(_DECLARE_SOA_ACCESSOR, ~, __VA_ARGS__) \ - _ITERATE_ON_ALL(_DECLARE_SOA_CONST_ACCESSOR, ~, __VA_ARGS__) \ - \ - /* dump the SoA internal structure */ \ - template SOA_HOST_ONLY friend void dump(); \ - \ -private: \ - /* Range checker conditional to the macro _DO_RANGECHECK */ \ - SOA_HOST_DEVICE_INLINE \ - void rangeCheck(size_t index) const { \ - if constexpr (_DO_RANGECHECK) { \ - if (index >= nElements_) { \ - printf("In " #CLASS "::rangeCheck(): 
index out of range: %zu with nElements: %zu\n", index, nElements_); \ - assert(false); \ - } \ - } \ - } \ - \ - /* data members */ \ - std::byte* mem_; \ - size_t nElements_; \ - size_t byteSize_; \ - size_t byteAlignment_; \ - _ITERATE_ON_ALL(_DECLARE_SOA_DATA_MEMBER, ~, __VA_ARGS__) \ -} - -#endif // ndef DataStrcutures_SoAStore_h + */ \ + struct SoAMetadata { \ + friend CLASS; \ + SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ + SOA_HOST_DEVICE_INLINE size_t byteSize() const { return parent_.byteSize_; } \ + SOA_HOST_DEVICE_INLINE size_t byteAlignment() const { return CLASS::byteAlignment; } \ + SOA_HOST_DEVICE_INLINE std::byte* data() const { return parent_.mem_; } \ + SOA_HOST_DEVICE_INLINE std::byte* nextByte() const { return parent_.mem_ + parent_.byteSize_; } \ + SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) { return CLASS(addr, parent_.nElements_); } \ + _ITERATE_ON_ALL(_DEFINE_METADATA_MEMBERS, ~, __VA_ARGS__) \ + \ + private: \ + SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ + const CLASS& parent_; \ + typedef CLASS ParentClass; \ + }; \ + friend SoAMetadata; \ + SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ + \ + /* Trivial constuctor */ \ + CLASS() \ + : mem_(nullptr), \ + nElements_(0), \ + byteSize_(0), \ + _ITERATE_ON_ALL_COMMA(_DECLARE_MEMBER_TRIVIAL_CONSTRUCTION, ~, __VA_ARGS__) {} \ + \ + /* Constructor relying on user provided storage */ \ + SOA_HOST_ONLY CLASS(std::byte* mem, size_t nElements) : mem_(mem), nElements_(nElements), byteSize_(0) { \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (reinterpret_cast(mem) % byteAlignment) \ + throw std::out_of_range("In " #CLASS "::" #CLASS ": misaligned buffer"); \ + auto curMem = mem_; \ + _ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ + /* Sanity check: we should have reached the computed size, only on host code */ \ + 
byteSize_ = computeDataSize(nElements_); \ + if (mem_ + byteSize_ != curMem) \ + throw std::out_of_range("In " #CLASS "::" #CLASS ": unexpected end pointer."); \ + } \ + \ + /* Constructor relying on user provided storage */ \ + SOA_DEVICE_ONLY CLASS(bool devConstructor, std::byte* mem, size_t nElements) : mem_(mem), nElements_(nElements) { \ + auto curMem = mem_; \ + _ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ + } \ + \ + struct const_element { \ + SOA_HOST_DEVICE_INLINE \ + const_element(size_t index, /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_ARG, index, __VA_ARGS__)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION, index, __VA_ARGS__) {} \ + _ITERATE_ON_ALL(_DECLARE_CONST_ELEMENT_ACCESSOR, ~, __VA_ARGS__) \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_CONST_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ + }; \ + \ + struct element { \ + SOA_HOST_DEVICE_INLINE \ + element(size_t index, /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_ARG, index, __VA_ARGS__)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION, index, __VA_ARGS__) {} \ + SOA_HOST_DEVICE_INLINE \ + element& operator=(const element& other) { \ + _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_COPY, ~, __VA_ARGS__) \ + return *this; \ + } \ + _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ + }; \ + \ + /* AoS-like accessor (non-const) */ \ + SOA_HOST_DEVICE_INLINE \ + element operator[](size_t index) { \ + rangeCheck(index); \ + return element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__)); \ + } \ + \ + /* AoS-like accessor (const) */ \ + SOA_HOST_DEVICE_INLINE \ + const const_element operator[](size_t index) const { \ + rangeCheck(index); \ + return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__)); \ + } \ + \ + /* accessors */ \ + _ITERATE_ON_ALL(_DECLARE_SOA_ACCESSOR, ~, __VA_ARGS__) \ + 
_ITERATE_ON_ALL(_DECLARE_SOA_CONST_ACCESSOR, ~, __VA_ARGS__) \ + \ + /* dump the SoA internal structure */ \ + template \ + SOA_HOST_ONLY friend void dump(); \ + \ + private: \ + /* Range checker conditional to the macro _DO_RANGECHECK */ \ + SOA_HOST_DEVICE_INLINE \ + void rangeCheck(size_t index) const { \ + if constexpr (_DO_RANGECHECK) { \ + if (index >= nElements_) { \ + printf("In " #CLASS "::rangeCheck(): index out of range: %zu with nElements: %zu\n", index, nElements_); \ + assert(false); \ + } \ + } \ + } \ + \ + /* data members */ \ + std::byte* mem_; \ + size_t nElements_; \ + size_t byteSize_; \ + _ITERATE_ON_ALL(_DECLARE_SOA_DATA_MEMBER, ~, __VA_ARGS__) \ + } + +#endif // ndef DataStructures_SoAStore_h diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index b84127aab..b20578fab 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -5,22 +5,20 @@ * with varying columns lengths. */ -#ifndef DataStrcutures_SoAView_h -#define DataStrcutures_SoAView_h +#ifndef DataStructures_SoAView_h +#define DataStructures_SoAView_h #include "SoACommon.h" -#define SoA_view_store(TYPE, NAME) \ - (TYPE, NAME) +#define SoA_view_store(TYPE, NAME) (TYPE, NAME) -#define SoA_view_store_list(...) \ - __VA_ARGS__ +#define SoA_view_store_list(...) __VA_ARGS__ -#define SoA_view_value(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - (STORE_NAME, STORE_MEMBER, LOCAL_NAME) +#define SoA_view_value(STORE_NAME, STORE_MEMBER) (STORE_NAME, STORE_MEMBER, STORE_MEMBER) -#define SoA_view_value_list(...) \ - __VA_ARGS__ +#define SoA_view_value_rename(STORE_NAME, STORE_MEMBER, LOCAL_NAME) (STORE_NAME, STORE_MEMBER, LOCAL_NAME) + +#define SoA_view_value_list(...) 
__VA_ARGS__ /* * A macro defining a SoA view (collection of coluns from multiple stores) @@ -45,331 +43,392 @@ * Members definitions macros for viewa */ - /** * Store types aliasing for referencing by name */ -#define _DECLARE_VIEW_STORE_TYPE_ALIAS_IMPL(TYPE, NAME) \ - typedef TYPE BOOST_PP_CAT(TypeOf_, NAME); +#define _DECLARE_VIEW_STORE_TYPE_ALIAS_IMPL(TYPE, NAME) typedef TYPE BOOST_PP_CAT(TypeOf_, NAME); -#define _DECLARE_VIEW_STORE_TYPE_ALIAS(R, DATA, TYPE_NAME) \ +#define _DECLARE_VIEW_STORE_TYPE_ALIAS(R, DATA, TYPE_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_STORE_TYPE_ALIAS_IMPL TYPE_NAME) /** * Member types aliasing for referencing by name */ -#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ - typedef BOOST_PP_CAT(TypeOf_, STORE_NAME) :: SoAMetadata:: BOOST_PP_CAT(TypeOf_, STORE_MEMBER) BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ - static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ - BOOST_PP_CAT(TypeOf_, STORE_NAME) :: SoAMetadata:: BOOST_PP_CAT(ColumnTypeOf_, STORE_MEMBER); \ - DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME) () const { \ - return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ - }; \ - static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != SoAColumnType::eigen, "Eigen columns not supported in views."); - -#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + typedef typename BOOST_PP_CAT(TypeOf_, STORE_NAME)::SoAMetadata::BOOST_PP_CAT(TypeOf_, STORE_MEMBER) \ + BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ + static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ + BOOST_PP_CAT(TypeOf_, STORE_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, STORE_MEMBER); \ + SOA_HOST_DEVICE_INLINE \ + DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ + return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ + }; \ + 
static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != SoAColumnType::eigen, \ + "Eigen columns not supported in views."); \ + static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != SoAColumnType::scalar, "Scalars not supported in views."); + +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) /** * Member assignment for trivial constructor */ -#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - ( BOOST_PP_CAT(LOCAL_NAME, _) (nullptr) ) +#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + (BOOST_PP_CAT(LOCAL_NAME, _)(nullptr)) -#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL STORE_MEMBER_NAME) /** - * Generator of parameters (stores) for constructor. + * Generator of parameters (stores) for constructor by stores. */ -#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL(STORE_TYPE, STORE_NAME, DATA) \ - ( DATA STORE_TYPE & STORE_NAME ) +#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL(STORE_TYPE, STORE_NAME, DATA) (DATA STORE_TYPE & STORE_NAME) -#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS(R, DATA, TYPE_NAME) \ +#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS(R, DATA, TYPE_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) +/** + * Generator of parameters (stores) for constructor by column. 
+ */ +#define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + (DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME) + +#define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) + /** * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. */ -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(STORE, MEMBER, NAME) \ - ( BOOST_PP_CAT(NAME, _) ( [&]() -> auto { \ - static_assert ( BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != SoAColumnType::eigen, \ - "Eigen values not supported in views" ); \ - return STORE . soaMetadata() . BOOST_PP_CAT(addressOf_, MEMBER) (); \ - }() ) ) - - -#define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(STORE, MEMBER, NAME) \ + (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ + static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != SoAColumnType::eigen, \ + "Eigen values not supported in views"); \ + static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != SoAColumnType::scalar, \ + "Scalar values not supported in views"); \ + auto addr = STORE.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (reinterpret_cast(addr) % byteAlignment) \ + throw std::out_of_range("In store constructor: misaligned column: " #NAME); \ + return addr; \ + }())) + +#define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL STORE_MEMBER_NAME) +/** + * Generator of member initialization from constructor. + * We use a lambda with auto return type to handle multiple possible return types. 
+ */ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL(STORE, MEMBER, NAME) \ + (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (reinterpret_cast(NAME) % byteAlignment) \ + throw std::out_of_range("In store constructor: misaligned column: " #NAME); \ + return NAME; \ + }())) + +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN(R, DATA, STORE_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL STORE_MEMBER_NAME) + /** * Generator of element members initializer. */ -#define _DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL(STORE, MEMBER, LOCAL_NAME, DATA) \ - ( LOCAL_NAME (DATA, LOCAL_NAME) ) +#define _DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL(STORE, MEMBER, LOCAL_NAME, DATA) (LOCAL_NAME(DATA, LOCAL_NAME)) -#define _DECLARE_VIEW_ELEM_MEMBER_INIT(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_ELEM_MEMBER_INIT(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) /** * Helper macro extracting the data type from a column accessor in a store */ -#define _COLUMN_TYPE(STORE_NAME, STORE_MEMBER) \ - typename std::remove_pointer< decltype (BOOST_PP_CAT(STORE_NAME, Type)() :: STORE_MEMBER () ) >::type +#define _COLUMN_TYPE(STORE_NAME, STORE_MEMBER) \ + typename std::remove_pointer::type /** * Generator of parameters for (non-const) element subclass (expanded comma separated). 
*/ -#define _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ - ( DATA BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME ) +#define _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + (DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME) -#define _DECLARE_VIEW_ELEMENT_VALUE_ARG(R, DATA, STORE_MEMBER_NAME) \ - _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK (STORE_MEMBER_NAME, DATA) +#define _DECLARE_VIEW_ELEMENT_VALUE_ARG(R, DATA, STORE_MEMBER_NAME) \ + _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA) /** * Generator of member initialization for constructor of element subclass */ -#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ - (BOOST_PP_CAT(LOCAL_NAME, _) (DATA, LOCAL_NAME) ) +#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + (BOOST_PP_CAT(LOCAL_NAME, _)(DATA, LOCAL_NAME)) /* declare AoS-like element value args for contructor; these should expand,for columns only */ -#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) - /** * Declaration of the members accessors of the const element subclass */ -#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME() const { \ - return BOOST_PP_CAT(LOCAL_NAME, _)(); \ +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME() const { \ + return BOOST_PP_CAT(LOCAL_NAME, _)(); \ } -#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, 
STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL STORE_MEMBER_NAME /** * Declaration of the private members of the const element subclass */ -#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - const SoAConstValue< BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) > BOOST_PP_CAT(LOCAL_NAME, _); \ +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + const SoAConstValueWithConf BOOST_PP_CAT(LOCAL_NAME, _); -#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL STORE_MEMBER_NAME /** * Generator of the member-by-member copy operator of the element subclass. */ -#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - LOCAL_NAME() = other.LOCAL_NAME(); +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) LOCAL_NAME() = other.LOCAL_NAME(); -#define _DECLARE_VIEW_ELEMENT_VALUE_COPY(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL STORE_MEMBER_NAME) /** * Declaration of the private members of the const element subclass */ -#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - SoAValue LOCAL_NAME; \ +#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + SoAValueWithConf LOCAL_NAME; -#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL STORE_MEMBER_NAME /** * Parameters passed to element subclass constructor in operator[] */ -#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL(STORE_NAME, 
STORE_MEMBER, LOCAL_NAME) \ - (BOOST_PP_CAT(LOCAL_NAME, _)) +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) (BOOST_PP_CAT(LOCAL_NAME, _)) -#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL STORE_MEMBER_NAME) /** * Direct access to column pointer and indexed access */ -#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - /* Column */ \ - SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME() { return BOOST_PP_CAT(LOCAL_NAME, _); } \ - SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) & LOCAL_NAME(size_t index) { \ - return BOOST_PP_CAT(LOCAL_NAME, _)[index]; \ - } - -#define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + /* Column */ \ + SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME() { \ + return BOOST_PP_CAT(LOCAL_NAME, _); \ + } \ + SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) & LOCAL_NAME(size_t index) { \ + return BOOST_PP_CAT(LOCAL_NAME, _)[index]; \ + } + +#define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_ACCESSOR_IMPL STORE_MEMBER_NAME) /** * Direct access to column pointer (const) and indexed access. 
*/ -#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - /* Column */ \ - SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) const * LOCAL_NAME() const { \ - return BOOST_PP_CAT(LOCAL_NAME, _); \ - } \ - SOA_HOST_DEVICE_INLINE BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME(size_t index) const { \ - return *(BOOST_PP_CAT(LOCAL_NAME, _) + index); \ - } - -#define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + /* Column */ \ + SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) const* LOCAL_NAME() const { \ + return BOOST_PP_CAT(LOCAL_NAME, _); \ + } \ + SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME(size_t index) const { \ + return *(BOOST_PP_CAT(LOCAL_NAME, _) + index); \ + } + +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL STORE_MEMBER_NAME) /** * SoA class member declaration (column pointers). 
*/ -#define _DECLARE_VIEW_SOA_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ - DATA BOOST_PP_CAT( SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _) = nullptr; +#define _DECLARE_VIEW_SOA_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ + DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _) = nullptr; -#define _DECLARE_VIEW_SOA_MEMBER(R, DATA, STORE_MEMBER_NAME) \ +#define _DECLARE_VIEW_SOA_MEMBER(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) -#define generate_SoA_view(CLASS, STORES_LIST, VALUE_LIST) \ -struct CLASS { \ - \ - /* these could be moved to an external type trait to free up the symbol names */ \ - using self_type = CLASS; \ - \ - /** \ +#define generate_SoA_view(CLASS, STORES_LIST, VALUE_LIST) \ + template \ + struct CLASS { \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CLASS; \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. \ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ + constexpr static size_t defaultAlignment = 128; \ + constexpr static size_t byteAlignment = ALIGNMENT; \ + constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ + constexpr static size_t conditionalAlignment = \ + alignmentEnforcement == AlignmentEnforcement::Enforced ? byteAlignment : 0; \ + /* Those typedefs avoid having commas in macros (which is problematic) */ \ + template \ + using SoAValueWithConf = SoAValue; \ + \ + template \ + using SoAConstValueWithConf = SoAConstValue; \ + \ + template \ + using SoAEigenValueWithConf = SoAEigenValue; \ + /** \ * Helper/friend class allowing SoA introspection. 
\ - */ \ - struct SoAMetadata { \ - friend CLASS; \ - /* Alias store types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ - \ - /* Alias member types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, BOOST_PP_EMPTY(), VALUE_LIST) \ - private: \ - SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent): parent_(parent) {} \ - const CLASS& parent_; \ - }; \ - friend SoAMetadata ; \ - SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ - \ - /* Trivial constuctor */ \ - CLASS(): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) \ - {} \ - \ - /* Constructor relying on user provided stores */ \ - SOA_HOST_ONLY CLASS ( _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), STORES_LIST) ): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ - \ - struct const_element { \ - SOA_HOST_DEVICE_INLINE \ - const_element(size_t index, \ - /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST) \ - ): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) \ - {} \ - _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ - \ - private: \ - _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ - }; \ - \ - struct element { \ - SOA_HOST_DEVICE_INLINE \ - element(size_t index, \ - /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, BOOST_PP_EMPTY(), VALUE_LIST) \ - ): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEM_MEMBER_INIT, index, VALUE_LIST) \ - {} \ - SOA_HOST_DEVICE_INLINE \ - element& operator=(const element& other) { \ - _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_COPY, ~, VALUE_LIST) \ - return *this; \ - } \ - _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_MEMBER, ~, 
VALUE_LIST) \ - }; \ - \ - /* AoS-like accessor (non-const) */ \ - SOA_HOST_DEVICE_INLINE \ - element operator[](size_t index) { \ - return element(index, \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST) ); \ - } \ - \ - /* AoS-like accessor (const) */ \ - SOA_HOST_DEVICE_INLINE \ - const const_element operator[](size_t index) const { \ - return const_element(index, \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST) ); \ - } \ - \ - /* accessors */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_ACCESSOR, ~, VALUE_LIST) \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ - \ - /* dump the SoA internal structure */ \ - template SOA_HOST_ONLY friend void dump(); \ - \ -private: \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ -} - -#define generate_SoA_const_view(CLASS, STORES_LIST, VALUE_LIST) \ -struct CLASS { \ - \ - /* these could be moved to an external type trait to free up the symbol names */ \ - using self_type = CLASS; \ - \ + */ \ + struct SoAMetadata { \ + friend CLASS; \ + /* Alias store types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ + \ + /* Alias member types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, BOOST_PP_EMPTY(), VALUE_LIST) \ + private: \ + SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ + const CLASS& parent_; \ + }; \ + friend SoAMetadata; \ + SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ + \ + /* Trivial constuctor */ \ + CLASS() : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ + \ + /* Constructor relying on user provided stores */ \ + SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), STORES_LIST)) \ + : 
_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ + \ + /* Constructor relying on individually provided column addresses */ \ + SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, \ + BOOST_PP_EMPTY(), \ + VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ + \ + struct const_element { \ + SOA_HOST_DEVICE_INLINE \ + const_element(size_t index, /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + struct element { \ + SOA_HOST_DEVICE_INLINE \ + element(size_t index, /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, BOOST_PP_EMPTY(), VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ + SOA_HOST_DEVICE_INLINE \ + element& operator=(const element& other) { \ + _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_COPY, ~, VALUE_LIST) \ + return *this; \ + } \ + _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + /* AoS-like accessor (non-const) */ \ + SOA_HOST_DEVICE_INLINE \ + element operator[](size_t index) { \ + return element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ + } \ + \ + /* AoS-like accessor (const) */ \ + SOA_HOST_DEVICE_INLINE \ + const const_element operator[](size_t index) const { \ + return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ + } \ + \ + /* accessors */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_ACCESSOR, ~, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ + \ + /* dump the SoA internal 
structure */ \ + template \ + SOA_HOST_ONLY friend void dump(); \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ + } + +#define generate_SoA_const_view(CLASS, STORES_LIST, VALUE_LIST) \ + template \ + struct CLASS { \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CLASS; \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. \ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ + constexpr static size_t defaultAlignment = 128; \ + constexpr static size_t byteAlignment = ALIGNMENT; \ + constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ + constexpr static size_t conditionalAlignment = \ + alignmentEnforcement == AlignmentEnforcement::Enforced ? byteAlignment : 0; \ + /* Those typedefs avoid having commas in macros (which is problematic) */ \ + template \ + using SoAValueWithConf = SoAValue; \ + \ + template \ + using SoAConstValueWithConf = SoAConstValue; \ + \ + template \ + using SoAEigenValueWithConf = SoAEigenValue; \ + \ /** \ * Helper/friend class allowing SoA introspection. 
\ - */ \ - struct SoAMetadata { \ - friend CLASS; \ - /* Alias store types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ - \ - /* Alias member types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, const, VALUE_LIST) \ - private: \ - SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent): parent_(parent) {} \ - const CLASS& parent_; \ - }; \ - friend SoAMetadata ; \ - SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ - \ - /* Trivial constuctor */ \ - CLASS(): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) \ - {} \ - \ - /* Constructor relying on user provided stores */ \ - SOA_HOST_ONLY CLASS ( _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, STORES_LIST) ): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ - \ - struct const_element { \ - SOA_HOST_DEVICE_INLINE \ - const_element(size_t index, \ - /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST) \ - ): \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) \ - {} \ - _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ - \ - private: \ - _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ - }; \ - \ - /* AoS-like accessor (const) */ \ - SOA_HOST_DEVICE_INLINE \ - const const_element operator[](size_t index) const { \ - return const_element(index, \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST) ); \ - } \ - \ - /* accessors */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ - \ - /* dump the SoA internal structure */ \ - template SOA_HOST_ONLY friend void dump(); \ - \ -private: \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ -} - -#endif // ndef 
DataStrcutures_SoAView_h \ No newline at end of file + */ \ + struct SoAMetadata { \ + friend CLASS; \ + /* Alias store types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ + \ + /* Alias member types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, const, VALUE_LIST) \ + private: \ + SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ + const CLASS& parent_; \ + }; \ + friend SoAMetadata; \ + SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ + \ + /* Trivial constuctor */ \ + CLASS() : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ + \ + /* Constructor relying on user provided stores */ \ + SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, STORES_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ + \ + /* Constructor relying on individually provided column addresses */ \ + SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, const, VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ + \ + struct const_element { \ + SOA_HOST_DEVICE_INLINE \ + const_element(size_t index, /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + /* AoS-like accessor (const) */ \ + SOA_HOST_DEVICE_INLINE \ + const const_element operator[](size_t index) const { \ + return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, 
VALUE_LIST)); \ + } \ + \ + /* accessors */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ + \ + /* dump the SoA internal structure */ \ + template \ + SOA_HOST_ONLY friend void dump(); \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ + } + +#endif // ndef DataStructures_SoAView_h From c38be2004830e59cef9b2b260f8602a11316979c Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 2 Dec 2021 14:09:57 +0100 Subject: [PATCH 17/50] [cudadev] Ported cudadev to the new SoA layout. The SoA macros now generate templates, adapted code and added using clauses to select the proper variant. Introduced separate, specifically const constViews as const views can be worked around by copying, and const correctness is insufficient. Applied the previous to the SiPixelROCsStatusAndMappingWrapper product. Moved alignment value from runtime to compile time parameter. Code simplifications. --- .../CUDADataFormats/SiPixelClustersCUDA.h | 21 ++++-- .../CUDADataFormats/SiPixelDigisCUDA.h | 40 ++++++---- .../TrackingRecHit2DHeterogeneous.h | 14 ++-- .../TrackingRecHit2DHostSOAStore.cc | 2 +- .../CUDADataFormats/TrackingRecHit2DSOAView.h | 42 ++++++----- .../CondFormats/SiPixelROCsStatusAndMapping.h | 24 +++++- .../SiPixelROCsStatusAndMappingWrapper.cc | 6 +- .../SiPixelROCsStatusAndMappingWrapper.h | 11 +-- src/cudadev/DataFormats/SoACommon.h | 64 ++++++++++++++++ src/cudadev/DataFormats/SoAStore.h | 6 +- src/cudadev/DataFormats/SoAView.h | 75 +++++++++++++------ ...elROCsStatusAndMappingWrapperESProducer.cc | 6 +- .../SiPixelRawToClusterCUDA.cc | 2 +- .../SiPixelRawToClusterGPUKernel.cu | 8 +- .../SiPixelRawToClusterGPUKernel.h | 4 +- src/cudadev/test/SoAStoreAndView_t.cu | 64 +++++++++------- 16 files changed, 267 insertions(+), 122 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h index 79914c045..51f8087ef 100644 --- 
a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h @@ -11,7 +11,7 @@ class SiPixelClustersCUDA { public: - generate_SoA_store(DeviceStore, + generate_SoA_store(DeviceStoreTemplate, SoA_column(uint32_t, moduleStart), // index of the first pixel of each module SoA_column(uint32_t, clusInModule), // number of clusters found in each module SoA_column(uint32_t, moduleId), // module id of each module @@ -19,19 +19,24 @@ class SiPixelClustersCUDA { // originally from rechits SoA_column(uint32_t, clusModuleStart) // index of the first cluster of each module ); + + // We use all defaults for the template parameters. + using DeviceStore = DeviceStoreTemplate<>; - generate_SoA_const_view(DeviceConstView, + generate_SoA_const_view(DeviceConstViewTemplate, SoA_view_store_list(SoA_view_store(DeviceStore, deviceStore)), SoA_view_value_list( - SoA_view_value(deviceStore, moduleStart, moduleStart), // index of the first pixel of each module - SoA_view_value(deviceStore, clusInModule, clusInModule), // number of clusters found in each module - SoA_view_value(deviceStore, moduleId, moduleId), // module id of each module - + SoA_view_value(deviceStore, moduleStart), // index of the first pixel of each module + SoA_view_value(deviceStore, clusInModule), // number of clusters found in each module + SoA_view_value(deviceStore, moduleId), // module id of each module + // originally from rechits - SoA_view_value(deviceStore, clusModuleStart, clusModuleStart) // index of the first cluster of each module + SoA_view_value(deviceStore, clusModuleStart) // index of the first cluster of each module ) ); - + + using DeviceConstView = DeviceConstViewTemplate<>; + explicit SiPixelClustersCUDA(); explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream); ~SiPixelClustersCUDA() = default; diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index 48231aad8..7b23dd8dd 100644 --- 
a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -11,14 +11,16 @@ class SiPixelDigisCUDA { public: - generate_SoA_store(DeviceOnlyStore, + generate_SoA_store(DeviceOnlyStoreTemplate, /* These are consumed by downstream device code */ SoA_column(uint16_t, xx), /* local coordinates of each pixel */ SoA_column(uint16_t, yy), /* */ SoA_column(uint16_t, moduleInd) /* module id of each pixel */ ); - generate_SoA_store(HostDeviceStore, + using DeviceOnlyStore = DeviceOnlyStoreTemplate<>; + + generate_SoA_store(HostDeviceStoreTemplate, /* These are also transferred to host (see HostDataView) */ SoA_column(uint16_t, adc), /* ADC of each pixel */ SoA_column(int32_t, clus), /* cluster id of each pixel */ @@ -28,38 +30,44 @@ class SiPixelDigisCUDA { SoA_column(uint32_t, rawIdArr) /* DetId of each pixel */ ); - generate_SoA_view(DeviceFullView, + using HostDeviceStore = HostDeviceStoreTemplate<>; + + generate_SoA_view(DeviceFullViewTemplate, SoA_view_store_list( SoA_view_store(DeviceOnlyStore, deviceOnly), SoA_view_store(HostDeviceStore, hostDevice) ), SoA_view_value_list( - SoA_view_value(deviceOnly, xx, xx), /* local coordinates of each pixel */ - SoA_view_value(deviceOnly, yy, yy), /* */ - SoA_view_value(deviceOnly, moduleInd, moduleInd), /* module id of each pixel */ - SoA_view_value(hostDevice, adc, adc), /* ADC of each pixel */ - SoA_view_value(hostDevice, clus, clus),/* cluster id of each pixel */ - SoA_view_value(hostDevice, pdigi, pdigi), /* packed digi (row, col, adc) of each pixel */ - SoA_view_value(hostDevice, rawIdArr, rawIdArr) /* DetId of each pixel */ + SoA_view_value(deviceOnly, xx), /* local coordinates of each pixel */ + SoA_view_value(deviceOnly, yy), /* */ + SoA_view_value(deviceOnly, moduleInd),/* module id of each pixel */ + SoA_view_value(hostDevice, adc), /* ADC of each pixel */ + SoA_view_value(hostDevice, clus), /* cluster id of each pixel */ + SoA_view_value(hostDevice, pdigi), /* packed digi 
(row, col, adc) of each pixel */ + SoA_view_value(hostDevice, rawIdArr) /* DetId of each pixel */ /* TODO: simple, no rename interface */ ) ); + + using DeviceFullView = DeviceFullViewTemplate<>; /* Device pixel view: this is a second generation view (view from view) */ - generate_SoA_const_view(DevicePixelConstView, + generate_SoA_const_view(DevicePixelConstViewTemplate, /* We get out data from the DeviceFullStore */ SoA_view_store_list( SoA_view_store(DeviceFullView, deviceFullView) ), /* These are consumed by downstream device code */ SoA_view_value_list( - SoA_view_value(deviceFullView, xx, xx), /* local coordinates of each pixel */ - SoA_view_value(deviceFullView, yy, yy), /* */ - SoA_view_value(deviceFullView, moduleInd, moduleInd), /* module id of each pixel */ - SoA_view_value(deviceFullView, adc, adc), /* ADC of each pixel */ - SoA_view_value(deviceFullView, clus, clus) /* cluster id of each pixel */ + SoA_view_value(deviceFullView, xx), /* local coordinates of each pixel */ + SoA_view_value(deviceFullView, yy), /* */ + SoA_view_value(deviceFullView, moduleInd), /* module id of each pixel */ + SoA_view_value(deviceFullView, adc), /* ADC of each pixel */ + SoA_view_value(deviceFullView, clus) /* cluster id of each pixel */ ) ); + + using DevicePixelConstView = DevicePixelConstViewTemplate<>; explicit SiPixelDigisCUDA(); explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream); diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h index 424105bfe..7fa8871d8 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h @@ -108,24 +108,20 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH // In order to simplify code, we align all to the minimum necessary size (sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type)). 
{ // Simplify a bit following computations - const size_t align = sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type); const size_t phiBinnerByteSize = (phase1PixelTopology::numberOfLayers + 1) * sizeof (TrackingRecHit2DSOAStore::PhiBinner::index_type); // Allocate the buffer m_hitsSupportLayerStartStore = Traits::template make_device_unique ( - TrackingRecHit2DSOAStore::HitsStore::computeDataSize(m_nHits, align) + - TrackingRecHit2DSOAStore::SupportObjectsStore::computeDataSize(m_nHits, align) + + TrackingRecHit2DSOAStore::HitsStore::computeDataSize(m_nHits) + + TrackingRecHit2DSOAStore::SupportObjectsStore::computeDataSize(m_nHits) + phiBinnerByteSize, stream); // Split the buffer in stores and array - store->m_hitsStore.~HitsStore(); - new (&store->m_hitsStore) TrackingRecHit2DSOAStore::HitsStore(m_hitsSupportLayerStartStore.get(), nHits, align); - store->m_supportObjectsStore.~SupportObjectsStore(); - new (&store->m_supportObjectsStore) TrackingRecHit2DSOAStore::SupportObjectsStore(store->m_hitsStore.soaMetadata().nextByte(), nHits, 1); + store->m_hitsStore = TrackingRecHit2DSOAStore::HitsStore(m_hitsSupportLayerStartStore.get(), nHits); + store->m_supportObjectsStore = TrackingRecHit2DSOAStore::SupportObjectsStore(store->m_hitsStore.soaMetadata().nextByte(), nHits); m_hitsLayerStart = store->m_hitsLayerStart = reinterpret_cast (store->m_supportObjectsStore.soaMetadata().nextByte()); // Record additional references - store->m_hitsAndSupportView.~HitsAndSupportView(); - new (&store->m_hitsAndSupportView) TrackingRecHit2DSOAStore::HitsAndSupportView( + store->m_hitsAndSupportView = TrackingRecHit2DSOAStore::HitsAndSupportView( store->m_hitsStore, store->m_supportObjectsStore ); diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc index 65b35f5e4..36d2848da 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc +++ 
b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc @@ -11,5 +11,5 @@ void TrackingRecHit2DHostSOAStore::reset() { TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore(size_t size, cudaStream_t stream): hits_h(cms::cuda::make_host_unique(TrackingRecHit2DSOAStore::HitsStore::computeDataSize(size), stream)), - hitsStore_(hits_h.get(), size, 1 /* byte alignement */) + hitsStore_(hits_h.get(), size) {} diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h index 37315ef49..16ededc67 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h @@ -31,7 +31,7 @@ class TrackingRecHit2DSOAStore { // Sill, we need the 32 bits integers to be aligned, so we simply declare the SoA with the 32 bits fields first // and the 16 bits behind (as they have a looser alignment requirement. Then the SoA can be create with a byte // alignment of 1) - generate_SoA_store(HitsStore, + generate_SoA_store(HitsStoreTemplate, // 32 bits section // local coord SoA_column(float, xLocal), @@ -54,7 +54,10 @@ class TrackingRecHit2DSOAStore { SoA_column(int16_t, clusterSizeY) ); - generate_SoA_store(SupportObjectsStore, + // The hits store does not use default alignment but a more relaxed one. + using HitsStore = HitsStoreTemplate; + + generate_SoA_store(SupportObjectsStoreTemplate, // This is the end of the data which is transferred to host. The following columns are supporting // objects, not transmitted @@ -68,32 +71,37 @@ class TrackingRecHit2DSOAStore { SoA_column(uint16_t, detectorIndex) ); - generate_SoA_view(HitsAndSupportView, + // The support objects store also not use default alignment but a more relaxed one. 
+ using SupportObjectsStore = SupportObjectsStoreTemplate; + + generate_SoA_view(HitsAndSupportViewTemplate, SoA_view_store_list( SoA_view_store(HitsStore, hitsStore), SoA_view_store(SupportObjectsStore, supportObjectsStore) ), SoA_view_value_list( - SoA_view_value(hitsStore, xLocal, xLocal), - SoA_view_value(hitsStore, yLocal, yLocal), - SoA_view_value(hitsStore, xerrLocal, xerrLocal), - SoA_view_value(hitsStore, yerrLocal, yerrLocal), + SoA_view_value(hitsStore, xLocal), + SoA_view_value(hitsStore, yLocal), + SoA_view_value(hitsStore, xerrLocal), + SoA_view_value(hitsStore, yerrLocal), - SoA_view_value(hitsStore, xGlobal, xGlobal), - SoA_view_value(hitsStore, yGlobal, yGlobal), - SoA_view_value(hitsStore, zGlobal, zGlobal), - SoA_view_value(hitsStore, rGlobal, rGlobal), + SoA_view_value(hitsStore, xGlobal), + SoA_view_value(hitsStore, yGlobal), + SoA_view_value(hitsStore, zGlobal), + SoA_view_value(hitsStore, rGlobal), - SoA_view_value(hitsStore, charge, charge), - SoA_view_value(hitsStore, clusterSizeX, clusterSizeX), - SoA_view_value(hitsStore, clusterSizeY, clusterSizeY), + SoA_view_value(hitsStore, charge), + SoA_view_value(hitsStore, clusterSizeX), + SoA_view_value(hitsStore, clusterSizeY), - SoA_view_value(supportObjectsStore, phiBinnerStorage, phiBinnerStorage), - SoA_view_value(supportObjectsStore, iphi, iphi), - SoA_view_value(supportObjectsStore, detectorIndex, detectorIndex) + SoA_view_value(supportObjectsStore, phiBinnerStorage), + SoA_view_value(supportObjectsStore, iphi), + SoA_view_value(supportObjectsStore, detectorIndex) ) ); + using HitsAndSupportView = HitsAndSupportViewTemplate; + // Shortcut operator saving the explicit calls to view in usage. 
__device__ __forceinline__ HitsAndSupportView::element operator[] (size_t index) { return m_hitsAndSupportView[index]; diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h index f46f79da4..75f246647 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h @@ -2,6 +2,7 @@ #define CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h #include "DataFormats/SoAStore.h" +#include "DataFormats/SoAView.h" namespace pixelgpudetails { // Maximum fed for phase1 is 150 but not all of them are filled @@ -13,7 +14,7 @@ namespace pixelgpudetails { constexpr unsigned int MAX_SIZE_BYTE_BOOL = MAX_SIZE * sizeof(unsigned char); } // namespace pixelgpudetails -generate_SoA_store(SiPixelROCsStatusAndMapping, +generate_SoA_store(SiPixelROCsStatusAndMappingStoreTemplate, SoA_column(unsigned int, fed), SoA_column(unsigned int, link), SoA_column(unsigned int, roc), @@ -24,4 +25,25 @@ generate_SoA_store(SiPixelROCsStatusAndMapping, SoA_scalar(unsigned int, size) ); +using SiPixelROCsStatusAndMappingStore = SiPixelROCsStatusAndMappingStoreTemplate<>; + +generate_SoA_const_view(SiPixelROCsStatusAndMappingConstViewTemplate, + SoA_view_store_list(SoA_view_store(SiPixelROCsStatusAndMappingStore, mappingStore)), + SoA_view_value_list( + SoA_view_value(mappingStore, fed), + SoA_view_value(mappingStore, link), + SoA_view_value(mappingStore, roc), + SoA_view_value(mappingStore, rawId), + SoA_view_value(mappingStore, rocInDet), + SoA_view_value(mappingStore, moduleId), + SoA_view_value(mappingStore, badRocs), + SoA_view_value(mappingStore, size) + ) +); + +// Slightly more complex than using, but allows forward declarations. 
+struct SiPixelROCsStatusAndMappingConstView: public SiPixelROCsStatusAndMappingConstViewTemplate<> { + using SiPixelROCsStatusAndMappingConstViewTemplate<>::SiPixelROCsStatusAndMappingConstViewTemplate; +}; + #endif // CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc index c3eafcaf3..7b1ef9c10 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc @@ -15,7 +15,7 @@ #include "CondFormats/SiPixelROCsStatusAndMappingWrapper.h" #include "CUDACore/copyAsync.h" -SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMapping const& cablingMap, +SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMappingStore const& cablingMap, std::vector modToUnp) : modToUnpDefault(modToUnp.size()), hasQuality_(true) { // TODO: check if cudaStreamDefault is appropriate @@ -25,7 +25,7 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelRO std::copy(modToUnp.begin(), modToUnp.end(), modToUnpDefault.begin()); } -const SiPixelROCsStatusAndMapping & SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync(cudaStream_t cudaStream) const { +SiPixelROCsStatusAndMappingConstView SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync(cudaStream_t cudaStream) const { const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { // allocate @@ -35,7 +35,7 @@ const SiPixelROCsStatusAndMapping & SiPixelROCsStatusAndMappingWrapper::getGPUPr data.cablingMapDevice.soaMetadata().byteSize(), stream); } ); - return data.cablingMapDevice; + return SiPixelROCsStatusAndMappingConstView(data.cablingMapDevice); } const unsigned char* SiPixelROCsStatusAndMappingWrapper::getModToUnpAllAsync(cudaStream_t cudaStream) const { 
diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h index 2d9a15bba..3ecd86131 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h @@ -13,13 +13,14 @@ class SiPixelROCsStatusAndMappingWrapper { public: - explicit SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMapping const &cablingMap, + /* This is using a store as the size is needed. TODO: use views when views start embedding size. */ + explicit SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMappingStore const &cablingMap, std::vector modToUnp); bool hasQuality() const { return hasQuality_; } // returns pointer to GPU memory - const SiPixelROCsStatusAndMapping & getGPUProductAsync(cudaStream_t cudaStream) const; + SiPixelROCsStatusAndMappingConstView getGPUProductAsync(cudaStream_t cudaStream) const; // returns pointer to GPU memory const unsigned char *getModToUnpAllAsync(cudaStream_t cudaStream) const; @@ -33,11 +34,11 @@ class SiPixelROCsStatusAndMappingWrapper { struct GPUData { void allocate(size_t size, cudaStream_t stream) { cablingMapDeviceBuffer = cms::cuda::make_device_unique( - SiPixelROCsStatusAndMapping::computeDataSize(size), stream); - cablingMapDevice = SiPixelROCsStatusAndMapping(cablingMapDeviceBuffer.get(), size); + SiPixelROCsStatusAndMappingStore::computeDataSize(size), stream); + cablingMapDevice = SiPixelROCsStatusAndMappingStore(cablingMapDeviceBuffer.get(), size); } cms::cuda::device::unique_ptr cablingMapDeviceBuffer; - SiPixelROCsStatusAndMapping cablingMapDevice = SiPixelROCsStatusAndMapping(nullptr, 0); // map struct in GPU + SiPixelROCsStatusAndMappingStore cablingMapDevice = SiPixelROCsStatusAndMappingStore(nullptr, 0); // map struct in GPU }; cms::cuda::ESProduct gpuData_; diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index a3550f023..48f2c5172 100644 --- 
a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -174,6 +174,70 @@ enum class SoAColumnType { scalar = _VALUE_TYPE_SCALAR, column = _VALUE_TYPE_COL IF_COLUMN, \ BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_EIGEN_COLUMN), IF_EIGEN_COLUMN, BOOST_PP_EMPTY()))) +/* Column accessors: templates implementing the global accesors (soa::x() and soa::x(index) */ +enum class SoAAccessType: bool { mutableAccess, constAccess }; + +template +struct SoAColumnAccessorsImpl {}; + + +// Todo: add alignment support. +// Sfinae based const/non const variants. +// Column +template +struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T * baseAddress): baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE T * operator()() { return baseAddress_; } + SOA_HOST_DEVICE_INLINE T & operator()(size_t index) { return baseAddress_[index]; } +private: + T * baseAddress_; +}; + +// Const column +template +struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const T * baseAddress): baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE const T * operator()() const { return baseAddress_; } + SOA_HOST_DEVICE_INLINE T operator()(size_t index) const { return baseAddress_[index]; } +private: + const T * baseAddress_; +}; + +// Scalar +template +struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T * baseAddress): baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE T & operator() () { return *baseAddress_; } + SOA_HOST_DEVICE_INLINE void operator() (size_t index) const { assert (false && "Indexed access impossible for SoA scalars."); } +private: + T * baseAddress_; +}; + +// Const scalar +template +struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const T * baseAddress): baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE T operator() () const { return *baseAddress_; } + SOA_HOST_DEVICE_INLINE void operator() (size_t index) const { assert (false && 
"Indexed access impossible for SoA scalars."); } +private: + const T * baseAddress_; +}; + +/* A helper template stager avoiding comma in macros */ +template +struct SoAAccessors{ + using myInt = int; + template + struct ColumnType { + using myInt = int; + template + struct AccessType: public SoAColumnAccessorsImpl { + using myInt = int; + using SoAColumnAccessorsImpl::SoAColumnAccessorsImpl; + }; + }; +}; + /* Enum parameters allowing templated control of store/view behaviors */ /* Alignement enforcement verifies every column is aligned, and * hints the compiler that it can expect column pointers to be aligned */ diff --git a/src/cudadev/DataFormats/SoAStore.h b/src/cudadev/DataFormats/SoAStore.h index ca1b4814a..d432fa579 100644 --- a/src/cudadev/DataFormats/SoAStore.h +++ b/src/cudadev/DataFormats/SoAStore.h @@ -98,19 +98,19 @@ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * ParentClass::byteAlignment; \ } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::scalar; \ + constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::scalar; \ CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }, /* Column */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ ParentClass::byteAlignment; \ } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::column; \ + constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::column; \ CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }, /* Eigen column */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ 
ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::eigen; \ + constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::eigen; \ CPP_TYPE::Scalar * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }) #define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index b20578fab..2c0c8a3f5 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -39,6 +39,31 @@ * */ +/* Traits for the different column type scenarios */ +/* Value traits passes the class as is in the case of column type and return + * an empty class with functions returning non-scalar as accessors. */ +template +struct ConstValueTraits {}; + +template +struct ConstValueTraits : public C { using C::C; }; + +template +struct ConstValueTraits { + // Just take to SoAValue type to generate the right constructor. + ConstValueTraits(size_t, const typename C::valueType *) {} + // Any attempt to do anything with the "scalar" value a const element will fail. +}; + +template +struct ConstValueTraits { + // Just take to SoAValue type to generate the right constructor. + ConstValueTraits(size_t, const typename C::valueType *) {} + // TODO: implement + // Any attempt to do anything with the eigen value a const element will fail. 
+}; + +#include /* * Members definitions macros for viewa */ @@ -57,15 +82,14 @@ #define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ typedef typename BOOST_PP_CAT(TypeOf_, STORE_NAME)::SoAMetadata::BOOST_PP_CAT(TypeOf_, STORE_MEMBER) \ BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ - static const SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ + constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ BOOST_PP_CAT(TypeOf_, STORE_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, STORE_MEMBER); \ SOA_HOST_DEVICE_INLINE \ DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ }; \ static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != SoAColumnType::eigen, \ - "Eigen columns not supported in views."); \ - static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != SoAColumnType::scalar, "Scalars not supported in views."); + "Eigen columns not supported in views."); #define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) @@ -104,8 +128,6 @@ (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != SoAColumnType::eigen, \ "Eigen values not supported in views"); \ - static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != SoAColumnType::scalar, \ - "Scalar values not supported in views"); \ auto addr = STORE.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ if (reinterpret_cast(addr) % byteAlignment) \ @@ -179,7 +201,10 @@ * Declaration of the private members of the const element subclass */ #define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - const SoAConstValueWithConf BOOST_PP_CAT(LOCAL_NAME, _); + const ConstValueTraits< \ + SoAConstValueWithConf, \ + 
BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, LOCAL_NAME) \ + > BOOST_PP_CAT(LOCAL_NAME, _); #define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL STORE_MEMBER_NAME @@ -212,14 +237,18 @@ /** * Direct access to column pointer and indexed access */ -#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - /* Column */ \ - SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME() { \ - return BOOST_PP_CAT(LOCAL_NAME, _); \ - } \ - SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) & LOCAL_NAME(size_t index) { \ - return BOOST_PP_CAT(LOCAL_NAME, _)[index]; \ - } +#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ + /* Column or scalar */ \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() { \ + return typename SoAAccessors:: \ + template ColumnType:: \ + template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(); \ + } \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) { \ + return typename SoAAccessors:: \ + template ColumnType:: \ + template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ + } #define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_ACCESSOR_IMPL STORE_MEMBER_NAME) @@ -228,13 +257,17 @@ * Direct access to column pointer (const) and indexed access. 
*/ #define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ - /* Column */ \ - SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) const* LOCAL_NAME() const { \ - return BOOST_PP_CAT(LOCAL_NAME, _); \ - } \ - SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME(size_t index) const { \ - return *(BOOST_PP_CAT(LOCAL_NAME, _) + index); \ - } + /* Column or scalar */ \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() const { \ + return typename SoAAccessors:: \ + template ColumnType:: \ + template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(); \ + } \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) const { \ + return typename SoAAccessors:: \ + template ColumnType:: \ + template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ + } #define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL STORE_MEMBER_NAME) diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc b/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc index e29ccbbcc..10b9c45fd 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc @@ -31,13 +31,15 @@ void SiPixelROCsStatusAndMappingWrapperESProducer::produce(edm::EventSetup& even std::ifstream in(data_ / "cablingMap.bin", std::ios::binary); in.exceptions(std::ifstream::badbit | std::ifstream::failbit | std::ifstream::eofbit); // We use default alignment - auto objBuffer = std::make_unique(SiPixelROCsStatusAndMapping::computeDataSize(pixelgpudetails::MAX_SIZE)); - SiPixelROCsStatusAndMapping obj(objBuffer.get(), pixelgpudetails::MAX_SIZE); + auto objBuffer = std::make_unique(SiPixelROCsStatusAndMappingStore::computeDataSize(pixelgpudetails::MAX_SIZE)); + SiPixelROCsStatusAndMappingStore 
obj(objBuffer.get(), pixelgpudetails::MAX_SIZE); in.read(reinterpret_cast(obj.soaMetadata().data()), obj.soaMetadata().byteSize()); unsigned int modToUnpDefSize; in.read(reinterpret_cast(&modToUnpDefSize), sizeof(unsigned int)); std::vector modToUnpDefault(modToUnpDefSize); in.read(reinterpret_cast(modToUnpDefault.data()), modToUnpDefSize); + // SiPixelROCsStatusAndMappingWrapper constructor will copy the objBuffer to a pinned host memory buffer + // the deallocation of objBuffer at the end of this scope is intentional. eventSetup.put(std::make_unique(obj, std::move(modToUnpDefault))); } } diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterCUDA.cc b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterCUDA.cc index f59a1a9d6..083709768 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterCUDA.cc +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterCUDA.cc @@ -82,7 +82,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, ") differs the one from SiPixelROCsStatusAndMappingWrapper. 
Please fix your configuration."); } // get the GPU product already here so that the async transfer can begin - const auto & gpuMap = hgpuMap.getGPUProductAsync(ctx.stream()); + auto gpuMap = hgpuMap.getGPUProductAsync(ctx.stream()); const unsigned char* gpuModulesToUnpack = hgpuMap.getModToUnpAllAsync(ctx.stream()); auto const& hgains = iSetup.get(); diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu index c894161df..8711740a3 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu @@ -182,7 +182,7 @@ namespace pixelgpudetails { __device__ uint8_t checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, - const SiPixelROCsStatusAndMapping &cablingMap, + SiPixelROCsStatusAndMappingConstView &cablingMap, bool debug = false) { uint8_t errorType = (errorWord >> sipixelconstants::ROC_shift) & sipixelconstants::ERROR_mask; if (errorType < 25) @@ -262,7 +262,7 @@ namespace pixelgpudetails { __device__ uint32_t getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, - const SiPixelROCsStatusAndMapping &cablingMap, + SiPixelROCsStatusAndMappingConstView &cablingMap, bool debug = false) { uint32_t rID = 0xffffffff; @@ -329,7 +329,7 @@ namespace pixelgpudetails { } // Kernel to perform Raw to Digi conversion - __global__ void RawToDigi_kernel(const SiPixelROCsStatusAndMapping cablingMap, + __global__ void RawToDigi_kernel(SiPixelROCsStatusAndMappingConstView cablingMap, const unsigned char *modToUnp, const uint32_t wordCounter, const uint32_t *word, @@ -494,7 +494,7 @@ namespace pixelgpudetails { // Interface to outside void SiPixelRawToClusterGPUKernel::makeClustersAsync(bool isRun2, const SiPixelClusterThresholds clusterThresholds, - const SiPixelROCsStatusAndMapping &cablingMap, + SiPixelROCsStatusAndMappingConstView & cablingMap, const unsigned char 
*modToUnp, const SiPixelGainForHLTonGPU *gains, const WordFedAppender &wordFed, diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h index 9de7f682a..2297c296e 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h @@ -16,7 +16,7 @@ // local include(s) #include "SiPixelClusterThresholds.h" -struct SiPixelROCsStatusAndMapping; +struct SiPixelROCsStatusAndMappingConstView; class SiPixelGainForHLTonGPU; namespace pixelgpudetails { @@ -135,7 +135,7 @@ namespace pixelgpudetails { void makeClustersAsync(bool isRun2, const SiPixelClusterThresholds clusterThresholds, - const SiPixelROCsStatusAndMapping &cablingMap, + SiPixelROCsStatusAndMappingConstView & cablingMap, const unsigned char* modToUnp, const SiPixelGainForHLTonGPU* gains, const WordFedAppender& wordFed, diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu index c3709f42f..7b62e6017 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -10,7 +10,7 @@ // Scalars, Columns of scalars and of Eigen vectors // View to each of them, from one and multiple stores. -generate_SoA_store(SoA1, +generate_SoA_store(SoA1Template, // predefined static scalars // size_t size; // size_t alignment; @@ -33,43 +33,49 @@ generate_SoA_store(SoA1, SoA_scalar(uint32_t, someNumber) ); +using SoA1 = SoA1Template<>; + // A 1 to 1 view of the store (except for unsupported types). 
-generate_SoA_view(SoA1View, +generate_SoA_view(SoA1ViewTemplate, SoA_view_store_list( SoA_view_store(SoA1, soa1) ), SoA_view_value_list( - SoA_view_value(soa1, x, x), - SoA_view_value(soa1, y, y), - SoA_view_value(soa1, z, z), - SoA_view_value(soa1, color, color), - SoA_view_value(soa1, value, value), - SoA_view_value(soa1, py, py), - SoA_view_value(soa1, count, count), - SoA_view_value(soa1, anotherCount, anotherCount), - SoA_view_value(soa1, description, description), - SoA_view_value(soa1, someNumber, someNumber) + SoA_view_value(soa1, x), + SoA_view_value(soa1, y), + SoA_view_value(soa1, z), + SoA_view_value(soa1, color), + SoA_view_value(soa1, value), + SoA_view_value(soa1, py), + SoA_view_value(soa1, count), + SoA_view_value(soa1, anotherCount), + SoA_view_value(soa1, description), + SoA_view_value(soa1, someNumber) ) ); +using SoA1View = SoA1ViewTemplate<>; + // A partial view (artificial mix of store and view) -generate_SoA_view(SoA1View2G, +generate_SoA_view(SoA1View2GTemplate, SoA_view_store_list( SoA_view_store(SoA1, soa1), SoA_view_store(SoA1View, soa1v) ), SoA_view_value_list( - SoA_view_value(soa1, x, x), - SoA_view_value(soa1v, y, y), - SoA_view_value(soa1, color, color), - SoA_view_value(soa1v, value, value), - SoA_view_value(soa1v, count, count), - SoA_view_value(soa1, anotherCount, anotherCount), - SoA_view_value(soa1v, description, description), - SoA_view_value(soa1, someNumber, someNumber) + SoA_view_value(soa1, x), + SoA_view_value(soa1v, y), + SoA_view_value(soa1, color), + SoA_view_value(soa1v, value), + SoA_view_value(soa1v, count), + SoA_view_value(soa1, anotherCount), + SoA_view_value(soa1v, description), + SoA_view_value(soa1, someNumber) ) ); +using SoA1View2G = SoA1View2GTemplate<>; + // Same partial view, yet const. 
@@ -79,18 +85,18 @@ generate_SoA_const_view(SoA1View2Gconst, SoA_view_store(SoA1View, soa1v) ), SoA_view_value_list( - SoA_view_value(soa1, x, x), - SoA_view_value(soa1v, y, y), + SoA_view_value(soa1, x), + SoA_view_value(soa1v, y), /* Eigen columns are not supported in views. SoA_view_value(soa1, a, a), SoA_view_value(soa1, b, b), SoA_view_value(soa1, r, r), */ - SoA_view_value(soa1, color, color), - SoA_view_value(soa1v, value, value), - SoA_view_value(soa1v, count, count), - SoA_view_value(soa1, anotherCount, anotherCount), - SoA_view_value(soa1v, description, description), - SoA_view_value(soa1, someNumber, someNumber) + SoA_view_value(soa1, color), + SoA_view_value(soa1v, value), + SoA_view_value(soa1v, count), + SoA_view_value(soa1, anotherCount), + SoA_view_value(soa1v, description), + SoA_view_value(soa1, someNumber) ) ); From a4d1b46c04a8bb1ccd626d612300c4f604154dd3 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 2 Dec 2021 17:46:15 +0100 Subject: [PATCH 18/50] [cudadev] Moving accesses from layouts to views. 
--- .../CUDADataFormats/SiPixelClustersCUDA.cc | 7 +-- .../CUDADataFormats/SiPixelClustersCUDA.h | 50 ++++++++++++------- .../CUDADataFormats/SiPixelDigisCUDA.cc | 32 ++++++------ .../CUDADataFormats/SiPixelDigisCUDA.h | 37 ++++++++++---- src/cudadev/DataFormats/SoAStore.h | 3 ++ src/cudadev/test/SoAStoreAndView_t.cu | 30 +++++------ 6 files changed, 100 insertions(+), 59 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc index 6d9472654..90361b048 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc @@ -3,9 +3,10 @@ #include "CUDACore/host_unique_ptr.h" #include "CUDADataFormats/SiPixelClustersCUDA.h" -SiPixelClustersCUDA::SiPixelClustersCUDA(): data_d(), deviceStore_(data_d.get(), 0) {} +SiPixelClustersCUDA::SiPixelClustersCUDA(): data_d(), deviceLayout_(data_d.get(), 0), deviceView_(deviceLayout_) {} SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream) - : data_d(cms::cuda::make_device_unique(DeviceStore::computeDataSize(maxModules), stream)), - deviceStore_(data_d.get(), maxModules) + : data_d(cms::cuda::make_device_unique(DeviceLayout::computeDataSize(maxModules), stream)), + deviceLayout_(data_d.get(), maxModules), + deviceView_(deviceLayout_) {} diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h index 51f8087ef..51c80be46 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h @@ -11,7 +11,7 @@ class SiPixelClustersCUDA { public: - generate_SoA_store(DeviceStoreTemplate, + generate_SoA_store(DeviceLayoutTemplate, SoA_column(uint32_t, moduleStart), // index of the first pixel of each module SoA_column(uint32_t, clusInModule), // number of clusters found in each module SoA_column(uint32_t, moduleId), // module id of each module @@ -21,17 +21,31 @@ class 
SiPixelClustersCUDA { ); // We use all defaults for the template parameters. - using DeviceStore = DeviceStoreTemplate<>; + using DeviceLayout = DeviceLayoutTemplate<>; + generate_SoA_view(DeviceViewTemplate, + SoA_view_store_list(SoA_view_store(DeviceLayout, deviceLayout)), + SoA_view_value_list( + SoA_view_value(deviceLayout, moduleStart), // index of the first pixel of each module + SoA_view_value(deviceLayout, clusInModule), // number of clusters found in each module + SoA_view_value(deviceLayout, moduleId), // module id of each module + + // originally from rechits + SoA_view_value(deviceLayout, clusModuleStart) // index of the first cluster of each module + ) + ); + + using DeviceView = DeviceViewTemplate<>; + generate_SoA_const_view(DeviceConstViewTemplate, - SoA_view_store_list(SoA_view_store(DeviceStore, deviceStore)), + SoA_view_store_list(SoA_view_store(DeviceView, deviceView)), SoA_view_value_list( - SoA_view_value(deviceStore, moduleStart), // index of the first pixel of each module - SoA_view_value(deviceStore, clusInModule), // number of clusters found in each module - SoA_view_value(deviceStore, moduleId), // module id of each module + SoA_view_value(deviceView, moduleStart), // index of the first pixel of each module + SoA_view_value(deviceView, clusInModule), // number of clusters found in each module + SoA_view_value(deviceView, moduleId), // module id of each module // originally from rechits - SoA_view_value(deviceStore, clusModuleStart) // index of the first cluster of each module + SoA_view_value(deviceView, clusModuleStart) // index of the first cluster of each module ) ); @@ -50,21 +64,23 @@ class SiPixelClustersCUDA { uint32_t nClusters() const { return nClusters_h; } - uint32_t *moduleStart() { return deviceStore_.moduleStart(); } - uint32_t *clusInModule() { return deviceStore_.clusInModule(); } - uint32_t *moduleId() { return deviceStore_.moduleId(); } - uint32_t *clusModuleStart() { return deviceStore_.clusModuleStart(); } + uint32_t 
*moduleStart() { return deviceView_.moduleStart(); } + uint32_t *clusInModule() { return deviceView_.clusInModule(); } + uint32_t *moduleId() { return deviceView_.moduleId(); } + uint32_t *clusModuleStart() { return deviceView_.clusModuleStart(); } - uint32_t const *moduleStart() const { return deviceStore_.moduleStart(); } - uint32_t const *clusInModule() const { return deviceStore_.clusInModule(); } - uint32_t const *moduleId() const { return deviceStore_.moduleId(); } - uint32_t const *clusModuleStart() const { return deviceStore_.clusModuleStart(); } + uint32_t const *moduleStart() const { return deviceView_.moduleStart(); } + uint32_t const *clusInModule() const { return deviceView_.clusInModule(); } + uint32_t const *moduleId() const { return deviceView_.moduleId(); } + uint32_t const *clusModuleStart() const { return deviceView_.clusModuleStart(); } - DeviceConstView view() const { return DeviceConstView(deviceStore_); } + DeviceConstView view() const { return DeviceConstView(deviceView_); } private: cms::cuda::device::unique_ptr data_d; // Single SoA storage - DeviceStore deviceStore_; + DeviceLayout deviceLayout_; + DeviceView deviceView_; + uint32_t nClusters_h = 0; }; diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc index fce90e452..331e5c571 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc @@ -6,30 +6,32 @@ SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) : data_d(cms::cuda::make_device_unique( - DeviceOnlyStore::computeDataSize(maxFedWords) + - HostDeviceStore::computeDataSize(maxFedWords), + DeviceOnlyLayout::computeDataSize(maxFedWords) + + HostDeviceLayout::computeDataSize(maxFedWords), stream)), - deviceOnlyStore_d(data_d.get(), maxFedWords), - hostDeviceStore_d(deviceOnlyStore_d.soaMetadata().nextByte(), maxFedWords), - deviceFullView_(deviceOnlyStore_d, hostDeviceStore_d), + 
deviceOnlyLayout_d(data_d.get(), maxFedWords), + hostDeviceLayout_d(deviceOnlyLayout_d.soaMetadata().nextByte(), maxFedWords), + deviceFullView_(deviceOnlyLayout_d, hostDeviceLayout_d), devicePixelConstView_(deviceFullView_) {} SiPixelDigisCUDA::SiPixelDigisCUDA() - : data_d(),deviceOnlyStore_d(), hostDeviceStore_d(), deviceFullView_(), devicePixelConstView_() + : data_d(),deviceOnlyLayout_d(), hostDeviceLayout_d(), deviceFullView_(), devicePixelConstView_() {} SiPixelDigisCUDA::HostStoreAndBuffer::HostStoreAndBuffer() - : data_h(), hostStore_(nullptr, 0) + : data_h(), hostLayout_(nullptr, 0), hostView_(hostLayout_) {} SiPixelDigisCUDA::HostStoreAndBuffer::HostStoreAndBuffer(size_t maxFedWords, cudaStream_t stream) - : data_h(cms::cuda::make_host_unique(SiPixelDigisCUDA::HostDeviceStore::computeDataSize(maxFedWords), stream)), - hostStore_(data_h.get(), maxFedWords) + : data_h(cms::cuda::make_host_unique(SiPixelDigisCUDA::HostDeviceLayout::computeDataSize(maxFedWords), stream)), + hostLayout_(data_h.get(), maxFedWords), + hostView_(hostLayout_) {} void SiPixelDigisCUDA::HostStoreAndBuffer::reset() { - hostStore_ = HostDeviceStore(); + hostLayout_ = HostDeviceLayout(); + hostView_ = HostDeviceView(hostLayout_); data_h.reset(); } @@ -45,13 +47,15 @@ SiPixelDigisCUDA::HostStoreAndBuffer SiPixelDigisCUDA::dataToHostAsync(cudaStrea // Due to the compaction with the 2D copy, we need to know the precise geometry, and hence operate on the store (as opposed // to the view, which is unaware of the column pitches. 
HostStoreAndBuffer ret(nDigis(), stream); - cudaCheck(cudaMemcpyAsync(ret.hostStore_.adc(), hostDeviceStore_d.adc(), nDigis_h * sizeof(decltype(*deviceFullView_.adc())), + auto rhlsm = ret.hostLayout_.soaMetadata(); + auto hdlsm_d = hostDeviceLayout_d.soaMetadata(); + cudaCheck(cudaMemcpyAsync(rhlsm.addressOf_adc(), hdlsm_d.addressOf_adc(), nDigis_h * sizeof(*rhlsm.addressOf_adc()), cudaMemcpyDeviceToHost, stream)); // Copy the other columns, realigning the data in shorter arrays. clus is the first but all 3 columns (clus, pdigis, rawIdArr) have // the same geometry. - cudaCheck(cudaMemcpy2DAsync(ret.hostStore_.clus(), ret.hostStore_.soaMetadata().clusPitch(), - hostDeviceStore_d.clus(), hostDeviceStore_d.soaMetadata().clusPitch(), + cudaCheck(cudaMemcpy2DAsync(rhlsm.addressOf_clus(), rhlsm.clusPitch(), + hdlsm_d.addressOf_clus(), hdlsm_d.clusPitch(), 3 /* rows */, - nDigis() * sizeof(decltype (*ret.hostStore_.clus())), cudaMemcpyDeviceToHost, stream)); + nDigis() * sizeof(decltype (*ret.hostView_.clus())), cudaMemcpyDeviceToHost, stream)); return ret; } \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index 7b23dd8dd..650e2f5b0 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -11,16 +11,16 @@ class SiPixelDigisCUDA { public: - generate_SoA_store(DeviceOnlyStoreTemplate, + generate_SoA_store(DeviceOnlyLayoutTemplate, /* These are consumed by downstream device code */ SoA_column(uint16_t, xx), /* local coordinates of each pixel */ SoA_column(uint16_t, yy), /* */ SoA_column(uint16_t, moduleInd) /* module id of each pixel */ ); - using DeviceOnlyStore = DeviceOnlyStoreTemplate<>; + using DeviceOnlyLayout = DeviceOnlyLayoutTemplate<>; - generate_SoA_store(HostDeviceStoreTemplate, + generate_SoA_store(HostDeviceLayoutTemplate, /* These are also transferred to host (see HostDataView) */ SoA_column(uint16_t, adc), /* 
ADC of each pixel */ SoA_column(int32_t, clus), /* cluster id of each pixel */ @@ -30,12 +30,26 @@ class SiPixelDigisCUDA { SoA_column(uint32_t, rawIdArr) /* DetId of each pixel */ ); - using HostDeviceStore = HostDeviceStoreTemplate<>; + using HostDeviceLayout = HostDeviceLayoutTemplate<>; + + generate_SoA_view(HostDeviceViewTemplate, + SoA_view_store_list( + SoA_view_store(HostDeviceLayout, hostDevice) + ), + SoA_view_value_list( + SoA_view_value(hostDevice, adc), /* ADC of each pixel */ + SoA_view_value(hostDevice, clus), /* cluster id of each pixel */ + SoA_view_value(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ + SoA_view_value(hostDevice, rawIdArr) /* DetId of each pixel */ + ) + ); + + using HostDeviceView = HostDeviceViewTemplate<>; generate_SoA_view(DeviceFullViewTemplate, SoA_view_store_list( - SoA_view_store(DeviceOnlyStore, deviceOnly), - SoA_view_store(HostDeviceStore, hostDevice) + SoA_view_store(DeviceOnlyLayout, deviceOnly), + SoA_view_store(HostDeviceLayout, hostDevice) ), SoA_view_value_list( SoA_view_value(deviceOnly, xx), /* local coordinates of each pixel */ @@ -45,7 +59,6 @@ class SiPixelDigisCUDA { SoA_view_value(hostDevice, clus), /* cluster id of each pixel */ SoA_view_value(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ SoA_view_value(hostDevice, rawIdArr) /* DetId of each pixel */ - /* TODO: simple, no rename interface */ ) ); @@ -106,12 +119,14 @@ class SiPixelDigisCUDA { friend SiPixelDigisCUDA; public: HostStoreAndBuffer(); - const SiPixelDigisCUDA::HostDeviceStore store() { return hostStore_; } + const SiPixelDigisCUDA::HostDeviceLayout store() { return hostLayout_; } void reset(); private: HostStoreAndBuffer(size_t maxFedWords, cudaStream_t stream); cms::cuda::host::unique_ptr data_h; - HostDeviceStore hostStore_; + HostDeviceLayout hostLayout_; + HostDeviceView hostView_; + }; HostStoreAndBuffer dataToHostAsync(cudaStream_t stream) const; @@ -123,8 +138,8 @@ class SiPixelDigisCUDA { 
private: // These are consumed by downstream device code cms::cuda::device::unique_ptr data_d; // Single SoA storage - DeviceOnlyStore deviceOnlyStore_d; - HostDeviceStore hostDeviceStore_d; + DeviceOnlyLayout deviceOnlyLayout_d; + HostDeviceLayout hostDeviceLayout_d; DeviceFullView deviceFullView_; DevicePixelConstView devicePixelConstView_; uint32_t nModules_h = 0; diff --git a/src/cudadev/DataFormats/SoAStore.h b/src/cudadev/DataFormats/SoAStore.h index d432fa579..0dfb8ca36 100644 --- a/src/cudadev/DataFormats/SoAStore.h +++ b/src/cudadev/DataFormats/SoAStore.h @@ -441,6 +441,7 @@ _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ }; \ \ +private:\ /* AoS-like accessor (non-const) */ \ SOA_HOST_DEVICE_INLINE \ element operator[](size_t index) { \ @@ -448,6 +449,7 @@ return element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__)); \ } \ \ +/*public:*/\ /* AoS-like accessor (const) */ \ SOA_HOST_DEVICE_INLINE \ const const_element operator[](size_t index) const { \ @@ -459,6 +461,7 @@ _ITERATE_ON_ALL(_DECLARE_SOA_ACCESSOR, ~, __VA_ARGS__) \ _ITERATE_ON_ALL(_DECLARE_SOA_CONST_ACCESSOR, ~, __VA_ARGS__) \ \ +public:\ /* dump the SoA internal structure */ \ template \ SOA_HOST_ONLY friend void dump(); \ diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu index 7b62e6017..00a1c132b 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -10,7 +10,7 @@ // Scalars, Columns of scalars and of Eigen vectors // View to each of them, from one and multiple stores. -generate_SoA_store(SoA1Template, +generate_SoA_store(SoA1LayoutTemplate, // predefined static scalars // size_t size; // size_t alignment; @@ -33,12 +33,12 @@ generate_SoA_store(SoA1Template, SoA_scalar(uint32_t, someNumber) ); -using SoA1 = SoA1Template<>; +using SoA1Layout = SoA1LayoutTemplate<>; // A 1 to 1 view of the store (except for unsupported types). 
generate_SoA_view(SoA1ViewTemplate, SoA_view_store_list( - SoA_view_store(SoA1, soa1) + SoA_view_store(SoA1Layout, soa1) ), SoA_view_value_list( SoA_view_value(soa1, x), @@ -59,7 +59,7 @@ using SoA1View = SoA1ViewTemplate<>; // A partial view (artificial mix of store and view) generate_SoA_view(SoA1View2GTemplate, SoA_view_store_list( - SoA_view_store(SoA1, soa1), + SoA_view_store(SoA1Layout, soa1), SoA_view_store(SoA1View, soa1v) ), SoA_view_value_list( @@ -81,7 +81,7 @@ using SoA1View2G = SoA1View2GTemplate<>; // Same partial view, yet const. generate_SoA_const_view(SoA1View2Gconst, SoA_view_store_list( - SoA_view_store(SoA1, soa1), + SoA_view_store(SoA1Layout, soa1), SoA_view_store(SoA1View, soa1v) ), SoA_view_value_list( @@ -105,43 +105,45 @@ const size_t size=10000; int main() { // Allocate buffer std::unique_ptr buffer( - static_cast(std::aligned_alloc(SoA1::defaultAlignment, SoA1::computeDataSize(size))), + static_cast(std::aligned_alloc(SoA1Layout::defaultAlignment, SoA1Layout::computeDataSize(size))), std::free); - SoA1 soa1(buffer.get(), size); + SoA1Layout soa1(buffer.get(), size); SoA1View soa1view (soa1); SoA1View2G soa1v2g (soa1, soa1view); SoA1View2Gconst soa1v2gconst (soa1, soa1view); // Write to view for (size_t i=0; i < size; i++) { - auto s = soa1[i]; + auto s = soa1view[i]; s.x = 1.0 * i; s.y = 2.0 * i; s.z = 3.0 * i; s.color() = i; - s.a()(0) = 1.0 * i; + // TODO: re-enable when support of eigen is added to views. + /*s.a()(0) = 1.0 * i; s.a()(1) = 2.0 * i; s.a()(2) = 3.0 * i; s.b()(0) = 3.0 * i; s.b()(1) = 2.0 * i; s.b()(2) = 1.0 * i; - s.r() = s.a().cross(s.b()); + s.r() = s.a().cross(s.b());*/ } // Check direct read back for (size_t i=0; i < size; i++) { - auto s = soa1[i]; + auto s = soa1view[i]; assert(s.x() == 1.0 * i); assert(s.y() == 2.0 * i); assert(s.z() == 3.0 * i); assert(s.color() == i); - assert(s.a()(0) == 1.0 * i); + // TODO: re-enable when support of eigen is added to views. 
+ /*assert(s.a()(0) == 1.0 * i); assert(s.a()(1) == 2.0 * i); assert(s.a()(2) == 3.0 * i); assert(s.b()(0) == 3.0 * i); assert(s.b()(1) == 2.0 * i); assert(s.b()(2) == 1.0 * i); - assert(s.r() == s.a().cross(s.b())); + assert(s.r() == s.a().cross(s.b()));*/ } - // Check readback through views + // Check readback through other views for (size_t i=0; i < size; i++) { auto sv = soa1view[i]; auto sv2g = soa1v2g[i]; From 16ae05f606efcead4924885b499c249e634e1b3a Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Fri, 3 Dec 2021 13:16:00 +0100 Subject: [PATCH 19/50] [cudadev] SoA: Made store accessors private to disallow access. Added views where necessary to replace store accesses. Renamed variables to "layout" in anticipation of the renaming of the stores concept to layout. --- .../CUDADataFormats/SiPixelDigisCUDA.h | 2 +- .../TrackingRecHit2DHeterogeneous.cc | 2 +- .../TrackingRecHit2DHeterogeneous.h | 12 ++-- .../TrackingRecHit2DHostSOAStore.cc | 9 +-- .../TrackingRecHit2DHostSOAStore.h | 7 ++- .../CUDADataFormats/TrackingRecHit2DSOAView.h | 61 +++++++++++++------ src/cudadev/DataFormats/SoAStore.h | 1 - 7 files changed, 59 insertions(+), 35 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index 650e2f5b0..fcb091dd6 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -119,7 +119,7 @@ class SiPixelDigisCUDA { friend SiPixelDigisCUDA; public: HostStoreAndBuffer(); - const SiPixelDigisCUDA::HostDeviceLayout store() { return hostLayout_; } + const SiPixelDigisCUDA::HostDeviceView store() { return hostView_; } void reset(); private: HostStoreAndBuffer(size_t maxFedWords, cudaStream_t stream); diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc index 41695adc9..a805b3fa0 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc +++ 
b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.cc @@ -9,6 +9,6 @@ template <> TrackingRecHit2DHostSOAStore TrackingRecHit2DCUDA::hitsToHostAsync(cudaStream_t stream) const { // copy xl, yl, xerr, yerr, xg, yg, zg,rg, charge, clusterSizeX, clusterSizeY. TrackingRecHit2DHostSOAStore ret(nHits(), stream); - cms::cuda::copyAsync(ret.hits_h, m_hitsSupportLayerStartStore, ret.hitsStore_.soaMetadata().byteSize(), stream); + cms::cuda::copyAsync(ret.hits_h, m_hitsSupportLayerStartStore, ret.hitsLayout_.soaMetadata().byteSize(), stream); return ret; } \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h index 7fa8871d8..d1e187d5e 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h @@ -112,21 +112,21 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH (phase1PixelTopology::numberOfLayers + 1) * sizeof (TrackingRecHit2DSOAStore::PhiBinner::index_type); // Allocate the buffer m_hitsSupportLayerStartStore = Traits::template make_device_unique ( - TrackingRecHit2DSOAStore::HitsStore::computeDataSize(m_nHits) + - TrackingRecHit2DSOAStore::SupportObjectsStore::computeDataSize(m_nHits) + + TrackingRecHit2DSOAStore::HitsLayout::computeDataSize(m_nHits) + + TrackingRecHit2DSOAStore::SupportObjectsLayout::computeDataSize(m_nHits) + phiBinnerByteSize, stream); // Split the buffer in stores and array - store->m_hitsStore = TrackingRecHit2DSOAStore::HitsStore(m_hitsSupportLayerStartStore.get(), nHits); - store->m_supportObjectsStore = TrackingRecHit2DSOAStore::SupportObjectsStore(store->m_hitsStore.soaMetadata().nextByte(), nHits); + store->m_hitsStore = TrackingRecHit2DSOAStore::HitsLayout(m_hitsSupportLayerStartStore.get(), nHits); + store->m_supportObjectsStore = TrackingRecHit2DSOAStore::SupportObjectsLayout(store->m_hitsStore.soaMetadata().nextByte(), 
nHits); m_hitsLayerStart = store->m_hitsLayerStart = reinterpret_cast (store->m_supportObjectsStore.soaMetadata().nextByte()); // Record additional references store->m_hitsAndSupportView = TrackingRecHit2DSOAStore::HitsAndSupportView( store->m_hitsStore, store->m_supportObjectsStore ); - m_phiBinnerStorage = store->m_phiBinnerStorage = store->m_supportObjectsStore.phiBinnerStorage(); - m_iphi = store->m_supportObjectsStore.iphi(); + m_phiBinnerStorage = store->m_phiBinnerStorage = store->m_hitsAndSupportView.phiBinnerStorage(); + m_iphi = store->m_hitsAndSupportView.iphi(); } m_PhiBinnerStore = Traits::template make_device_unique(stream); diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc index 36d2848da..530daa600 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc @@ -1,15 +1,16 @@ #include "CUDADataFormats/TrackingRecHit2DHostSOAStore.h" TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore(): - hitsStore_(hits_h.get(), 0 /* size */, 1 /* byte alignement */) + hitsLayout_(hits_h.get(), 0 /* size */, 1 /* byte alignement */) {} void TrackingRecHit2DHostSOAStore::reset() { hits_h.reset(); - hitsStore_ = TrackingRecHit2DSOAStore::HitsStore(); + hitsLayout_ = TrackingRecHit2DSOAStore::HitsLayout(); } TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore(size_t size, cudaStream_t stream): - hits_h(cms::cuda::make_host_unique(TrackingRecHit2DSOAStore::HitsStore::computeDataSize(size), stream)), - hitsStore_(hits_h.get(), size) + hits_h(cms::cuda::make_host_unique(TrackingRecHit2DSOAStore::HitsLayout::computeDataSize(size), stream)), + hitsLayout_(hits_h.get(), size), + hitsView_(hitsLayout_) {} diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h index 8c4bc15ec..f2b34e9bc 100644 --- 
a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h @@ -14,12 +14,13 @@ struct TrackingRecHit2DHostSOAStore { public: TrackingRecHit2DHostSOAStore(); void reset(); - __device__ __forceinline__ const auto operator[](size_t i) const { return hitsStore_[i]; } - __device__ __forceinline__ size_t size() { return hitsStore_.soaMetadata().size(); } + __device__ __forceinline__ const auto operator[](size_t i) const { return hitsView_[i]; } + __device__ __forceinline__ size_t size() { return /* TODO: move to view when view will embed size */hitsLayout_.soaMetadata().size(); } private: TrackingRecHit2DHostSOAStore(size_t size, cudaStream_t stream); cms::cuda::host::unique_ptr hits_h; - TrackingRecHit2DSOAStore::HitsStore hitsStore_; + TrackingRecHit2DSOAStore::HitsLayout hitsLayout_; + TrackingRecHit2DSOAStore::HitsView hitsView_; }; diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h index 16ededc67..bb75da087 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h @@ -31,7 +31,7 @@ class TrackingRecHit2DSOAStore { // Sill, we need the 32 bits integers to be aligned, so we simply declare the SoA with the 32 bits fields first // and the 16 bits behind (as they have a looser alignment requirement. Then the SoA can be create with a byte // alignment of 1) - generate_SoA_store(HitsStoreTemplate, + generate_SoA_store(HitsLayoutTemplate, // 32 bits section // local coord SoA_column(float, xLocal), @@ -55,9 +55,32 @@ class TrackingRecHit2DSOAStore { ); // The hits store does not use default alignment but a more relaxed one. 
- using HitsStore = HitsStoreTemplate; + using HitsLayout = HitsLayoutTemplate; - generate_SoA_store(SupportObjectsStoreTemplate, + generate_SoA_view(HitsViewTemplate, + SoA_view_store_list( + SoA_view_store(HitsLayout, hitsLayout) + ), + SoA_view_value_list( + SoA_view_value(hitsLayout, xLocal), + SoA_view_value(hitsLayout, yLocal), + SoA_view_value(hitsLayout, xerrLocal), + SoA_view_value(hitsLayout, yerrLocal), + + SoA_view_value(hitsLayout, xGlobal), + SoA_view_value(hitsLayout, yGlobal), + SoA_view_value(hitsLayout, zGlobal), + SoA_view_value(hitsLayout, rGlobal), + + SoA_view_value(hitsLayout, charge), + SoA_view_value(hitsLayout, clusterSizeX), + SoA_view_value(hitsLayout, clusterSizeY) + ) + ); + + using HitsView = HitsViewTemplate<>; + + generate_SoA_store(SupportObjectsLayoutTemplate, // This is the end of the data which is transferred to host. The following columns are supporting // objects, not transmitted @@ -72,27 +95,27 @@ class TrackingRecHit2DSOAStore { ); // The support objects store also not use default alignment but a more relaxed one. 
- using SupportObjectsStore = SupportObjectsStoreTemplate; + using SupportObjectsLayout = SupportObjectsLayoutTemplate; generate_SoA_view(HitsAndSupportViewTemplate, SoA_view_store_list( - SoA_view_store(HitsStore, hitsStore), - SoA_view_store(SupportObjectsStore, supportObjectsStore) + SoA_view_store(HitsLayout, hitsLayout), + SoA_view_store(SupportObjectsLayout, supportObjectsStore) ), SoA_view_value_list( - SoA_view_value(hitsStore, xLocal), - SoA_view_value(hitsStore, yLocal), - SoA_view_value(hitsStore, xerrLocal), - SoA_view_value(hitsStore, yerrLocal), + SoA_view_value(hitsLayout, xLocal), + SoA_view_value(hitsLayout, yLocal), + SoA_view_value(hitsLayout, xerrLocal), + SoA_view_value(hitsLayout, yerrLocal), - SoA_view_value(hitsStore, xGlobal), - SoA_view_value(hitsStore, yGlobal), - SoA_view_value(hitsStore, zGlobal), - SoA_view_value(hitsStore, rGlobal), + SoA_view_value(hitsLayout, xGlobal), + SoA_view_value(hitsLayout, yGlobal), + SoA_view_value(hitsLayout, zGlobal), + SoA_view_value(hitsLayout, rGlobal), - SoA_view_value(hitsStore, charge), - SoA_view_value(hitsStore, clusterSizeX), - SoA_view_value(hitsStore, clusterSizeY), + SoA_view_value(hitsLayout, charge), + SoA_view_value(hitsLayout, clusterSizeX), + SoA_view_value(hitsLayout, clusterSizeY), SoA_view_value(supportObjectsStore, phiBinnerStorage), SoA_view_value(supportObjectsStore, iphi), @@ -125,9 +148,9 @@ class TrackingRecHit2DSOAStore { private: // hits store - HitsStore m_hitsStore; + HitsLayout m_hitsStore; // supporting objects store - SupportObjectsStore m_supportObjectsStore; + SupportObjectsLayout m_supportObjectsStore; // Global view simplifying usage HitsAndSupportView m_hitsAndSupportView; diff --git a/src/cudadev/DataFormats/SoAStore.h b/src/cudadev/DataFormats/SoAStore.h index 0dfb8ca36..bb0a441c5 100644 --- a/src/cudadev/DataFormats/SoAStore.h +++ b/src/cudadev/DataFormats/SoAStore.h @@ -449,7 +449,6 @@ private:\ return element(index, 
_ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__)); \ } \ \ -/*public:*/\ /* AoS-like accessor (const) */ \ SOA_HOST_DEVICE_INLINE \ const const_element operator[](size_t index) const { \ From 8107f13c318413a622212818feb8549f17ad0c46 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Fri, 3 Dec 2021 17:35:15 +0100 Subject: [PATCH 20/50] [cudadev] Removed accessors from stores/layouts and completed the renaming from store to layout. --- .../CUDADataFormats/SiPixelClustersCUDA.h | 36 ++-- .../CUDADataFormats/SiPixelDigisCUDA.cc | 10 +- .../CUDADataFormats/SiPixelDigisCUDA.h | 84 ++++---- .../TrackingRecHit2DHeterogeneous.h | 10 +- .../CUDADataFormats/TrackingRecHit2DSOAView.h | 110 +++++----- .../CondFormats/SiPixelROCsStatusAndMapping.h | 42 ++-- .../SiPixelROCsStatusAndMappingWrapper.cc | 2 +- .../SiPixelROCsStatusAndMappingWrapper.h | 10 +- src/cudadev/DataFormats/SoACommon.h | 8 +- .../DataFormats/{SoAStore.h => SoALayout.h} | 196 ++---------------- src/cudadev/DataFormats/SoAView.h | 187 +++++++++-------- ...elROCsStatusAndMappingWrapperESProducer.cc | 4 +- .../SiPixelDigisSoAFromCUDA.cc | 4 +- src/cudadev/test/SoAStoreAndView_t.cu | 104 +++++----- 14 files changed, 325 insertions(+), 482 deletions(-) rename src/cudadev/DataFormats/{SoAStore.h => SoALayout.h} (68%) diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h index 51c80be46..4145c5eb7 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h @@ -4,48 +4,48 @@ #include "CUDACore/device_unique_ptr.h" #include "CUDACore/host_unique_ptr.h" #include "CUDACore/cudaCompat.h" -#include "DataFormats/SoAStore.h" +#include "DataFormats/SoALayout.h" #include "DataFormats/SoAView.h" #include class SiPixelClustersCUDA { public: - generate_SoA_store(DeviceLayoutTemplate, - SoA_column(uint32_t, moduleStart), // index of the first pixel of each module - SoA_column(uint32_t, 
clusInModule), // number of clusters found in each module - SoA_column(uint32_t, moduleId), // module id of each module + GENERATE_SOA_LAYOUT(DeviceLayoutTemplate, + SOA_COLUMN(uint32_t, moduleStart), // index of the first pixel of each module + SOA_COLUMN(uint32_t, clusInModule), // number of clusters found in each module + SOA_COLUMN(uint32_t, moduleId), // module id of each module // originally from rechits - SoA_column(uint32_t, clusModuleStart) // index of the first cluster of each module + SOA_COLUMN(uint32_t, clusModuleStart) // index of the first cluster of each module ); // We use all defaults for the template parameters. using DeviceLayout = DeviceLayoutTemplate<>; generate_SoA_view(DeviceViewTemplate, - SoA_view_store_list(SoA_view_store(DeviceLayout, deviceLayout)), - SoA_view_value_list( - SoA_view_value(deviceLayout, moduleStart), // index of the first pixel of each module - SoA_view_value(deviceLayout, clusInModule), // number of clusters found in each module - SoA_view_value(deviceLayout, moduleId), // module id of each module + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceLayout, deviceLayout)), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(deviceLayout, moduleStart), // index of the first pixel of each module + SOA_VIEW_VALUE(deviceLayout, clusInModule), // number of clusters found in each module + SOA_VIEW_VALUE(deviceLayout, moduleId), // module id of each module // originally from rechits - SoA_view_value(deviceLayout, clusModuleStart) // index of the first cluster of each module + SOA_VIEW_VALUE(deviceLayout, clusModuleStart) // index of the first cluster of each module ) ); using DeviceView = DeviceViewTemplate<>; generate_SoA_const_view(DeviceConstViewTemplate, - SoA_view_store_list(SoA_view_store(DeviceView, deviceView)), - SoA_view_value_list( - SoA_view_value(deviceView, moduleStart), // index of the first pixel of each module - SoA_view_value(deviceView, clusInModule), // number of clusters found in each module - SoA_view_value(deviceView, 
moduleId), // module id of each module + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceView, deviceView)), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(deviceView, moduleStart), // index of the first pixel of each module + SOA_VIEW_VALUE(deviceView, clusInModule), // number of clusters found in each module + SOA_VIEW_VALUE(deviceView, moduleId), // module id of each module // originally from rechits - SoA_view_value(deviceView, clusModuleStart) // index of the first cluster of each module + SOA_VIEW_VALUE(deviceView, clusModuleStart) // index of the first cluster of each module ) ); diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc index 331e5c571..a816806f8 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc @@ -19,17 +19,17 @@ SiPixelDigisCUDA::SiPixelDigisCUDA() : data_d(),deviceOnlyLayout_d(), hostDeviceLayout_d(), deviceFullView_(), devicePixelConstView_() {} -SiPixelDigisCUDA::HostStoreAndBuffer::HostStoreAndBuffer() +SiPixelDigisCUDA::HostStore::HostStore() : data_h(), hostLayout_(nullptr, 0), hostView_(hostLayout_) {} -SiPixelDigisCUDA::HostStoreAndBuffer::HostStoreAndBuffer(size_t maxFedWords, cudaStream_t stream) +SiPixelDigisCUDA::HostStore::HostStore(size_t maxFedWords, cudaStream_t stream) : data_h(cms::cuda::make_host_unique(SiPixelDigisCUDA::HostDeviceLayout::computeDataSize(maxFedWords), stream)), hostLayout_(data_h.get(), maxFedWords), hostView_(hostLayout_) {} -void SiPixelDigisCUDA::HostStoreAndBuffer::reset() { +void SiPixelDigisCUDA::HostStore::reset() { hostLayout_ = HostDeviceLayout(); hostView_ = HostDeviceView(hostLayout_); data_h.reset(); @@ -42,11 +42,11 @@ cms::cuda::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStr return ret; } -SiPixelDigisCUDA::HostStoreAndBuffer SiPixelDigisCUDA::dataToHostAsync(cudaStream_t stream) const { +SiPixelDigisCUDA::HostStore SiPixelDigisCUDA::dataToHostAsync(cudaStream_t stream) 
const { // Allocate the needed space only and build the compact data in place in host memory (from the larger device memory). // Due to the compaction with the 2D copy, we need to know the precise geometry, and hence operate on the store (as opposed // to the view, which is unaware of the column pitches. - HostStoreAndBuffer ret(nDigis(), stream); + HostStore ret(nDigis(), stream); auto rhlsm = ret.hostLayout_.soaMetadata(); auto hdlsm_d = hostDeviceLayout_d.soaMetadata(); cudaCheck(cudaMemcpyAsync(rhlsm.addressOf_adc(), hdlsm_d.addressOf_adc(), nDigis_h * sizeof(*rhlsm.addressOf_adc()), diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index fcb091dd6..8392bf083 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -6,59 +6,59 @@ #include "CUDACore/cudaCompat.h" #include "CUDACore/device_unique_ptr.h" #include "CUDACore/host_unique_ptr.h" -#include "DataFormats/SoAStore.h" +#include "DataFormats/SoALayout.h" #include "DataFormats/SoAView.h" class SiPixelDigisCUDA { public: - generate_SoA_store(DeviceOnlyLayoutTemplate, + GENERATE_SOA_LAYOUT(DeviceOnlyLayoutTemplate, /* These are consumed by downstream device code */ - SoA_column(uint16_t, xx), /* local coordinates of each pixel */ - SoA_column(uint16_t, yy), /* */ - SoA_column(uint16_t, moduleInd) /* module id of each pixel */ + SOA_COLUMN(uint16_t, xx), /* local coordinates of each pixel */ + SOA_COLUMN(uint16_t, yy), /* */ + SOA_COLUMN(uint16_t, moduleInd) /* module id of each pixel */ ); using DeviceOnlyLayout = DeviceOnlyLayoutTemplate<>; - generate_SoA_store(HostDeviceLayoutTemplate, + GENERATE_SOA_LAYOUT(HostDeviceLayoutTemplate, /* These are also transferred to host (see HostDataView) */ - SoA_column(uint16_t, adc), /* ADC of each pixel */ - SoA_column(int32_t, clus), /* cluster id of each pixel */ + SOA_COLUMN(uint16_t, adc), /* ADC of each pixel */ + SOA_COLUMN(int32_t, clus), /* 
cluster id of each pixel */ /* These are for CPU output; should we (eventually) place them to a */ /* separate product? */ - SoA_column(uint32_t, pdigi), /* packed digi (row, col, adc) of each pixel */ - SoA_column(uint32_t, rawIdArr) /* DetId of each pixel */ + SOA_COLUMN(uint32_t, pdigi), /* packed digi (row, col, adc) of each pixel */ + SOA_COLUMN(uint32_t, rawIdArr) /* DetId of each pixel */ ); using HostDeviceLayout = HostDeviceLayoutTemplate<>; generate_SoA_view(HostDeviceViewTemplate, - SoA_view_store_list( - SoA_view_store(HostDeviceLayout, hostDevice) + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(HostDeviceLayout, hostDevice) ), - SoA_view_value_list( - SoA_view_value(hostDevice, adc), /* ADC of each pixel */ - SoA_view_value(hostDevice, clus), /* cluster id of each pixel */ - SoA_view_value(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ - SoA_view_value(hostDevice, rawIdArr) /* DetId of each pixel */ + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(hostDevice, adc), /* ADC of each pixel */ + SOA_VIEW_VALUE(hostDevice, clus), /* cluster id of each pixel */ + SOA_VIEW_VALUE(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ + SOA_VIEW_VALUE(hostDevice, rawIdArr) /* DetId of each pixel */ ) ); using HostDeviceView = HostDeviceViewTemplate<>; generate_SoA_view(DeviceFullViewTemplate, - SoA_view_store_list( - SoA_view_store(DeviceOnlyLayout, deviceOnly), - SoA_view_store(HostDeviceLayout, hostDevice) + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(DeviceOnlyLayout, deviceOnly), + SOA_VIEW_LAYOUT(HostDeviceLayout, hostDevice) ), - SoA_view_value_list( - SoA_view_value(deviceOnly, xx), /* local coordinates of each pixel */ - SoA_view_value(deviceOnly, yy), /* */ - SoA_view_value(deviceOnly, moduleInd),/* module id of each pixel */ - SoA_view_value(hostDevice, adc), /* ADC of each pixel */ - SoA_view_value(hostDevice, clus), /* cluster id of each pixel */ - SoA_view_value(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ - 
SoA_view_value(hostDevice, rawIdArr) /* DetId of each pixel */ + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(deviceOnly, xx), /* local coordinates of each pixel */ + SOA_VIEW_VALUE(deviceOnly, yy), /* */ + SOA_VIEW_VALUE(deviceOnly, moduleInd),/* module id of each pixel */ + SOA_VIEW_VALUE(hostDevice, adc), /* ADC of each pixel */ + SOA_VIEW_VALUE(hostDevice, clus), /* cluster id of each pixel */ + SOA_VIEW_VALUE(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ + SOA_VIEW_VALUE(hostDevice, rawIdArr) /* DetId of each pixel */ ) ); @@ -66,17 +66,17 @@ class SiPixelDigisCUDA { /* Device pixel view: this is a second generation view (view from view) */ generate_SoA_const_view(DevicePixelConstViewTemplate, - /* We get out data from the DeviceFullStore */ - SoA_view_store_list( - SoA_view_store(DeviceFullView, deviceFullView) + /* We get out data from the DeviceFullView */ + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(DeviceFullView, deviceFullView) ), /* These are consumed by downstream device code */ - SoA_view_value_list( - SoA_view_value(deviceFullView, xx), /* local coordinates of each pixel */ - SoA_view_value(deviceFullView, yy), /* */ - SoA_view_value(deviceFullView, moduleInd), /* module id of each pixel */ - SoA_view_value(deviceFullView, adc), /* ADC of each pixel */ - SoA_view_value(deviceFullView, clus) /* cluster id of each pixel */ + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(deviceFullView, xx), /* local coordinates of each pixel */ + SOA_VIEW_VALUE(deviceFullView, yy), /* */ + SOA_VIEW_VALUE(deviceFullView, moduleInd), /* module id of each pixel */ + SOA_VIEW_VALUE(deviceFullView, adc), /* ADC of each pixel */ + SOA_VIEW_VALUE(deviceFullView, clus) /* cluster id of each pixel */ ) ); @@ -115,20 +115,20 @@ class SiPixelDigisCUDA { uint32_t const *pdigi() const { return deviceFullView_.pdigi(); } uint32_t const *rawIdArr() const { return deviceFullView_.rawIdArr(); } - class HostStoreAndBuffer { + class HostStore { friend SiPixelDigisCUDA; public: - 
HostStoreAndBuffer(); - const SiPixelDigisCUDA::HostDeviceView store() { return hostView_; } + HostStore(); + const SiPixelDigisCUDA::HostDeviceView view() { return hostView_; } void reset(); private: - HostStoreAndBuffer(size_t maxFedWords, cudaStream_t stream); + HostStore(size_t maxFedWords, cudaStream_t stream); cms::cuda::host::unique_ptr data_h; HostDeviceLayout hostLayout_; HostDeviceView hostView_; }; - HostStoreAndBuffer dataToHostAsync(cudaStream_t stream) const; + HostStore dataToHostAsync(cudaStream_t stream) const; // Special copy for validation cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h index d1e187d5e..26a4ca75a 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h @@ -117,13 +117,13 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH phiBinnerByteSize, stream); // Split the buffer in stores and array - store->m_hitsStore = TrackingRecHit2DSOAStore::HitsLayout(m_hitsSupportLayerStartStore.get(), nHits); - store->m_supportObjectsStore = TrackingRecHit2DSOAStore::SupportObjectsLayout(store->m_hitsStore.soaMetadata().nextByte(), nHits); - m_hitsLayerStart = store->m_hitsLayerStart = reinterpret_cast (store->m_supportObjectsStore.soaMetadata().nextByte()); + store->m_hitsLayout = TrackingRecHit2DSOAStore::HitsLayout(m_hitsSupportLayerStartStore.get(), nHits); + store->m_supportObjectsLayout = TrackingRecHit2DSOAStore::SupportObjectsLayout(store->m_hitsLayout.soaMetadata().nextByte(), nHits); + m_hitsLayerStart = store->m_hitsLayerStart = reinterpret_cast (store->m_supportObjectsLayout.soaMetadata().nextByte()); // Record additional references store->m_hitsAndSupportView = TrackingRecHit2DSOAStore::HitsAndSupportView( - store->m_hitsStore, - store->m_supportObjectsStore + 
store->m_hitsLayout, + store->m_supportObjectsLayout ); m_phiBinnerStorage = store->m_phiBinnerStorage = store->m_hitsAndSupportView.phiBinnerStorage(); m_iphi = store->m_hitsAndSupportView.iphi(); diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h index bb75da087..60417b447 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h @@ -7,7 +7,7 @@ #include "CUDACore/HistoContainer.h" #include "CUDACore/cudaCompat.h" #include "Geometry/phase1PixelTopology.h" -#include "DataFormats/SoAStore.h" +#include "DataFormats/SoALayout.h" #include "DataFormats/SoAView.h" namespace pixelCPEforGPU { @@ -31,95 +31,95 @@ class TrackingRecHit2DSOAStore { // Sill, we need the 32 bits integers to be aligned, so we simply declare the SoA with the 32 bits fields first // and the 16 bits behind (as they have a looser alignment requirement. Then the SoA can be create with a byte // alignment of 1) - generate_SoA_store(HitsLayoutTemplate, + GENERATE_SOA_LAYOUT(HitsLayoutTemplate, // 32 bits section // local coord - SoA_column(float, xLocal), - SoA_column(float, yLocal), - SoA_column(float, xerrLocal), - SoA_column(float, yerrLocal), + SOA_COLUMN(float, xLocal), + SOA_COLUMN(float, yLocal), + SOA_COLUMN(float, xerrLocal), + SOA_COLUMN(float, yerrLocal), // global coord - SoA_column(float, xGlobal), - SoA_column(float, yGlobal), - SoA_column(float, zGlobal), - SoA_column(float, rGlobal), + SOA_COLUMN(float, xGlobal), + SOA_COLUMN(float, yGlobal), + SOA_COLUMN(float, zGlobal), + SOA_COLUMN(float, rGlobal), // global coordinates continue in the 16 bits section // cluster properties - SoA_column(int32_t, charge), + SOA_COLUMN(int32_t, charge), // 16 bits section (and cluster properties immediately continued) - SoA_column(int16_t, clusterSizeX), - SoA_column(int16_t, clusterSizeY) + SOA_COLUMN(int16_t, clusterSizeX), + SOA_COLUMN(int16_t, clusterSizeY) ); 
- // The hits store does not use default alignment but a more relaxed one. + // The hits layout does not use default alignment but a more relaxed one. using HitsLayout = HitsLayoutTemplate; generate_SoA_view(HitsViewTemplate, - SoA_view_store_list( - SoA_view_store(HitsLayout, hitsLayout) + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(HitsLayout, hitsLayout) ), - SoA_view_value_list( - SoA_view_value(hitsLayout, xLocal), - SoA_view_value(hitsLayout, yLocal), - SoA_view_value(hitsLayout, xerrLocal), - SoA_view_value(hitsLayout, yerrLocal), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(hitsLayout, xLocal), + SOA_VIEW_VALUE(hitsLayout, yLocal), + SOA_VIEW_VALUE(hitsLayout, xerrLocal), + SOA_VIEW_VALUE(hitsLayout, yerrLocal), - SoA_view_value(hitsLayout, xGlobal), - SoA_view_value(hitsLayout, yGlobal), - SoA_view_value(hitsLayout, zGlobal), - SoA_view_value(hitsLayout, rGlobal), + SOA_VIEW_VALUE(hitsLayout, xGlobal), + SOA_VIEW_VALUE(hitsLayout, yGlobal), + SOA_VIEW_VALUE(hitsLayout, zGlobal), + SOA_VIEW_VALUE(hitsLayout, rGlobal), - SoA_view_value(hitsLayout, charge), - SoA_view_value(hitsLayout, clusterSizeX), - SoA_view_value(hitsLayout, clusterSizeY) + SOA_VIEW_VALUE(hitsLayout, charge), + SOA_VIEW_VALUE(hitsLayout, clusterSizeX), + SOA_VIEW_VALUE(hitsLayout, clusterSizeY) ) ); using HitsView = HitsViewTemplate<>; - generate_SoA_store(SupportObjectsLayoutTemplate, + GENERATE_SOA_LAYOUT(SupportObjectsLayoutTemplate, // This is the end of the data which is transferred to host. 
The following columns are supporting // objects, not transmitted // Supporting data (32 bits aligned) - SoA_column(TrackingRecHit2DSOAStore::PhiBinner::index_type, phiBinnerStorage), + SOA_COLUMN(TrackingRecHit2DSOAStore::PhiBinner::index_type, phiBinnerStorage), // global coordinates (not transmitted) - SoA_column(int16_t, iphi), + SOA_COLUMN(int16_t, iphi), // cluster properties (not transmitted) - SoA_column(uint16_t, detectorIndex) + SOA_COLUMN(uint16_t, detectorIndex) ); - // The support objects store also not use default alignment but a more relaxed one. + // The support objects layouts also not use default alignment but a more relaxed one. using SupportObjectsLayout = SupportObjectsLayoutTemplate; generate_SoA_view(HitsAndSupportViewTemplate, - SoA_view_store_list( - SoA_view_store(HitsLayout, hitsLayout), - SoA_view_store(SupportObjectsLayout, supportObjectsStore) + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(HitsLayout, hitsLayout), + SOA_VIEW_LAYOUT(SupportObjectsLayout, supportObjectsLayout) ), - SoA_view_value_list( - SoA_view_value(hitsLayout, xLocal), - SoA_view_value(hitsLayout, yLocal), - SoA_view_value(hitsLayout, xerrLocal), - SoA_view_value(hitsLayout, yerrLocal), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(hitsLayout, xLocal), + SOA_VIEW_VALUE(hitsLayout, yLocal), + SOA_VIEW_VALUE(hitsLayout, xerrLocal), + SOA_VIEW_VALUE(hitsLayout, yerrLocal), - SoA_view_value(hitsLayout, xGlobal), - SoA_view_value(hitsLayout, yGlobal), - SoA_view_value(hitsLayout, zGlobal), - SoA_view_value(hitsLayout, rGlobal), + SOA_VIEW_VALUE(hitsLayout, xGlobal), + SOA_VIEW_VALUE(hitsLayout, yGlobal), + SOA_VIEW_VALUE(hitsLayout, zGlobal), + SOA_VIEW_VALUE(hitsLayout, rGlobal), - SoA_view_value(hitsLayout, charge), - SoA_view_value(hitsLayout, clusterSizeX), - SoA_view_value(hitsLayout, clusterSizeY), + SOA_VIEW_VALUE(hitsLayout, charge), + SOA_VIEW_VALUE(hitsLayout, clusterSizeX), + SOA_VIEW_VALUE(hitsLayout, clusterSizeY), - SoA_view_value(supportObjectsStore, phiBinnerStorage), 
- SoA_view_value(supportObjectsStore, iphi), - SoA_view_value(supportObjectsStore, detectorIndex) + SOA_VIEW_VALUE(supportObjectsLayout, phiBinnerStorage), + SOA_VIEW_VALUE(supportObjectsLayout, iphi), + SOA_VIEW_VALUE(supportObjectsLayout, detectorIndex) ) ); @@ -147,10 +147,10 @@ class TrackingRecHit2DSOAStore { __device__ __forceinline__ AverageGeometry const& averageGeometry() const { return *m_averageGeometry; } private: - // hits store - HitsLayout m_hitsStore; - // supporting objects store - SupportObjectsLayout m_supportObjectsStore; + // hits layout + HitsLayout m_hitsLayout; + // supporting objects layout + SupportObjectsLayout m_supportObjectsLayout; // Global view simplifying usage HitsAndSupportView m_hitsAndSupportView; diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h index 75f246647..40ee0c43d 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h @@ -1,7 +1,7 @@ #ifndef CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h #define CondFormats_SiPixelObjects_interface_SiPixelROCsStatusAndMapping_h -#include "DataFormats/SoAStore.h" +#include "DataFormats/SoALayout.h" #include "DataFormats/SoAView.h" namespace pixelgpudetails { @@ -14,30 +14,30 @@ namespace pixelgpudetails { constexpr unsigned int MAX_SIZE_BYTE_BOOL = MAX_SIZE * sizeof(unsigned char); } // namespace pixelgpudetails -generate_SoA_store(SiPixelROCsStatusAndMappingStoreTemplate, - SoA_column(unsigned int, fed), - SoA_column(unsigned int, link), - SoA_column(unsigned int, roc), - SoA_column(unsigned int, rawId), - SoA_column(unsigned int, rocInDet), - SoA_column(unsigned int, moduleId), - SoA_column(unsigned char, badRocs), - SoA_scalar(unsigned int, size) +GENERATE_SOA_LAYOUT(SiPixelROCsStatusAndMappingLayoutTemplate, + SOA_COLUMN(unsigned int, fed), + SOA_COLUMN(unsigned int, link), + SOA_COLUMN(unsigned int, roc), + 
SOA_COLUMN(unsigned int, rawId), + SOA_COLUMN(unsigned int, rocInDet), + SOA_COLUMN(unsigned int, moduleId), + SOA_COLUMN(unsigned char, badRocs), + SOA_SCALAR(unsigned int, size) ); -using SiPixelROCsStatusAndMappingStore = SiPixelROCsStatusAndMappingStoreTemplate<>; +using SiPixelROCsStatusAndMappingLayout = SiPixelROCsStatusAndMappingLayoutTemplate<>; generate_SoA_const_view(SiPixelROCsStatusAndMappingConstViewTemplate, - SoA_view_store_list(SoA_view_store(SiPixelROCsStatusAndMappingStore, mappingStore)), - SoA_view_value_list( - SoA_view_value(mappingStore, fed), - SoA_view_value(mappingStore, link), - SoA_view_value(mappingStore, roc), - SoA_view_value(mappingStore, rawId), - SoA_view_value(mappingStore, rocInDet), - SoA_view_value(mappingStore, moduleId), - SoA_view_value(mappingStore, badRocs), - SoA_view_value(mappingStore, size) + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SiPixelROCsStatusAndMappingLayout, mappingLayout)), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(mappingLayout, fed), + SOA_VIEW_VALUE(mappingLayout, link), + SOA_VIEW_VALUE(mappingLayout, roc), + SOA_VIEW_VALUE(mappingLayout, rawId), + SOA_VIEW_VALUE(mappingLayout, rocInDet), + SOA_VIEW_VALUE(mappingLayout, moduleId), + SOA_VIEW_VALUE(mappingLayout, badRocs), + SOA_VIEW_VALUE(mappingLayout, size) ) ); diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc index 7b1ef9c10..6b19b1c07 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc @@ -15,7 +15,7 @@ #include "CondFormats/SiPixelROCsStatusAndMappingWrapper.h" #include "CUDACore/copyAsync.h" -SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMappingStore const& cablingMap, +SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMappingLayout const& cablingMap, std::vector modToUnp) : 
modToUnpDefault(modToUnp.size()), hasQuality_(true) { // TODO: check if cudaStreamDefault is appropriate diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h index 3ecd86131..927fb48f0 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h @@ -13,8 +13,8 @@ class SiPixelROCsStatusAndMappingWrapper { public: - /* This is using a store as the size is needed. TODO: use views when views start embedding size. */ - explicit SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMappingStore const &cablingMap, + /* This is using a layout as the size is needed. TODO: use views when views start embedding size. */ + explicit SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMappingLayout const &cablingMap, std::vector modToUnp); bool hasQuality() const { return hasQuality_; } @@ -34,11 +34,11 @@ class SiPixelROCsStatusAndMappingWrapper { struct GPUData { void allocate(size_t size, cudaStream_t stream) { cablingMapDeviceBuffer = cms::cuda::make_device_unique( - SiPixelROCsStatusAndMappingStore::computeDataSize(size), stream); - cablingMapDevice = SiPixelROCsStatusAndMappingStore(cablingMapDeviceBuffer.get(), size); + SiPixelROCsStatusAndMappingLayout::computeDataSize(size), stream); + cablingMapDevice = SiPixelROCsStatusAndMappingLayout(cablingMapDeviceBuffer.get(), size); } cms::cuda::device::unique_ptr cablingMapDeviceBuffer; - SiPixelROCsStatusAndMappingStore cablingMapDevice = SiPixelROCsStatusAndMappingStore(nullptr, 0); // map struct in GPU + SiPixelROCsStatusAndMappingLayout cablingMapDevice = SiPixelROCsStatusAndMappingLayout(nullptr, 0); // map struct in GPU }; cms::cuda::ESProduct gpuData_; diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 48f2c5172..d5cf7050e 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ 
-154,9 +154,9 @@ inline size_t alignSize(size_t size, size_t alignment = 128) { enum class SoAColumnType { scalar = _VALUE_TYPE_SCALAR, column = _VALUE_TYPE_COLUMN, eigen = _VALUE_TYPE_EIGEN_COLUMN }; -#define SoA_scalar(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) -#define SoA_column(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) -#define SoA_eigenColumn(TYPE, NAME) (_VALUE_TYPE_EIGEN_COLUMN, TYPE, NAME) +#define SOA_SCALAR(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) +#define SOA_COLUMN(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) +#define SOA_EIGEN_COLUMN(TYPE, NAME) (_VALUE_TYPE_EIGEN_COLUMN, TYPE, NAME) /* Iterate on the macro MACRO and return the result as a comma separated list */ #define _ITERATE_ON_ALL_COMMA(MACRO, DATA, ...) \ @@ -238,7 +238,7 @@ struct SoAAccessors{ }; }; -/* Enum parameters allowing templated control of store/view behaviors */ +/* Enum parameters allowing templated control of layout/view behaviors */ /* Alignement enforcement verifies every column is aligned, and * hints the compiler that it can expect column pointers to be aligned */ enum class AlignmentEnforcement : bool { Relaxed, Enforced }; diff --git a/src/cudadev/DataFormats/SoAStore.h b/src/cudadev/DataFormats/SoALayout.h similarity index 68% rename from src/cudadev/DataFormats/SoAStore.h rename to src/cudadev/DataFormats/SoALayout.h index bb0a441c5..3ea8ec414 100644 --- a/src/cudadev/DataFormats/SoAStore.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -3,8 +3,8 @@ * with compile-time size and alignment, and accessors to the "rows" and "columns". 
*/ -#ifndef DataStructures_SoAStore_h -#define DataStructures_SoAStore_h +#ifndef DataStructures_SoALayout_h +#define DataStructures_SoALayout_h #include "SoACommon.h" @@ -13,27 +13,27 @@ /* dump SoA fields information; these should expand to, for columns: * Example: - * generate_SoA_store(SoA, + * GENERATE_SOA_LAYOUT(SoA, * // predefined static scalars * // size_t size; * // size_t alignment; * * // columns: one value per element - * SoA_FundamentalTypeColumn(double, x), - * SoA_FundamentalTypeColumn(double, y), - * SoA_FundamentalTypeColumn(double, z), - * SoA_eigenColumn(Eigen::Vector3d, a), - * SoA_eigenColumn(Eigen::Vector3d, b), - * SoA_eigenColumn(Eigen::Vector3d, r), - * SoA_column(uint16_t, colour), - * SoA_column(int32_t, value), - * SoA_column(double *, py), - * SoA_FundamentalTypeColumn(uint32_t, count), - * SoA_FundamentalTypeColumn(uint32_t, anotherCount), + * SOA_COLUMN(double, x), + * SOA_COLUMN(double, y), + * SOA_COLUMN(double, z), + * SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + * SOA_EIGEN_COLUMN(Eigen::Vector3d, b), + * SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + * SOA_COLUMN(uint16_t, colour), + * SOA_COLUMN(int32_t, value), + * SOA_COLUMN(double *, py), + * SOA_COLUMN(uint32_t, count), + * SOA_COLUMN(uint32_t, anotherCount), * * // scalars: one value for the whole structure - * SoA_scalar(const char *, description), - * SoA_scalar(uint32_t, someNumber) + * SOA_SCALAR(const char *, description), + * SOA_SCALAR(uint32_t, someNumber) * ); * * dumps as: @@ -144,7 +144,7 @@ byteAlignment / sizeof(CPP_TYPE::Scalar);) \ if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ if (reinterpret_cast(BOOST_PP_CAT(NAME, _)) % byteAlignment) \ - throw std::out_of_range("In store constructor: misaligned column: " #NAME); + throw std::out_of_range("In layout constructor: misaligned column: " #NAME); #define _ASSIGN_SOA_COLUMN_OR_SCALAR(R, DATA, TYPE_NAME) _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL TYPE_NAME @@ -162,118 +162,6 @@ #define 
_ACCUMULATE_SOA_ELEMENT(R, DATA, TYPE_NAME) _ACCUMULATE_SOA_ELEMENT_IMPL TYPE_NAME -/** - * Value accessor of the const_element subclass. - */ -#define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - SOA_HOST_DEVICE_INLINE \ - _SWITCH_ON_TYPE( \ - VALUE_TYPE, /* Scalar */ \ - CPP_TYPE const& NAME() { return soa_.NAME(); }, /* Column */ \ - CPP_TYPE const& NAME() { return *(soa_.NAME() + index_); }, \ - /* Eigen column */ /* Ugly hack with a helper template to avoid having commas inside the macro parameter */ \ - EigenConstMapMaker::Type const NAME() { \ - return EigenConstMapMaker::withData(soa_.NAME() + index_) \ - .withStride(soa_.BOOST_PP_CAT(NAME, Stride)()); \ - }) - -#define _DECLARE_SOA_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) _DECLARE_SOA_CONST_ELEMENT_ACCESSOR_IMPL TYPE_NAME - -/** - * Generator of parameters for (non-const) element subclass (expanded comma separated). - */ -#define _DECLARE_ELEMENT_VALUE_ARG_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ - BOOST_PP_EMPTY(), /* Column */ \ - (CPP_TYPE * NAME), /* Eigen column */ \ - (CPP_TYPE::Scalar * NAME)(size_t BOOST_PP_CAT(NAME, Stride))) - -#define _DECLARE_ELEMENT_VALUE_ARG(R, DATA, TYPE_NAME) _DECLARE_ELEMENT_VALUE_ARG_IMPL TYPE_NAME - -/** - * Generator of member initialization for constructor of element subclass - */ -#define _DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ - _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ - BOOST_PP_EMPTY(), /* Column */ \ - (NAME(DATA, NAME)), /* Eigen column */ \ - (NAME(DATA, NAME, BOOST_PP_CAT(NAME, Stride)))) - -/* declare AoS-like element value args for contructor; these should expand,for columns only */ -#define _DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) - -/** - * Generator of member initialization for constructor of const element 
subclass - */ -#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ - _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ - BOOST_PP_EMPTY(), /* Column */ \ - (BOOST_PP_CAT(NAME, _)(DATA, NAME)), /* Eigen column */ \ - (BOOST_PP_CAT(NAME, _)(DATA, NAME, BOOST_PP_CAT(NAME, Stride)))) - -/* declare AoS-like element value args for contructor; these should expand,for columns only */ -#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) -/** - * Generator of the member-by-member copy operator of the element subclass. - */ -#define _DECLARE_ELEMENT_VALUE_COPY_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ - BOOST_PP_EMPTY(), /* Column */ \ - NAME() = other.NAME(); \ - , /* Eigen column */ \ - static_cast(NAME) = static_cast::type&>(other.NAME);) - -#define _DECLARE_ELEMENT_VALUE_COPY(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_ELEMENT_VALUE_COPY_IMPL TYPE_NAME) - -/** - * Declaration of the private members of the const element subclass - */ -#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ - BOOST_PP_EMPTY(), /* Column */ \ - const SoAValueWithConf BOOST_PP_CAT(NAME, _); \ - , /* Eigen column */ \ - const SoAEigenValueWithConf BOOST_PP_CAT(NAME, _);) - -#define _DECLARE_CONST_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) _DECLARE_CONST_ELEMENT_VALUE_MEMBER_IMPL TYPE_NAME - -/** - * Declaration of the members accessors of the const element subclass - */ -#define _DECLARE_CONST_ELEMENT_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE( \ - VALUE_TYPE, /* Scalar */ \ - BOOST_PP_EMPTY(), /* Column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return BOOST_PP_CAT(NAME, _)(); }, /* Eigen column */ \ - SOA_HOST_DEVICE_INLINE const SoAEigenValueWithConf NAME() const { return 
BOOST_PP_CAT(NAME, _); }) - -#define _DECLARE_CONST_ELEMENT_ACCESSOR(R, DATA, TYPE_NAME) _DECLARE_CONST_ELEMENT_ACCESSOR_IMPL TYPE_NAME - -/** - * Declaration of the members of the element subclass - */ -#define _DECLARE_ELEMENT_VALUE_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ - BOOST_PP_EMPTY(), /* Column */ \ - SoAValueWithConf NAME; \ - , /* Eigen column */ \ - SoAEigenValueWithConf NAME;) - -#define _DECLARE_ELEMENT_VALUE_MEMBER(R, DATA, TYPE_NAME) _DECLARE_ELEMENT_VALUE_MEMBER_IMPL TYPE_NAME - -/** - * Parameters passed to element subclass constructor in operator[] - */ -#define _DECLARE_ELEMENT_CONSTR_CALL_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE(VALUE_TYPE, /* Scalar */ \ - BOOST_PP_EMPTY(), /* Column */ \ - (BOOST_PP_CAT(NAME, _)), /* Eigen column */ \ - (BOOST_PP_CAT(NAME, _))(BOOST_PP_CAT(NAME, Stride_))) - -#define _DECLARE_ELEMENT_CONSTR_CALL(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_ELEMENT_CONSTR_CALL_IMPL TYPE_NAME) - /** * Direct access to column pointer and indexed access */ @@ -326,9 +214,9 @@ #endif /* - * A macro defining a SoA store (collection of scalars and columns of equal lengths + * A macro defining a SoA layout (collection of scalars and columns of equal lengths) */ -#define generate_SoA_store(CLASS, ...) \ +#define GENERATE_SOA_LAYOUT(CLASS, ...) 
\ template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ @@ -417,50 +305,6 @@ _ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ } \ \ - struct const_element { \ - SOA_HOST_DEVICE_INLINE \ - const_element(size_t index, /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_ARG, index, __VA_ARGS__)) \ - : _ITERATE_ON_ALL_COMMA(_DECLARE_CONST_ELEMENT_VALUE_MEMBER_INITIALISATION, index, __VA_ARGS__) {} \ - _ITERATE_ON_ALL(_DECLARE_CONST_ELEMENT_ACCESSOR, ~, __VA_ARGS__) \ - \ - private: \ - _ITERATE_ON_ALL(_DECLARE_CONST_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ - }; \ - \ - struct element { \ - SOA_HOST_DEVICE_INLINE \ - element(size_t index, /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_ARG, index, __VA_ARGS__)) \ - : _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_VALUE_MEMBER_INITIALISATION, index, __VA_ARGS__) {} \ - SOA_HOST_DEVICE_INLINE \ - element& operator=(const element& other) { \ - _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_COPY, ~, __VA_ARGS__) \ - return *this; \ - } \ - _ITERATE_ON_ALL(_DECLARE_ELEMENT_VALUE_MEMBER, ~, __VA_ARGS__) \ - }; \ - \ -private:\ - /* AoS-like accessor (non-const) */ \ - SOA_HOST_DEVICE_INLINE \ - element operator[](size_t index) { \ - rangeCheck(index); \ - return element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__)); \ - } \ - \ - /* AoS-like accessor (const) */ \ - SOA_HOST_DEVICE_INLINE \ - const const_element operator[](size_t index) const { \ - rangeCheck(index); \ - return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_ELEMENT_CONSTR_CALL, ~, __VA_ARGS__)); \ - } \ - \ - /* accessors */ \ - _ITERATE_ON_ALL(_DECLARE_SOA_ACCESSOR, ~, __VA_ARGS__) \ - _ITERATE_ON_ALL(_DECLARE_SOA_CONST_ACCESSOR, ~, __VA_ARGS__) \ - \ -public:\ /* dump the SoA internal structure */ \ template \ SOA_HOST_ONLY friend void dump(); \ @@ -484,4 +328,4 @@ public:\ _ITERATE_ON_ALL(_DECLARE_SOA_DATA_MEMBER, ~, 
__VA_ARGS__) \ } -#endif // ndef DataStructures_SoAStore_h +#endif // ndef DataStructures_SoALayout_h diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 2c0c8a3f5..93791c28e 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -1,8 +1,7 @@ /* - * Structure-of-Arrays template allowing access to a selection of scalars and columns from one - * or multiple SoAStores. A SoAStore is already a view to its complete set of columns. - * This class will allow handling subsets of columns or set of columns from multiple SoAViews, possibly - * with varying columns lengths. + * Structure-of-Arrays templates allowing access to a selection of scalars and columns from one + * or multiple SoA layouts or views. + * This template generator will allow handling subsets of columns from one or multiple SoA views or layouts. */ #ifndef DataStructures_SoAView_h @@ -10,30 +9,30 @@ #include "SoACommon.h" -#define SoA_view_store(TYPE, NAME) (TYPE, NAME) +#define SOA_VIEW_LAYOUT(TYPE, NAME) (TYPE, NAME) -#define SoA_view_store_list(...) __VA_ARGS__ +#define SOA_VIEW_LAYOUT_LIST(...) __VA_ARGS__ -#define SoA_view_value(STORE_NAME, STORE_MEMBER) (STORE_NAME, STORE_MEMBER, STORE_MEMBER) +#define SOA_VIEW_VALUE(LAYOUT_NAME, LAYOUT_MEMBER) (LAYOUT_NAME, LAYOUT_MEMBER, LAYOUT_MEMBER) -#define SoA_view_value_rename(STORE_NAME, STORE_MEMBER, LOCAL_NAME) (STORE_NAME, STORE_MEMBER, LOCAL_NAME) +#define SOA_VIEW_VALUE_RENAME(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) (LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) -#define SoA_view_value_list(...) __VA_ARGS__ +#define SOA_VIEW_VALUE_LIST(...) __VA_ARGS__ /* - * A macro defining a SoA view (collection of coluns from multiple stores) + * A macro defining a SoA view (collection of columns from multiple layouts or views.) 
* * Usage: - * generate_SoA_view(PixelXYView, - * SoA_view_store_list( - * SoA_view_store(PixelDigis, pixelDigis), - * SoA_view_store(PixelRecHitsStore, pixelsRecHit) + * GENERATE_SOA_VIEW(PixelXYView, + * SOA_VIEW_LAYOUT_LIST( + * SOA_VIEW_LAYOUT(PixelDigis, pixelDigis), + * SOA_VIEW_LAYOUT(PixelRecHitsLayout, pixelsRecHit) * ), - * SoA_view_value_list( - * SoA_view_value(pixelDigis, x, digisX), - * SoA_view_value(pixelDigis, y, digisY), - * SoA_view_value(pixelsRecHit, x, recHitsX), - * SoA_view_value(pixelsRecHit, y, recHitsY) + * SOA_VIEW_VALUE_LIST( + * SOA_VIEW_VALUE_RENAME(pixelDigis, x, digisX), + * SOA_VIEW_VALUE_RENAME(pixelDigis, y, digisY), + * SOA_VIEW_VALUE_RENAME(pixelsRecHit, x, recHitsX), + * SOA_VIEW_VALUE_RENAME(pixelsRecHit, y, recHitsY) * ) * ); * @@ -69,21 +68,21 @@ struct ConstValueTraits { */ /** - * Store types aliasing for referencing by name + * Layout types aliasing for referencing by name */ -#define _DECLARE_VIEW_STORE_TYPE_ALIAS_IMPL(TYPE, NAME) typedef TYPE BOOST_PP_CAT(TypeOf_, NAME); +#define _DECLARE_VIEW_LAYOUT_TYPE_ALIAS_IMPL(TYPE, NAME) typedef TYPE BOOST_PP_CAT(TypeOf_, NAME); -#define _DECLARE_VIEW_STORE_TYPE_ALIAS(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_STORE_TYPE_ALIAS_IMPL TYPE_NAME) +#define _DECLARE_VIEW_LAYOUT_TYPE_ALIAS(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS_IMPL TYPE_NAME) /** * Member types aliasing for referencing by name */ -#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ - typedef typename BOOST_PP_CAT(TypeOf_, STORE_NAME)::SoAMetadata::BOOST_PP_CAT(TypeOf_, STORE_MEMBER) \ +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + typedef typename BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(TypeOf_, LAYOUT_MEMBER) \ BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ - BOOST_PP_CAT(TypeOf_, 
STORE_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, STORE_MEMBER); \ + BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, LAYOUT_MEMBER); \ SOA_HOST_DEVICE_INLINE \ DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ @@ -91,153 +90,153 @@ struct ConstValueTraits { static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != SoAColumnType::eigen, \ "Eigen columns not supported in views."); -#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) /** * Member assignment for trivial constructor */ -#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ +#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ (BOOST_PP_CAT(LOCAL_NAME, _)(nullptr)) -#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL STORE_MEMBER_NAME) +#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL LAYOUT_MEMBER_NAME) /** - * Generator of parameters (stores) for constructor by stores. + * Generator of parameters (layouts/views) for constructor by layouts/views. 
*/ -#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL(STORE_TYPE, STORE_NAME, DATA) (DATA STORE_TYPE & STORE_NAME) +#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL(LAYOUT_TYPE, LAYOUT_NAME, DATA) (DATA LAYOUT_TYPE & LAYOUT_NAME) #define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS(R, DATA, TYPE_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) /** - * Generator of parameters (stores) for constructor by column. + * Generator of parameters for constructor by column. */ -#define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ +#define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ (DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME) -#define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) +#define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) /** * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. 
*/ -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(STORE, MEMBER, NAME) \ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(LAYOUT, MEMBER, NAME) \ (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != SoAColumnType::eigen, \ "Eigen values not supported in views"); \ - auto addr = STORE.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ + auto addr = LAYOUT.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ if (reinterpret_cast(addr) % byteAlignment) \ - throw std::out_of_range("In store constructor: misaligned column: " #NAME); \ + throw std::out_of_range("In constructor by layout: misaligned column: " #NAME); \ return addr; \ }())) -#define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL STORE_MEMBER_NAME) +#define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL LAYOUT_MEMBER_NAME) /** * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. 
*/ -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL(STORE, MEMBER, NAME) \ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL(LAYOUT, MEMBER, NAME) \ (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ if (reinterpret_cast(NAME) % byteAlignment) \ - throw std::out_of_range("In store constructor: misaligned column: " #NAME); \ + throw std::out_of_range("In constructor by column: misaligned column: " #NAME); \ return NAME; \ }())) -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL STORE_MEMBER_NAME) +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL LAYOUT_MEMBER_NAME) /** * Generator of element members initializer. */ -#define _DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL(STORE, MEMBER, LOCAL_NAME, DATA) (LOCAL_NAME(DATA, LOCAL_NAME)) +#define _DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL(LAYOUT, MEMBER, LOCAL_NAME, DATA) (LOCAL_NAME(DATA, LOCAL_NAME)) -#define _DECLARE_VIEW_ELEM_MEMBER_INIT(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) +#define _DECLARE_VIEW_ELEM_MEMBER_INIT(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) /** - * Helper macro extracting the data type from a column accessor in a store + * Helper macro extracting the data type from metadata of a layout or view */ -#define _COLUMN_TYPE(STORE_NAME, STORE_MEMBER) \ - typename std::remove_pointer::type +#define _COLUMN_TYPE(LAYOUT_NAME, LAYOUT_MEMBER) \ + typename std::remove_pointer::type /** * Generator of parameters for (non-const) element subclass (expanded comma separated). 
*/ -#define _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ +#define _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ (DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME) -#define _DECLARE_VIEW_ELEMENT_VALUE_ARG(R, DATA, STORE_MEMBER_NAME) \ - _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA) +#define _DECLARE_VIEW_ELEMENT_VALUE_ARG(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA) /** * Generator of member initialization for constructor of element subclass */ -#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ +#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ (BOOST_PP_CAT(LOCAL_NAME, _)(DATA, LOCAL_NAME)) /* declare AoS-like element value args for contructor; these should expand,for columns only */ -#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) +#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) /** * Declaration of the members accessors of the const element subclass */ -#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME() const { \ return BOOST_PP_CAT(LOCAL_NAME, _)(); \ } -#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ - _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL STORE_MEMBER_NAME +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) 
\ + _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL LAYOUT_MEMBER_NAME /** * Declaration of the private members of the const element subclass */ -#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ const ConstValueTraits< \ SoAConstValueWithConf, \ BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, LOCAL_NAME) \ > BOOST_PP_CAT(LOCAL_NAME, _); -#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ - _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL STORE_MEMBER_NAME +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL LAYOUT_MEMBER_NAME /** * Generator of the member-by-member copy operator of the element subclass. */ -#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) LOCAL_NAME() = other.LOCAL_NAME(); +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) LOCAL_NAME() = other.LOCAL_NAME(); -#define _DECLARE_VIEW_ELEMENT_VALUE_COPY(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL STORE_MEMBER_NAME) +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL LAYOUT_MEMBER_NAME) /** * Declaration of the private members of the const element subclass */ -#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ +#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ SoAValueWithConf LOCAL_NAME; -#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER(R, DATA, STORE_MEMBER_NAME) \ - _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL STORE_MEMBER_NAME +#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL LAYOUT_MEMBER_NAME /** * Parameters passed to element subclass constructor in 
operator[] */ -#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) (BOOST_PP_CAT(LOCAL_NAME, _)) +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) (BOOST_PP_CAT(LOCAL_NAME, _)) -#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL STORE_MEMBER_NAME) +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL LAYOUT_MEMBER_NAME) /** * Direct access to column pointer and indexed access */ -#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ +#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ /* Column or scalar */ \ SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() { \ return typename SoAAccessors:: \ @@ -250,13 +249,13 @@ struct ConstValueTraits { template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ } -#define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_ACCESSOR_IMPL STORE_MEMBER_NAME) +#define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_ACCESSOR_IMPL LAYOUT_MEMBER_NAME) /** * Direct access to column pointer (const) and indexed access. 
*/ -#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME) \ +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ /* Column or scalar */ \ SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() const { \ return typename SoAAccessors:: \ @@ -269,19 +268,19 @@ struct ConstValueTraits { template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ } -#define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL STORE_MEMBER_NAME) +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL LAYOUT_MEMBER_NAME) /** * SoA class member declaration (column pointers). */ -#define _DECLARE_VIEW_SOA_MEMBER_IMPL(STORE_NAME, STORE_MEMBER, LOCAL_NAME, DATA) \ +#define _DECLARE_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _) = nullptr; -#define _DECLARE_VIEW_SOA_MEMBER(R, DATA, STORE_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(STORE_MEMBER_NAME, DATA)) +#define _DECLARE_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) -#define generate_SoA_view(CLASS, STORES_LIST, VALUE_LIST) \ +#define generate_SoA_view(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ @@ -305,13 +304,13 @@ struct ConstValueTraits { \ template \ using SoAEigenValueWithConf = SoAEigenValue; \ - /** \ - * Helper/friend class allowing SoA introspection. \ + /** \ + * Helper/friend class allowing SoA introspection. 
\ */ \ struct SoAMetadata { \ friend CLASS; \ - /* Alias store types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ + /* Alias layout or view types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ \ /* Alias member types to name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, BOOST_PP_EMPTY(), VALUE_LIST) \ @@ -325,8 +324,8 @@ struct ConstValueTraits { /* Trivial constuctor */ \ CLASS() : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ \ - /* Constructor relying on user provided stores */ \ - SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), STORES_LIST)) \ + /* Constructor relying on user provided layouts or views */ \ + SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), LAYOUTS_LIST)) \ : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ \ /* Constructor relying on individually provided column addresses */ \ @@ -383,7 +382,7 @@ struct ConstValueTraits { _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ } -#define generate_SoA_const_view(CLASS, STORES_LIST, VALUE_LIST) \ +#define generate_SoA_const_view(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ @@ -413,8 +412,8 @@ struct ConstValueTraits { */ \ struct SoAMetadata { \ friend CLASS; \ - /* Alias store types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_STORE_TYPE_ALIAS, ~, STORES_LIST) \ + /* Alias layout/view types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ \ /* Alias member types to 
name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, const, VALUE_LIST) \ @@ -428,8 +427,8 @@ struct ConstValueTraits { /* Trivial constuctor */ \ CLASS() : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ \ - /* Constructor relying on user provided stores */ \ - SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, STORES_LIST)) \ + /* Constructor relying on user provided layouts or views */ \ + SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, LAYOUTS_LIST)) \ : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ \ /* Constructor relying on individually provided column addresses */ \ diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc b/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc index 10b9c45fd..cbde717e7 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc @@ -31,8 +31,8 @@ void SiPixelROCsStatusAndMappingWrapperESProducer::produce(edm::EventSetup& even std::ifstream in(data_ / "cablingMap.bin", std::ios::binary); in.exceptions(std::ifstream::badbit | std::ifstream::failbit | std::ifstream::eofbit); // We use default alignment - auto objBuffer = std::make_unique(SiPixelROCsStatusAndMappingStore::computeDataSize(pixelgpudetails::MAX_SIZE)); - SiPixelROCsStatusAndMappingStore obj(objBuffer.get(), pixelgpudetails::MAX_SIZE); + auto objBuffer = std::make_unique(SiPixelROCsStatusAndMappingLayout::computeDataSize(pixelgpudetails::MAX_SIZE)); + SiPixelROCsStatusAndMappingLayout obj(objBuffer.get(), pixelgpudetails::MAX_SIZE); in.read(reinterpret_cast(obj.soaMetadata().data()), obj.soaMetadata().byteSize()); unsigned int modToUnpDefSize; 
in.read(reinterpret_cast(&modToUnpDefSize), sizeof(unsigned int)); diff --git a/src/cudadev/plugin-SiPixelRawToDigi/SiPixelDigisSoAFromCUDA.cc b/src/cudadev/plugin-SiPixelRawToDigi/SiPixelDigisSoAFromCUDA.cc index 1de1e1dda..bf701d94e 100644 --- a/src/cudadev/plugin-SiPixelRawToDigi/SiPixelDigisSoAFromCUDA.cc +++ b/src/cudadev/plugin-SiPixelRawToDigi/SiPixelDigisSoAFromCUDA.cc @@ -22,7 +22,7 @@ class SiPixelDigisSoAFromCUDA : public edm::EDProducerExternalWork { edm::EDGetTokenT> digiGetToken_; edm::EDPutTokenT digiPutToken_; - SiPixelDigisCUDA::HostStoreAndBuffer digis_; + SiPixelDigisCUDA::HostStore digis_; size_t nDigis_; }; @@ -54,7 +54,7 @@ void SiPixelDigisSoAFromCUDA::produce(edm::Event& iEvent, const edm::EventSetup& // host memory to be allocated without a CUDA stream // - What if a CPU algorithm would produce the same SoA? We can't // use cudaMallocHost without a GPU... - auto dv = digis_.store(); + auto dv = digis_.view(); iEvent.emplace(digiPutToken_, nDigis_, dv.pdigi(), dv.rawIdArr(), dv.adc(), dv.clus()); digis_.reset(); diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu index 00a1c132b..154be2546 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -1,4 +1,4 @@ -#include "DataFormats/SoAStore.h" +#include "DataFormats/SoALayout.h" #include "DataFormats/SoAView.h" #include #include @@ -10,47 +10,47 @@ // Scalars, Columns of scalars and of Eigen vectors // View to each of them, from one and multiple stores. 
-generate_SoA_store(SoA1LayoutTemplate, +GENERATE_SOA_LAYOUT(SoA1LayoutTemplate, // predefined static scalars // size_t size; // size_t alignment; // columns: one value per element - SoA_column(double, x), - SoA_column(double, y), - SoA_column(double, z), - SoA_eigenColumn(Eigen::Vector3d, a), - SoA_eigenColumn(Eigen::Vector3d, b), - SoA_eigenColumn(Eigen::Vector3d, r), - SoA_column(uint16_t, color), - SoA_column(int32_t, value), - SoA_column(double *, py), - SoA_column(uint32_t, count), - SoA_column(uint32_t, anotherCount), + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), + SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + SOA_EIGEN_COLUMN(Eigen::Vector3d, b), + SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + SOA_COLUMN(uint16_t, color), + SOA_COLUMN(int32_t, value), + SOA_COLUMN(double *, py), + SOA_COLUMN(uint32_t, count), + SOA_COLUMN(uint32_t, anotherCount), // scalars: one value for the whole structure - SoA_scalar(const char *, description), - SoA_scalar(uint32_t, someNumber) + SOA_SCALAR(const char *, description), + SOA_SCALAR(uint32_t, someNumber) ); using SoA1Layout = SoA1LayoutTemplate<>; // A 1 to 1 view of the store (except for unsupported types). 
generate_SoA_view(SoA1ViewTemplate, - SoA_view_store_list( - SoA_view_store(SoA1Layout, soa1) + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(SoA1Layout, soa1) ), - SoA_view_value_list( - SoA_view_value(soa1, x), - SoA_view_value(soa1, y), - SoA_view_value(soa1, z), - SoA_view_value(soa1, color), - SoA_view_value(soa1, value), - SoA_view_value(soa1, py), - SoA_view_value(soa1, count), - SoA_view_value(soa1, anotherCount), - SoA_view_value(soa1, description), - SoA_view_value(soa1, someNumber) + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(soa1, x), + SOA_VIEW_VALUE(soa1, y), + SOA_VIEW_VALUE(soa1, z), + SOA_VIEW_VALUE(soa1, color), + SOA_VIEW_VALUE(soa1, value), + SOA_VIEW_VALUE(soa1, py), + SOA_VIEW_VALUE(soa1, count), + SOA_VIEW_VALUE(soa1, anotherCount), + SOA_VIEW_VALUE(soa1, description), + SOA_VIEW_VALUE(soa1, someNumber) ) ); @@ -58,19 +58,19 @@ using SoA1View = SoA1ViewTemplate<>; // A partial view (artificial mix of store and view) generate_SoA_view(SoA1View2GTemplate, - SoA_view_store_list( - SoA_view_store(SoA1Layout, soa1), - SoA_view_store(SoA1View, soa1v) + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(SoA1Layout, soa1), + SOA_VIEW_LAYOUT(SoA1View, soa1v) ), - SoA_view_value_list( - SoA_view_value(soa1, x), - SoA_view_value(soa1v, y), - SoA_view_value(soa1, color), - SoA_view_value(soa1v, value), - SoA_view_value(soa1v, count), - SoA_view_value(soa1, anotherCount), - SoA_view_value(soa1v, description), - SoA_view_value(soa1, someNumber) + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(soa1, x), + SOA_VIEW_VALUE(soa1v, y), + SOA_VIEW_VALUE(soa1, color), + SOA_VIEW_VALUE(soa1v, value), + SOA_VIEW_VALUE(soa1v, count), + SOA_VIEW_VALUE(soa1, anotherCount), + SOA_VIEW_VALUE(soa1v, description), + SOA_VIEW_VALUE(soa1, someNumber) ) ); @@ -80,23 +80,23 @@ using SoA1View2G = SoA1View2GTemplate<>; // Same partial view, yet const. 
generate_SoA_const_view(SoA1View2Gconst, - SoA_view_store_list( - SoA_view_store(SoA1Layout, soa1), - SoA_view_store(SoA1View, soa1v) + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(SoA1Layout, soa1), + SOA_VIEW_LAYOUT(SoA1View, soa1v) ), - SoA_view_value_list( - SoA_view_value(soa1, x), - SoA_view_value(soa1v, y), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(soa1, x), + SOA_VIEW_VALUE(soa1v, y), /* Eigen columns are not supported in views. SoA_view_value(soa1, a, a), SoA_view_value(soa1, b, b), SoA_view_value(soa1, r, r), */ - SoA_view_value(soa1, color), - SoA_view_value(soa1v, value), - SoA_view_value(soa1v, count), - SoA_view_value(soa1, anotherCount), - SoA_view_value(soa1v, description), - SoA_view_value(soa1, someNumber) + SOA_VIEW_VALUE(soa1, color), + SOA_VIEW_VALUE(soa1v, value), + SOA_VIEW_VALUE(soa1v, count), + SOA_VIEW_VALUE(soa1, anotherCount), + SOA_VIEW_VALUE(soa1v, description), + SOA_VIEW_VALUE(soa1, someNumber) ) ); From e3fdaea96da7de578271bcf79a17edb938d9dda1 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 6 Dec 2021 14:18:32 +0100 Subject: [PATCH 21/50] [cudadev] Moved SoA view generating macro to uppercase. --- src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h | 4 ++-- src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h | 6 +++--- src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h | 4 ++-- src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h | 2 +- src/cudadev/DataFormats/SoAView.h | 4 ++-- src/cudadev/test/SoAStoreAndView_t.cu | 6 +++--- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h index 4145c5eb7..4414d4f80 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h @@ -23,7 +23,7 @@ class SiPixelClustersCUDA { // We use all defaults for the template parameters. 
using DeviceLayout = DeviceLayoutTemplate<>; - generate_SoA_view(DeviceViewTemplate, + GENERATE_SOA_VIEW(DeviceViewTemplate, SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceLayout, deviceLayout)), SOA_VIEW_VALUE_LIST( SOA_VIEW_VALUE(deviceLayout, moduleStart), // index of the first pixel of each module @@ -37,7 +37,7 @@ class SiPixelClustersCUDA { using DeviceView = DeviceViewTemplate<>; - generate_SoA_const_view(DeviceConstViewTemplate, + GENERATE_SOA_CONST_VIEW(DeviceConstViewTemplate, SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceView, deviceView)), SOA_VIEW_VALUE_LIST( SOA_VIEW_VALUE(deviceView, moduleStart), // index of the first pixel of each module diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index 8392bf083..6d6e60770 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -32,7 +32,7 @@ class SiPixelDigisCUDA { using HostDeviceLayout = HostDeviceLayoutTemplate<>; - generate_SoA_view(HostDeviceViewTemplate, + GENERATE_SOA_VIEW(HostDeviceViewTemplate, SOA_VIEW_LAYOUT_LIST( SOA_VIEW_LAYOUT(HostDeviceLayout, hostDevice) ), @@ -46,7 +46,7 @@ class SiPixelDigisCUDA { using HostDeviceView = HostDeviceViewTemplate<>; - generate_SoA_view(DeviceFullViewTemplate, + GENERATE_SOA_VIEW(DeviceFullViewTemplate, SOA_VIEW_LAYOUT_LIST( SOA_VIEW_LAYOUT(DeviceOnlyLayout, deviceOnly), SOA_VIEW_LAYOUT(HostDeviceLayout, hostDevice) @@ -65,7 +65,7 @@ class SiPixelDigisCUDA { using DeviceFullView = DeviceFullViewTemplate<>; /* Device pixel view: this is a second generation view (view from view) */ - generate_SoA_const_view(DevicePixelConstViewTemplate, + GENERATE_SOA_CONST_VIEW(DevicePixelConstViewTemplate, /* We get out data from the DeviceFullView */ SOA_VIEW_LAYOUT_LIST( SOA_VIEW_LAYOUT(DeviceFullView, deviceFullView) diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h index 60417b447..aed7490cd 
100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h @@ -57,7 +57,7 @@ class TrackingRecHit2DSOAStore { // The hits layout does not use default alignment but a more relaxed one. using HitsLayout = HitsLayoutTemplate; - generate_SoA_view(HitsViewTemplate, + GENERATE_SOA_VIEW(HitsViewTemplate, SOA_VIEW_LAYOUT_LIST( SOA_VIEW_LAYOUT(HitsLayout, hitsLayout) ), @@ -97,7 +97,7 @@ class TrackingRecHit2DSOAStore { // The support objects layouts also not use default alignment but a more relaxed one. using SupportObjectsLayout = SupportObjectsLayoutTemplate; - generate_SoA_view(HitsAndSupportViewTemplate, + GENERATE_SOA_VIEW(HitsAndSupportViewTemplate, SOA_VIEW_LAYOUT_LIST( SOA_VIEW_LAYOUT(HitsLayout, hitsLayout), SOA_VIEW_LAYOUT(SupportObjectsLayout, supportObjectsLayout) diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h index 40ee0c43d..e187670bd 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h @@ -27,7 +27,7 @@ GENERATE_SOA_LAYOUT(SiPixelROCsStatusAndMappingLayoutTemplate, using SiPixelROCsStatusAndMappingLayout = SiPixelROCsStatusAndMappingLayoutTemplate<>; -generate_SoA_const_view(SiPixelROCsStatusAndMappingConstViewTemplate, +GENERATE_SOA_CONST_VIEW(SiPixelROCsStatusAndMappingConstViewTemplate, SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SiPixelROCsStatusAndMappingLayout, mappingLayout)), SOA_VIEW_VALUE_LIST( SOA_VIEW_VALUE(mappingLayout, fed), diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 93791c28e..908abb64d 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -280,7 +280,7 @@ struct ConstValueTraits { #define _DECLARE_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) -#define 
generate_SoA_view(CLASS, LAYOUTS_LIST, VALUE_LIST) \ +#define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ @@ -382,7 +382,7 @@ struct ConstValueTraits { _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ } -#define generate_SoA_const_view(CLASS, LAYOUTS_LIST, VALUE_LIST) \ +#define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu index 154be2546..46efd5ed5 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -36,7 +36,7 @@ GENERATE_SOA_LAYOUT(SoA1LayoutTemplate, using SoA1Layout = SoA1LayoutTemplate<>; // A 1 to 1 view of the store (except for unsupported types). -generate_SoA_view(SoA1ViewTemplate, +GENERATE_SOA_VIEW(SoA1ViewTemplate, SOA_VIEW_LAYOUT_LIST( SOA_VIEW_LAYOUT(SoA1Layout, soa1) ), @@ -57,7 +57,7 @@ generate_SoA_view(SoA1ViewTemplate, using SoA1View = SoA1ViewTemplate<>; // A partial view (artificial mix of store and view) -generate_SoA_view(SoA1View2GTemplate, +GENERATE_SOA_VIEW(SoA1View2GTemplate, SOA_VIEW_LAYOUT_LIST( SOA_VIEW_LAYOUT(SoA1Layout, soa1), SOA_VIEW_LAYOUT(SoA1View, soa1v) @@ -79,7 +79,7 @@ using SoA1View2G = SoA1View2GTemplate<>; // Same partial view, yet const. -generate_SoA_const_view(SoA1View2Gconst, +GENERATE_SOA_CONST_VIEW(SoA1View2Gconst, SOA_VIEW_LAYOUT_LIST( SOA_VIEW_LAYOUT(SoA1Layout, soa1), SOA_VIEW_LAYOUT(SoA1View, soa1v) From 4805e5e32c798e0412ad6102cd29bce6deda4865 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 7 Dec 2021 11:43:08 +0100 Subject: [PATCH 22/50] [cudadev] Reverting SiPixelROCsStatusAndMapping to a struct of fixed sized arrays. The struct is passed around and mapped into a view only for use. 
--- .../CondFormats/SiPixelROCsStatusAndMapping.h | 11 ++++++++ .../SiPixelROCsStatusAndMappingWrapper.cc | 14 +++++----- .../SiPixelROCsStatusAndMappingWrapper.h | 27 +++++++++++++------ ...elROCsStatusAndMappingWrapperESProducer.cc | 8 ++---- 4 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h index e187670bd..a2286721f 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h @@ -14,6 +14,17 @@ namespace pixelgpudetails { constexpr unsigned int MAX_SIZE_BYTE_BOOL = MAX_SIZE * sizeof(unsigned char); } // namespace pixelgpudetails +struct SiPixelROCsStatusAndMapping { + alignas(128) unsigned int fed[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int link[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int roc[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int rawId[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int rocInDet[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int moduleId[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned char badRocs[pixelgpudetails::MAX_SIZE]; + alignas(128) unsigned int size = 0; +}; + GENERATE_SOA_LAYOUT(SiPixelROCsStatusAndMappingLayoutTemplate, SOA_COLUMN(unsigned int, fed), SOA_COLUMN(unsigned int, link), diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc index 6b19b1c07..2b7cc79cd 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc @@ -15,13 +15,12 @@ #include "CondFormats/SiPixelROCsStatusAndMappingWrapper.h" #include "CUDACore/copyAsync.h" -SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMappingLayout const& cablingMap, 
+SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMapping const& cablingMap, std::vector modToUnp) : modToUnpDefault(modToUnp.size()), hasQuality_(true) { // TODO: check if cudaStreamDefault is appropriate - auto cablingMapMetadata = cablingMap.soaMetadata(); - cablingMapHostBuffer = cms::cuda::make_host_unique(cablingMapMetadata.byteSize(), cudaStreamDefault); - std::memcpy(cablingMapHostBuffer.get(), cablingMapMetadata.data(), cablingMapMetadata.byteSize()); + cablingMapHost = cms::cuda::make_host_unique(cudaStreamDefault); + std::memcpy(cablingMapHost.get(), &cablingMap, sizeof(SiPixelROCsStatusAndMapping)); std::copy(modToUnp.begin(), modToUnp.end(), modToUnpDefault.begin()); } @@ -29,13 +28,12 @@ SiPixelROCsStatusAndMappingConstView SiPixelROCsStatusAndMappingWrapper::getGPUP const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { // allocate - data.allocate(pixelgpudetails::MAX_SIZE, stream); + data.allocate(stream); // transfer - cms::cuda::copyAsync(data.cablingMapDeviceBuffer, this->cablingMapHostBuffer, - data.cablingMapDevice.soaMetadata().byteSize(), stream); + cms::cuda::copyAsync(data.cablingMapDevice, this->cablingMapHost, stream); } ); - return SiPixelROCsStatusAndMappingConstView(data.cablingMapDevice); + return data.cablingMapDeviceView; } const unsigned char* SiPixelROCsStatusAndMappingWrapper::getModToUnpAllAsync(cudaStream_t cudaStream) const { diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h index 927fb48f0..483088b67 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h @@ -14,7 +14,7 @@ class SiPixelROCsStatusAndMappingWrapper { public: /* This is using a layout as the size is needed. TODO: use views when views start embedding size. 
*/ - explicit SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMappingLayout const &cablingMap, + explicit SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMapping const &cablingMap, std::vector modToUnp); bool hasQuality() const { return hasQuality_; } @@ -29,16 +29,27 @@ class SiPixelROCsStatusAndMappingWrapper { std::vector> modToUnpDefault; bool hasQuality_; - cms::cuda::host::unique_ptr cablingMapHostBuffer; // host pined memory for cabling map. + cms::cuda::host::unique_ptr cablingMapHost; // host pined memory for cabling map. struct GPUData { - void allocate(size_t size, cudaStream_t stream) { - cablingMapDeviceBuffer = cms::cuda::make_device_unique( - SiPixelROCsStatusAndMappingLayout::computeDataSize(size), stream); - cablingMapDevice = SiPixelROCsStatusAndMappingLayout(cablingMapDeviceBuffer.get(), size); + void allocate(cudaStream_t stream) { + cablingMapDevice = cms::cuda::make_device_unique(stream); + // Populate the view with individual column pointers + auto & cmd = *cablingMapDevice; + cablingMapDeviceView = SiPixelROCsStatusAndMappingConstView( + cmd.fed, // Those are array pointers (in device, but we won't dereference them here). 
+ cmd.link, + cmd.roc, + cmd.rawId, + cmd.rocInDet, + cmd.moduleId, + cmd.badRocs, + &cmd.size // This is a scalar, we need the address-of operator + ); } - cms::cuda::device::unique_ptr cablingMapDeviceBuffer; - SiPixelROCsStatusAndMappingLayout cablingMapDevice = SiPixelROCsStatusAndMappingLayout(nullptr, 0); // map struct in GPU + cms::cuda::device::unique_ptr cablingMapDevice; + SiPixelROCsStatusAndMappingConstView cablingMapDeviceView; // map struct in GPU + }; cms::cuda::ESProduct gpuData_; diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc b/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc index cbde717e7..c77e1ed35 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelROCsStatusAndMappingWrapperESProducer.cc @@ -30,16 +30,12 @@ void SiPixelROCsStatusAndMappingWrapperESProducer::produce(edm::EventSetup& even { std::ifstream in(data_ / "cablingMap.bin", std::ios::binary); in.exceptions(std::ifstream::badbit | std::ifstream::failbit | std::ifstream::eofbit); - // We use default alignment - auto objBuffer = std::make_unique(SiPixelROCsStatusAndMappingLayout::computeDataSize(pixelgpudetails::MAX_SIZE)); - SiPixelROCsStatusAndMappingLayout obj(objBuffer.get(), pixelgpudetails::MAX_SIZE); - in.read(reinterpret_cast(obj.soaMetadata().data()), obj.soaMetadata().byteSize()); + SiPixelROCsStatusAndMapping obj; + in.read(reinterpret_cast(&obj), sizeof(SiPixelROCsStatusAndMapping)); unsigned int modToUnpDefSize; in.read(reinterpret_cast(&modToUnpDefSize), sizeof(unsigned int)); std::vector modToUnpDefault(modToUnpDefSize); in.read(reinterpret_cast(modToUnpDefault.data()), modToUnpDefSize); - // SiPixelROCsStatusAndMappingWrapper constructor will copy the objBuffer to a pinned host memory buffer - // the deallocation of objBuffer at the end of this scope is intentional. 
eventSetup.put(std::make_unique(obj, std::move(modToUnpDefault))); } } From cbac317590256a9007d405c5056a038b3d53b00f Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 8 Dec 2021 12:21:34 +0100 Subject: [PATCH 23/50] [cudadev] Removed dedundant const specifier. --- src/cudadev/DataFormats/SoAView.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 908abb64d..9f693b5e9 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -448,7 +448,7 @@ struct ConstValueTraits { \ /* AoS-like accessor (const) */ \ SOA_HOST_DEVICE_INLINE \ - const const_element operator[](size_t index) const { \ + const_element operator[](size_t index) const { \ return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ } \ \ From 8bcecbea062a03c4b20dc38c8cb103138dbbfd93 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 8 Dec 2021 16:12:26 +0100 Subject: [PATCH 24/50] [cudadev] Moved SoA templates to cms::soa namespace. --- src/cudadev/DataFormats/SoACommon.h | 12 ++++ src/cudadev/DataFormats/SoALayout.h | 15 ++--- src/cudadev/DataFormats/SoAView.h | 94 +++++++++++++++-------------- 3 files changed, 70 insertions(+), 51 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index d5cf7050e..200e0a79d 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -36,6 +36,8 @@ // compile-time sized SoA +namespace cms::soa { + // Helper template managing the value within it column // The optional compile time alignment parameter enables informing the // compiler of alignment (enforced by caller). 
@@ -147,13 +149,19 @@ inline size_t alignSize(size_t size, size_t alignment = 128) { return 0; } +} // namespace cms::soa + /* declare "scalars" (one value shared across the whole SoA) and "columns" (one value per element) */ #define _VALUE_TYPE_SCALAR 0 #define _VALUE_TYPE_COLUMN 1 #define _VALUE_TYPE_EIGEN_COLUMN 2 +namespace cms::soa { + enum class SoAColumnType { scalar = _VALUE_TYPE_SCALAR, column = _VALUE_TYPE_COLUMN, eigen = _VALUE_TYPE_EIGEN_COLUMN }; +} // namespace cms::soa + #define SOA_SCALAR(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) #define SOA_COLUMN(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) #define SOA_EIGEN_COLUMN(TYPE, NAME) (_VALUE_TYPE_EIGEN_COLUMN, TYPE, NAME) @@ -174,6 +182,8 @@ enum class SoAColumnType { scalar = _VALUE_TYPE_SCALAR, column = _VALUE_TYPE_COL IF_COLUMN, \ BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_EIGEN_COLUMN), IF_EIGEN_COLUMN, BOOST_PP_EMPTY()))) +namespace cms::soa { + /* Column accessors: templates implementing the global accesors (soa::x() and soa::x(index) */ enum class SoAAccessType: bool { mutableAccess, constAccess }; @@ -243,4 +253,6 @@ struct SoAAccessors{ * hints the compiler that it can expect column pointers to be aligned */ enum class AlignmentEnforcement : bool { Relaxed, Enforced }; +} // namespace cms::soa + #endif // ndef DataStructures_SoACommon_h diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 3ea8ec414..5e99bef97 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -98,19 +98,19 @@ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * ParentClass::byteAlignment; \ } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::scalar; \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::scalar; \ CPP_TYPE * 
BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }, /* Column */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ ParentClass::byteAlignment; \ } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::column; \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::column; \ CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }, /* Eigen column */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = SoAColumnType::eigen; \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::eigen; \ CPP_TYPE::Scalar * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }) #define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME @@ -217,10 +217,11 @@ * A macro defining a SoA layout (collection of scalars and columns of equal lengths) */ #define GENERATE_SOA_LAYOUT(CLASS, ...) \ - template \ + template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ + typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ \ /* For CUDA applications, we align to the 128 bytes of the cache lines. \ * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ @@ -233,13 +234,13 @@ alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ - using SoAValueWithConf = SoAValue; \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ template \ - using SoAConstValueWithConf = SoAConstValue; \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ template \ - using SoAEigenValueWithConf = SoAEigenValue; \ + using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ /* dump the SoA internal structure */ \ SOA_HOST_ONLY \ static void dump(size_t nElements) { \ diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 9f693b5e9..b8207c392 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -38,6 +38,8 @@ * */ +namespace cms::soa { + /* Traits for the different column type scenarios */ /* Value traits passes the class as is in the case of column type and return * an empty class with functions returning non-scalar as accessors. */ @@ -62,6 +64,8 @@ struct ConstValueTraits { // Any attempt to do anything with the eigen value a const element will fail. 
}; +} // namespace cms::soa; + #include /* * Members definitions macros for viewa @@ -81,13 +85,13 @@ struct ConstValueTraits { #define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ typedef typename BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(TypeOf_, LAYOUT_MEMBER) \ BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ - constexpr static SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, LAYOUT_MEMBER); \ SOA_HOST_DEVICE_INLINE \ DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ }; \ - static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != SoAColumnType::eigen, \ + static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != cms::soa::SoAColumnType::eigen, \ "Eigen columns not supported in views."); #define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -123,15 +127,15 @@ struct ConstValueTraits { * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. 
*/ -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(LAYOUT, MEMBER, NAME) \ - (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ - static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != SoAColumnType::eigen, \ - "Eigen values not supported in views"); \ - auto addr = LAYOUT.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ - if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ - if (reinterpret_cast(addr) % byteAlignment) \ - throw std::out_of_range("In constructor by layout: misaligned column: " #NAME); \ - return addr; \ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(LAYOUT, MEMBER, NAME) \ + (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ + static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != cms::soa::SoAColumnType::eigen, \ + "Eigen values not supported in views"); \ + auto addr = LAYOUT.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (reinterpret_cast(addr) % byteAlignment) \ + throw std::out_of_range("In constructor by layout: misaligned column: " #NAME); \ + return addr; \ }())) #define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -200,8 +204,8 @@ struct ConstValueTraits { * Declaration of the private members of the const element subclass */ #define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - const ConstValueTraits< \ - SoAConstValueWithConf, \ + const cms::soa::ConstValueTraits< \ + SoAConstValueWithConf, \ BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, LOCAL_NAME) \ > BOOST_PP_CAT(LOCAL_NAME, _); @@ -236,17 +240,17 @@ struct ConstValueTraits { /** * Direct access to column pointer and indexed access */ -#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - /* Column or scalar */ \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() { \ - return typename SoAAccessors:: \ - template ColumnType:: \ - template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(); \ - } \ - 
SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) { \ - return typename SoAAccessors:: \ - template ColumnType:: \ - template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ +#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + /* Column or scalar */ \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType:: \ + template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(); \ + } \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType:: \ + template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ } #define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -255,17 +259,17 @@ struct ConstValueTraits { /** * Direct access to column pointer (const) and indexed access. */ -#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - /* Column or scalar */ \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() const { \ - return typename SoAAccessors:: \ - template ColumnType:: \ - template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(); \ - } \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) const { \ - return typename SoAAccessors:: \ - template ColumnType:: \ - template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + /* Column or scalar */ \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() const { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType:: \ + template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(); \ + } \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) const { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType:: \ + template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ } #define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -281,11 +285,12 @@ struct ConstValueTraits { BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL 
BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) #define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ - template \ + template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ - \ + typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ + \ /* For CUDA applications, we align to the 128 bytes of the cache lines. \ * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ * up to compute capability 8.X. \ @@ -297,13 +302,13 @@ struct ConstValueTraits { alignmentEnforcement == AlignmentEnforcement::Enforced ? byteAlignment : 0; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ - using SoAValueWithConf = SoAValue; \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ template \ - using SoAConstValueWithConf = SoAConstValue; \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ template \ - using SoAEigenValueWithConf = SoAEigenValue; \ + using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ /** \ * Helper/friend class allowing SoA introspection. \ */ \ @@ -383,10 +388,11 @@ struct ConstValueTraits { } #define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ - template \ + template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ + typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ \ /* For CUDA applications, we align to the 128 bytes of the cache lines. \ * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ @@ -399,13 +405,13 @@ struct ConstValueTraits { alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ - using SoAValueWithConf = SoAValue; \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ template \ - using SoAConstValueWithConf = SoAConstValue; \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ template \ - using SoAEigenValueWithConf = SoAEigenValue; \ + using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ \ /** \ * Helper/friend class allowing SoA introspection. \ From 4dc53083d6e7b8330991497373689aaf862a5a2f Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 9 Dec 2021 11:09:18 +0100 Subject: [PATCH 25/50] [cudadev] Fixed missing host device declaration for constructor. --- src/cudadev/DataFormats/SoAView.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index b8207c392..7dc19ffa2 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -52,14 +52,14 @@ struct ConstValueTraits : public C { using C::C; }; template struct ConstValueTraits { // Just take to SoAValue type to generate the right constructor. - ConstValueTraits(size_t, const typename C::valueType *) {} + SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::valueType *) {} // Any attempt to do anything with the "scalar" value a const element will fail. }; template struct ConstValueTraits { // Just take to SoAValue type to generate the right constructor. - ConstValueTraits(size_t, const typename C::valueType *) {} + SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::valueType *) {} // TODO: implement // Any attempt to do anything with the eigen value a const element will fail. }; From b50d5a9fccf4eabf7f7a320502a9f84ba6ceaa0a Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 9 Dec 2021 11:43:45 +0100 Subject: [PATCH 26/50] [cudadev] Marked SoA layout clone function as const. 
--- src/cudadev/DataFormats/SoALayout.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 5e99bef97..82fd25383 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -269,7 +269,7 @@ SOA_HOST_DEVICE_INLINE size_t byteAlignment() const { return CLASS::byteAlignment; } \ SOA_HOST_DEVICE_INLINE std::byte* data() const { return parent_.mem_; } \ SOA_HOST_DEVICE_INLINE std::byte* nextByte() const { return parent_.mem_ + parent_.byteSize_; } \ - SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) { return CLASS(addr, parent_.nElements_); } \ + SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) const { return CLASS(addr, parent_.nElements_); } \ _ITERATE_ON_ALL(_DEFINE_METADATA_MEMBERS, ~, __VA_ARGS__) \ \ private: \ From 988e8db615b61466740f19c436f6c56a298c4ad7 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 9 Dec 2021 16:12:49 +0100 Subject: [PATCH 27/50] [cudadev] Added const correctness/limitations in SoAMetadata subclasses. 
--- src/cudadev/DataFormats/SoALayout.h | 29 ++++++++++++++++++++++------- src/cudadev/DataFormats/SoAView.h | 8 ++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 82fd25383..6eb53d5bf 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -94,24 +94,35 @@ */ #define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ _SWITCH_ON_TYPE( \ - VALUE_TYPE, /* Scalar */ \ + VALUE_TYPE, \ + /* Scalar */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * ParentClass::byteAlignment; \ } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::scalar; \ - CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }, /* Column */ \ + CPP_TYPE const * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ + CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); } \ + , \ + /* Column */ \ + CPP_TYPE const * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ + CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); } \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ ParentClass::byteAlignment; \ - } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + } \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::column; \ - CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }, /* Eigen column */ \ + , \ + /* Eigen column */ \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * 
sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ - } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + } \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::eigen; \ - CPP_TYPE::Scalar * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); }) + CPP_TYPE::Scalar const * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ + CPP_TYPE::Scalar * BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); } \ + ) #define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME @@ -267,11 +278,14 @@ SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ SOA_HOST_DEVICE_INLINE size_t byteSize() const { return parent_.byteSize_; } \ SOA_HOST_DEVICE_INLINE size_t byteAlignment() const { return CLASS::byteAlignment; } \ - SOA_HOST_DEVICE_INLINE std::byte* data() const { return parent_.mem_; } \ + SOA_HOST_DEVICE_INLINE std::byte* data() { return parent_.mem_; } \ + SOA_HOST_DEVICE_INLINE const std::byte* data() const { return parent_.mem_; } \ SOA_HOST_DEVICE_INLINE std::byte* nextByte() const { return parent_.mem_ + parent_.byteSize_; } \ SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) const { return CLASS(addr, parent_.nElements_); } \ _ITERATE_ON_ALL(_DEFINE_METADATA_MEMBERS, ~, __VA_ARGS__) \ \ + SoAMetadata & operator=(const SoAMetadata &) = delete; \ + SoAMetadata(const SoAMetadata &) = delete; \ private: \ SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ const CLASS& parent_; \ @@ -279,6 +293,7 @@ }; \ friend SoAMetadata; \ SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ + SOA_HOST_DEVICE_INLINE SoAMetadata soaMetadata() { return SoAMetadata(*this); } \ 
\ /* Trivial constuctor */ \ CLASS() \ diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 7dc19ffa2..b1314c931 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -319,12 +319,17 @@ struct ConstValueTraits { \ /* Alias member types to name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, BOOST_PP_EMPTY(), VALUE_LIST) \ + \ + /* Forbid copying to avoid const correctness evasion */ \ + SoAMetadata & operator=(const SoAMetadata &) = delete; \ + SoAMetadata(const SoAMetadata &) = delete; \ private: \ SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ const CLASS& parent_; \ }; \ friend SoAMetadata; \ SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ + SOA_HOST_DEVICE_INLINE SoAMetadata soaMetadata() { return SoAMetadata(*this); } \ \ /* Trivial constuctor */ \ CLASS() : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ @@ -423,6 +428,9 @@ struct ConstValueTraits { \ /* Alias member types to name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, const, VALUE_LIST) \ + \ + SoAMetadata & operator=(const SoAMetadata &) = delete; \ + SoAMetadata(const SoAMetadata &) = delete; \ private: \ SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ const CLASS& parent_; \ From cf212d257ae1fd3b019d6e3ec9fc70c605d2e64f Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Sat, 11 Dec 2021 22:31:05 +0100 Subject: [PATCH 28/50] [cudadev] Added SoA general explanation. Added symbolic names for cache line sizes/default alignment. 
--- src/cudadev/DataFormats/SoA.md | 145 ++++++++++++++++++++++++++++ src/cudadev/DataFormats/SoACommon.h | 8 ++ src/cudadev/DataFormats/SoALayout.h | 3 +- src/cudadev/DataFormats/SoAView.h | 10 +- 4 files changed, 161 insertions(+), 5 deletions(-) create mode 100644 src/cudadev/DataFormats/SoA.md diff --git a/src/cudadev/DataFormats/SoA.md b/src/cudadev/DataFormats/SoA.md new file mode 100644 index 000000000..0998f8e98 --- /dev/null +++ b/src/cudadev/DataFormats/SoA.md @@ -0,0 +1,145 @@ +# Structure of array (SoA) generation + +The two header files [`SoALayout.h`](SoALayout.h) and [`SoAView.h`](SoAView.h) define preprocessor macros that allow generating SoA +classes. The SoA classes generate multiple, aligned columns from a memory buffer. The memory buffer is allocated separately by the +user, and can be located in a memory space different from the local one (for example, a SoA located in a GPU device memory can be +fully pre-defined on the host and the resulting structure is passed to the GPU kernel). + +This columnar storage allows efficient memory access by GPU kernels (coalesced access on cache line aligned data) and possibly +vectorization. + +Additionally, templating of the layout and view classes will allow compile-time variations of accesses and checks: verification of +alignment and corresponding compiler hinting, cache strategy (non-coherent, streaming with immediate invalidation), range checking. + +## Layout + +`SoALayout` is a macro generated templated class that subdivides a provided buffer into a collection of columns, Eigen columns and +scalars. The buffer is expected to be aligned with a selectable alignment defaulting to the CUDA GPU cache line (128 bytes). All +columns and scalars within a `SoALayout` will be individually aligned, leaving padding at the end of each if necessary. Eigen columns +have each component of the vector or matrix properly aligned in individual columns (by defining the stride between components). 
Only +compile-time sized Eigen vectors and matrices are supported. Scalar members are members of layout with one element, irrespective of +the size of the layout. + +Static utility functions automatically compute the byte size of a layout, taking into account all its columns and alignment. + +## View + +`SoAView` is a macro generated templated class allowing access to columns defined in one or multiple `SoALayout`s or `SoAView`s. The +view can be generated in constant and non-constant flavors. All view flavors provide the same interface where scalar elements +are accessed with an `operator()`: `soa.scalar()` while columns (Eigen or not) are accessed via an array of structure (AoS) -like +syntax: `soa[index].x()`. The "struct" object returned by `operator[]` can be used as a shortcut: +`auto si = soa[index]; si.z() = si.x() + si.y();` + +A view can be instantiated by being passed the layout(s) and view(s) it is defined against, or column by column. + +## SoAMetadata subclass + +In order not to clutter the namespace of the generated class, a subclass named `SoAMetadata` is generated. It is instantiated with the +`soaMetadata()` member function and contains various utility functions, like `size()` (number of elements in the SoA), `byteSize()`, +`byteAlignment()`, `data()` (a pointer to the buffer). A `nextByte()` function computes the first byte of a structure right after a +layout, allowing using a single buffer for multiple layouts. 
+ +## Examples + +A layout can be defined as: + +```C++ +#include "DataFormats/SoALayout.h" + +GENERATE_SOA_LAYOUT(SoA1LayoutTemplate, + // predefined static scalars + // size_t size; + // size_t alignment; + + // columns: one value per element + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), + SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + SOA_EIGEN_COLUMN(Eigen::Vector3d, b), + SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + SOA_COLUMN(uint16_t, color), + SOA_COLUMN(int32_t, value), + SOA_COLUMN(double *, py), + SOA_COLUMN(uint32_t, count), + SOA_COLUMN(uint32_t, anotherCount), + + // scalars: one value for the whole structure + SOA_SCALAR(const char *, description), + SOA_SCALAR(uint32_t, someNumber) +); + +// Default template parameters are < +// size_t ALIGNMENT = cms::soa::CacheLineSize::defaultSize, +// cms::soa::AlignmentEnforcement ALIGNMENT_ENFORCEMENT = cms::soa::AlignmentEnforcement::Relaxed +// > +using SoA1Layout = SoA1LayoutTemplate<>; + +using SoA1LayoutAligned = SoA1LayoutTemplate<cms::soa::CacheLineSize::defaultSize, cms::soa::AlignmentEnforcement::Enforced>; +``` + +The buffer of the proper size is allocated, and the layout is populated with: + +```C++ +// Allocation of an aligned buffer +size_t elements = 100; +using AlignedBuffer = std::unique_ptr<std::byte, decltype(std::free) *>; +AlignedBuffer h_buf (reinterpret_cast<std::byte *>(aligned_alloc(SoA1LayoutAligned::byteAlignment, SoA1LayoutAligned::computeDataSize(elements))), std::free); +SoA1LayoutAligned soaLayout(h_buf.get(), elements); +``` + +A view will derive its column types from one or multiple layouts. The macro generating the view takes a list of layouts or views it +gets its data from as a first parameter, and the selection of the columns the view will give access to as a second parameter. + +```C++ +// A 1 to 1 view of the layout (except for unsupported types). 
+GENERATE_SOA_VIEW(SoA1ViewTemplate, + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(SoA1Layout, soa1) + ), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(soa1, x), + SOA_VIEW_VALUE(soa1, y), + SOA_VIEW_VALUE(soa1, z), + SOA_VIEW_VALUE(soa1, color), + SOA_VIEW_VALUE(soa1, value), + SOA_VIEW_VALUE(soa1, py), + SOA_VIEW_VALUE(soa1, count), + SOA_VIEW_VALUE(soa1, anotherCount), + SOA_VIEW_VALUE(soa1, description), + SOA_VIEW_VALUE(soa1, someNumber) + ) +); + +using SoA1View = SoA1ViewTemplate<>; + +SoA1View soaView(soaLayout); + +for (size_t i=0; i < soaLayout.soaMetadata().size(); ++i) { + auto si = soaView[i]; + si.x() = si.y() = i; + soaView.someNumber() += i; +} +``` + +## Template parameters + +The template parameters are: +- Byte alignment (defaulting to the NVIDIA GPU cache line size (128 bytes)) +- Alignment enforcement (`Relaxed` or `Enforced`). When enforced, the alignment will be checked at construction time, and the accesses +are done with compiler hinting (using the widely supported `__builtin_assume_aligned` intrinsic). + +## Using SoA layouts and views with GPUs + +Instantiation of views and layouts is preferably done on the CPU side. The view object is lightweight, with only one pointer per +column (size to be added later). Extra view classes can be generated to restrict this number of pointers to the strict minimum in +scenarios where only a subset of columns are used in a given GPU kernel. + +## Current status and further improvements + +- The layout and views support scalars and columns, alignment and alignment enforcement and hinting. +- Cache access style will be added at in subsequent updates. +- `__restrict__` compiler hinting will be added later. +- Optional range checking will be added later. This implies adding support for size to views and will restrict views to columns of +equal size. +- Eigen access was validated with an earlier scheme, but will be ported back to the current one later. 
\ No newline at end of file diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 200e0a79d..d023ac0ae 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -253,6 +253,14 @@ struct SoAAccessors{ * hints the compiler that it can expect column pointers to be aligned */ enum class AlignmentEnforcement : bool { Relaxed, Enforced }; +struct CacheLineSize { + static constexpr size_t NvidiaGPU = 128; + static constexpr size_t IntelCPU = 64; + static constexpr size_t AMDCPU = 64; + static constexpr size_t ARMCPU = 64; + static constexpr size_t defaultSize = NvidiaGPU; +}; + } // namespace cms::soa #endif // ndef DataStructures_SoACommon_h diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 6eb53d5bf..3b9f45c13 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -228,7 +228,8 @@ * A macro defining a SoA layout (collection of scalars and columns of equal lengths) */ #define GENERATE_SOA_LAYOUT(CLASS, ...) \ - template \ + template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index b1314c931..15b072ebb 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -285,7 +285,8 @@ struct ConstValueTraits { BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) #define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ - template \ + template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ @@ -295,7 +296,7 @@ struct ConstValueTraits { * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ * up to compute capability 8.X. 
\ */ \ - constexpr static size_t defaultAlignment = 128; \ + constexpr static size_t defaultAlignment = cms::soa::CacheLineSize::defaultSize; \ constexpr static size_t byteAlignment = ALIGNMENT; \ constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ constexpr static size_t conditionalAlignment = \ @@ -393,7 +394,8 @@ struct ConstValueTraits { } #define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ - template \ + template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ @@ -403,7 +405,7 @@ struct ConstValueTraits { * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ * up to compute capability 8.X. \ */ \ - constexpr static size_t defaultAlignment = 128; \ + constexpr static size_t defaultAlignment = cms::soa::CacheLineSize::defaultSize; \ constexpr static size_t byteAlignment = ALIGNMENT; \ constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ constexpr static size_t conditionalAlignment = \ From 99114046770d567a11e252a55c91963b959018a2 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 13 Dec 2021 15:33:10 +0100 Subject: [PATCH 29/50] [cudadev] Replaced hardcoded value with symbolic equivalent. Added more notes in the SoA.md plans section. --- src/cudadev/DataFormats/SoA.md | 3 ++- src/cudadev/DataFormats/SoACommon.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cudadev/DataFormats/SoA.md b/src/cudadev/DataFormats/SoA.md index 0998f8e98..267f4a898 100644 --- a/src/cudadev/DataFormats/SoA.md +++ b/src/cudadev/DataFormats/SoA.md @@ -142,4 +142,5 @@ scenarios where only a subset of columns are used in a given GPU kernel. - `__restrict__` compiler hinting will be added later. - Optional range checking will be added later. This implies adding support for size to views and will restrict views to columns of equal size. 
-- Eigen access was validated with an earlier scheme, but will be ported back to the current one later. \ No newline at end of file +- Eigen access was validated with an earlier scheme, but will be ported back to the current one later. Some alignment information can be +passed to Eigen strcutures. Const variants of access classes should be created to ensure we cannot leak mutable access to const products. diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index d023ac0ae..edcb0b56b 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -91,6 +91,7 @@ class SoAConstValue { }; // Helper template managing the value within it column +// TODO Create a const variant to avoid leaking mutable access. template class SoAEigenValue { public: @@ -126,7 +127,7 @@ class SoAEigenValue { // Helper template to avoid commas in macro template struct EigenConstMapMaker { - typedef Eigen::Map> Type; + typedef Eigen::Map> Type; class DataHolder { public: DataHolder(const typename C::Scalar* data) : data_(data) {} From a8bf1ffc77fdf19d96ed2bf86afd9a4688c7bc08 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 13 Dec 2021 21:08:01 +0100 Subject: [PATCH 30/50] [cudadev] Fixed redundant const specifier. 
--- src/cudadev/DataFormats/SoAView.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 15b072ebb..bf5c22441 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -377,7 +377,7 @@ struct ConstValueTraits { \ /* AoS-like accessor (const) */ \ SOA_HOST_DEVICE_INLINE \ - const const_element operator[](size_t index) const { \ + const_element operator[](size_t index) const { \ return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ } \ \ From 7b24d9953650c8cdce0fe6efbff1f3282eafccb8 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 13 Dec 2021 22:20:06 +0100 Subject: [PATCH 31/50] [cudadev] Limited operator=() of elements to non-scalars. --- src/cudadev/DataFormats/SoAView.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index bf5c22441..e2ac43fbb 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -215,7 +215,9 @@ struct ConstValueTraits { /** * Generator of the member-by-member copy operator of the element subclass. */ -#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) LOCAL_NAME() = other.LOCAL_NAME(); +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + if constexpr (SoAMetadata:: BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != cms::soa::SoAColumnType::scalar) \ + LOCAL_NAME() = other.LOCAL_NAME(); #define _DECLARE_VIEW_ELEMENT_VALUE_COPY(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL LAYOUT_MEMBER_NAME) From 39865500666e9f78a35e9ff4051acda79f8e77e3 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 13 Dec 2021 23:13:36 +0100 Subject: [PATCH 32/50] [cudadev] Removed aliasing of SoAs in local memory. 
--- src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h index 8a92dc98e..9d4bf37f2 100644 --- a/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h +++ b/src/cudadev/plugin-SiPixelRecHits/gpuPixelRecHits.h @@ -16,9 +16,9 @@ namespace gpuPixelRecHits { __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const* __restrict__ cpeParams, BeamSpotPOD const* __restrict__ bs, - SiPixelDigisCUDA::DevicePixelConstView pdigis, + SiPixelDigisCUDA::DevicePixelConstView digis, int numElements, - SiPixelClustersCUDA::DeviceConstView const pclusters, + SiPixelClustersCUDA::DeviceConstView clusters, TrackingRecHit2DSOAStore* phits) { // FIXME // the compiler seems NOT to optimize loads from views (even in a simple test case) @@ -30,9 +30,6 @@ namespace gpuPixelRecHits { auto& hits = *phits; - auto const digis = pdigis; // the copy is intentional! - auto const& clusters = pclusters; - // copy average geometry corrected by beamspot . FIXME (move it somewhere else???) if (0 == blockIdx.x) { auto& agc = hits.averageGeometry(); From c0045808b8e0369c7003ad45499d33f88021d3ac Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 14 Dec 2021 09:05:12 +0100 Subject: [PATCH 33/50] [cudadev] Added planned features to SoA.md --- src/cudadev/DataFormats/SoA.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cudadev/DataFormats/SoA.md b/src/cudadev/DataFormats/SoA.md index 267f4a898..d6e169427 100644 --- a/src/cudadev/DataFormats/SoA.md +++ b/src/cudadev/DataFormats/SoA.md @@ -144,3 +144,5 @@ scenarios where only a subset of columns are used in a given GPU kernel. equal size. - Eigen access was validated with an earlier scheme, but will be ported back to the current one later. Some alignment information can be passed to Eigen strcutures. 
Const variants of access classes should be created to ensure we cannot leak mutable access to const products. +- Improve `dump()` function and turn it into a more classic `operator<<()`. +- Create a mechanism to derive trivial views and const views from a single layout. From a2fad0a8ad8b2c22d2d236f3c02813fd1132dc99 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 14 Dec 2021 15:28:29 +0100 Subject: [PATCH 34/50] [cudadev] Added support for switchable restrict and cache style selection. Defaulting to __restrict__ and non-coherent cache for const views. --- src/cudadev/DataFormats/SoACommon.h | 79 ++++++++++++++++++++++++----- src/cudadev/DataFormats/SoAView.h | 10 ++-- 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index edcb0b56b..d31c77ea5 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -38,40 +38,91 @@ namespace cms::soa { +enum class CacheAccessStyle : char { Default, NonCoherent, Streaming }; + +enum class RestrictQualify : bool { Enabled, Disabled, Default = Disabled }; + +template +struct add_restrict {}; + +template +struct add_restrict { + typedef T Value; + typedef T * __restrict__ Pointer; + typedef T & __restrict__ Reference; + typedef const T ConstValue; + typedef const T * __restrict__ PointerToConst; + typedef const T & __restrict__ ReferenceToConst; +}; + +template +struct add_restrict { + typedef T Value; + typedef T * Pointer; + typedef T & Reference; + typedef const T ConstValue; + typedef const T * PointerToConst; + typedef const T & ReferenceToConst; +}; + +template +SOA_HOST_DEVICE_INLINE T readWithCacheStyle (const T * addr) { + if constexpr (CACHE_ACCESS_STYLE == CacheAccessStyle::NonCoherent) { + return LOAD_INCOHERENT(addr); + } else if constexpr (CACHE_ACCESS_STYLE == CacheAccessStyle::Streaming) { + return LOAD_STREAMED(addr); + } + return *addr; +} + // Helper template managing the value within it 
column // The optional compile time alignment parameter enables informing the // compiler of alignment (enforced by caller). -template +template class SoAValue { public: + typedef add_restrict Restr; + typedef typename Restr::Value Val; + typedef typename Restr::Pointer Ptr; + typedef typename Restr::Reference Ref; + typedef typename Restr::PointerToConst PtrToConst; SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T* col) : idx_(i), col_(col) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ - SOA_HOST_DEVICE_INLINE T& operator()() { return alignedCol()[idx_]; } - SOA_HOST_DEVICE_INLINE T operator()() const { return *(alignedCol() + idx_); } - SOA_HOST_DEVICE_INLINE T* operator&() { return &alignedCol()[idx_]; } - SOA_HOST_DEVICE_INLINE const T* operator&() const { return &alignedCol()[idx_]; } + SOA_HOST_DEVICE_INLINE Ref operator()() { return alignedCol()[idx_]; } + SOA_HOST_DEVICE_INLINE Val operator()() const { return *(alignedCol() + idx_); } + SOA_HOST_DEVICE_INLINE Ptr operator&() { return &alignedCol()[idx_]; } + SOA_HOST_DEVICE_INLINE PtrToConst operator&() const { return &alignedCol()[idx_]; } template - SOA_HOST_DEVICE_INLINE T& operator=(const T2& v) { + SOA_HOST_DEVICE_INLINE Ref operator=(const T2& v) { return alignedCol()[idx_] = v; } - typedef T valueType; + typedef Val valueType; static constexpr auto valueSize = sizeof(T); private: - SOA_HOST_DEVICE_INLINE T* alignedCol() const { + SOA_HOST_DEVICE_INLINE Ptr alignedCol() const { if constexpr (ALIGNMENT) { - return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); + return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); } - return col_; + return reinterpret_cast(col_); } size_t idx_; T* col_; }; // Helper template managing the value within it column -template +template class SoAConstValue { public: + typedef add_restrict Restr; + typedef typename Restr::Value Val; + typedef typename Restr::Pointer Ptr; + typedef typename Restr::Reference Ref; + typedef typename 
Restr::PointerToConst PtrToConst; SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, const T* col) : idx_(i), col_(col) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ SOA_HOST_DEVICE_INLINE T operator()() const { return *(alignedCol() + idx_); } @@ -80,11 +131,11 @@ class SoAConstValue { static constexpr auto valueSize = sizeof(T); private: - SOA_HOST_DEVICE_INLINE const T* alignedCol() const { + SOA_HOST_DEVICE_INLINE PtrToConst alignedCol() const { if constexpr (ALIGNMENT) { - return __builtin_assume_aligned(col_, ALIGNMENT); + return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); } - return col_; + return reinterpret_cast(col_) ; } size_t idx_; const T* col_; diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index e2ac43fbb..d8dfc2589 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -397,7 +397,9 @@ struct ConstValueTraits { #define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ + cms::soa::AlignmentEnforcement ALIGNMENT_ENFORCEMENT = cms::soa::AlignmentEnforcement::Relaxed, \ + cms::soa::CacheAccessStyle CACHE_ACCESS_STYLE = cms::soa::CacheAccessStyle::NonCoherent, \ + cms::soa::RestrictQualify RESTRICT_QUALIFY = cms::soa::RestrictQualify::Enabled> \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ @@ -412,12 +414,14 @@ struct ConstValueTraits { constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ constexpr static size_t conditionalAlignment = \ alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ + constexpr static cms::soa::CacheAccessStyle cacheAccessStyle = CACHE_ACCESS_STYLE; \ + constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ - using SoAValueWithConf = cms::soa::SoAValue; \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ template \ - using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ template \ using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ From 87d084a6ba96cc86bab4b72401f9bf0aefb63c6a Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 15 Dec 2021 17:48:27 +0100 Subject: [PATCH 35/50] [cudadev] Moved accesses from value to const ref so that we get the benefit of __restrict__ Created an example to easily validate the effect __restrict__ The result can be checked compiling with the `-ptx` option instead of `-c`, and then grepped with:. ``` $ cat obj/cudadev/test/SoAStoreAndView_t.cu.ptx | c++filt | egrep '(.visible|(ld|st).global)' --color .visible .entry aAMDef(SoA1ViewTemplate<128ul, (cms::soa::AlignmentEnforcement)0, (cms::soa::CacheAccessStyle)0, (cms::soa::RestrictQualify)1>, unsigned long)( ld.global.f64 %fd1, [%rd21]; ld.global.f64 %fd2, [%rd20]; st.global.f64 [%rd22], %fd3; ld.global.f64 %fd4, [%rd21]; ld.global.f64 %fd5, [%rd20]; st.global.f64 [%rd23], %fd6; .visible .entry aAMRestrict(SoA1ViewTemplate<128ul, (cms::soa::AlignmentEnforcement)0, (cms::soa::CacheAccessStyle)0, (cms::soa::RestrictQualify)0>, unsigned long)( ld.global.nc.f64 %fd1, [%rd21]; ld.global.nc.f64 %fd2, [%rd20]; st.global.f64 [%rd22], %fd3; st.global.f64 [%rd23], %fd4; .visible .entry aAMNC(SoA1ViewTemplate<128ul, (cms::soa::AlignmentEnforcement)0, (cms::soa::CacheAccessStyle)1, (cms::soa::RestrictQualify)1>, unsigned long)( ld.global.f64 %fd1, [%rd21]; ld.global.f64 %fd2, [%rd20]; st.global.f64 [%rd22], %fd3; ld.global.f64 %fd4, [%rd21]; ld.global.f64 
%fd5, [%rd20]; st.global.f64 [%rd23], %fd6; .visible .entry aAMRestrict(SoA1ViewTemplate<128ul, (cms::soa::AlignmentEnforcement)0, (cms::soa::CacheAccessStyle)1, (cms::soa::RestrictQualify)0>, unsigned long)( ld.global.nc.f64 %fd1, [%rd21]; ld.global.nc.f64 %fd2, [%rd20]; st.global.f64 [%rd22], %fd3; st.global.f64 [%rd23], %fd4; ``` The hint from restrict qualifier is used by the compiler to load values from global memory only once and via the non-coherent cache. The cache access styles are not implemented, and hence have no effect. --- src/cudadev/DataFormats/SoACommon.h | 34 ++++++++++++++------ src/cudadev/DataFormats/SoAView.h | 20 ++++++++---- src/cudadev/test/SoAStoreAndView_t.cu | 46 +++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 16 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index d31c77ea5..317bc19f9 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -25,13 +25,13 @@ #if defined(__CUDACC__) && defined(__CUDA_ARCH__) // Read a pointer content via read-only (non coherent) cache. 
-#define LOAD_INCOHERENT(A) __ldg(A) -#define LOAD_STREAMED(A) __ldcs(A) -#define STORE_STREAMED(A, V) __stcs(A, V) +#define LOAD_NONCOHERENT(A) __ldg(A) +#define LOAD_STREAMING(A) __ldcs(A) +#define STORE_STREAMING(A, V) __stcs(A, V) #else -#define LOAD_INCOHERENT(A) *(A) -#define LOAD_STREAMED(A) *(A) -#define STORE_STREAMED(A, V) *(A) = (V) +#define LOAD_NONCOHERENT(A) *(A) +#define LOAD_STREAMING(A) *(A) +#define STORE_STREAMING(A, V) *(A) = (V) #endif // compile-time sized SoA @@ -70,7 +70,7 @@ SOA_HOST_DEVICE_INLINE T readWithCacheStyle (const T * addr) { if constexpr (CACHE_ACCESS_STYLE == CacheAccessStyle::NonCoherent) { return LOAD_INCOHERENT(addr); } else if constexpr (CACHE_ACCESS_STYLE == CacheAccessStyle::Streaming) { - return LOAD_STREAMED(addr); + return LOAD_STREAMING(addr); } return *addr; } @@ -88,10 +88,19 @@ class SoAValue { typedef typename Restr::Pointer Ptr; typedef typename Restr::Reference Ref; typedef typename Restr::PointerToConst PtrToConst; + typedef typename Restr::ReferenceToConst RefToConst; SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T* col) : idx_(i), col_(col) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ - SOA_HOST_DEVICE_INLINE Ref operator()() { return alignedCol()[idx_]; } - SOA_HOST_DEVICE_INLINE Val operator()() const { return *(alignedCol() + idx_); } + SOA_HOST_DEVICE_INLINE Ref operator()() { + // Ptr type will add the restrict qualifyer if needed + Ptr col = alignedCol(); + return col[idx_]; + } + SOA_HOST_DEVICE_INLINE RefToConst operator()() const { + // PtrToConst type will add the restrict qualifyer if needed + PtrToConst col = alignedCol(); + return col[idx_]; + } SOA_HOST_DEVICE_INLINE Ptr operator&() { return &alignedCol()[idx_]; } SOA_HOST_DEVICE_INLINE PtrToConst operator&() const { return &alignedCol()[idx_]; } template @@ -123,9 +132,14 @@ class SoAConstValue { typedef typename Restr::Pointer Ptr; typedef typename Restr::Reference Ref; typedef typename Restr::PointerToConst PtrToConst; + 
typedef typename Restr::ReferenceToConst RefToConst; SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, const T* col) : idx_(i), col_(col) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ - SOA_HOST_DEVICE_INLINE T operator()() const { return *(alignedCol() + idx_); } + SOA_HOST_DEVICE_INLINE RefToConst operator()() const { + // Ptr type will add the restrict qualifyer if needed + PtrToConst col = alignedCol(); + return col[idx_]; + } SOA_HOST_DEVICE_INLINE const T* operator&() const { return &alignedCol()[idx_]; } typedef T valueType; static constexpr auto valueSize = sizeof(T); diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index d8dfc2589..85368cee6 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -193,7 +193,7 @@ struct ConstValueTraits { * Declaration of the members accessors of the const element subclass */ #define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - SOA_HOST_DEVICE_INLINE typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) LOCAL_NAME() const { \ + SOA_HOST_DEVICE_INLINE typename SoAConstValueWithConf::RefToConst LOCAL_NAME() const { \ return BOOST_PP_CAT(LOCAL_NAME, _)(); \ } @@ -286,9 +286,13 @@ struct ConstValueTraits { #define _DECLARE_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) +/* ---- MUTABLE VIEW -------------------------------------------------------------------------------------------------------------------- */ + #define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ + cms::soa::AlignmentEnforcement ALIGNMENT_ENFORCEMENT = cms::soa::AlignmentEnforcement::Relaxed, \ + cms::soa::CacheAccessStyle CACHE_ACCESS_STYLE = cms::soa::CacheAccessStyle::Default, \ + cms::soa::RestrictQualify RESTRICT_QUALIFY = cms::soa::RestrictQualify::Disabled> \ struct CLASS { \ /* these could be moved to an 
external type trait to free up the symbol names */ \ using self_type = CLASS; \ @@ -303,12 +307,14 @@ struct ConstValueTraits { constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ constexpr static size_t conditionalAlignment = \ alignmentEnforcement == AlignmentEnforcement::Enforced ? byteAlignment : 0; \ - /* Those typedefs avoid having commas in macros (which is problematic) */ \ + constexpr static cms::soa::CacheAccessStyle cacheAccessStyle = CACHE_ACCESS_STYLE; \ + constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ +/* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ - using SoAValueWithConf = cms::soa::SoAValue; \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ template \ - using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ template \ using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ @@ -395,11 +401,13 @@ struct ConstValueTraits { _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ } +/* ---- CONST VIEW --------------------------------------------------------------------------------------------------------------------- */ + #define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ + cms::soa::RestrictQualify RESTRICT_QUALIFY = cms::soa::RestrictQualify::Enabled> \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu index 46efd5ed5..1b216db17 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -19,6 +19,8 @@ GENERATE_SOA_LAYOUT(SoA1LayoutTemplate, SOA_COLUMN(double, x), SOA_COLUMN(double, y), SOA_COLUMN(double, z), + SOA_COLUMN(double, sum), + SOA_COLUMN(double, prod), SOA_EIGEN_COLUMN(Eigen::Vector3d, a), SOA_EIGEN_COLUMN(Eigen::Vector3d, 
b), SOA_EIGEN_COLUMN(Eigen::Vector3d, r), @@ -44,6 +46,8 @@ GENERATE_SOA_VIEW(SoA1ViewTemplate, SOA_VIEW_VALUE(soa1, x), SOA_VIEW_VALUE(soa1, y), SOA_VIEW_VALUE(soa1, z), + SOA_VIEW_VALUE(soa1, sum), + SOA_VIEW_VALUE(soa1, prod), SOA_VIEW_VALUE(soa1, color), SOA_VIEW_VALUE(soa1, value), SOA_VIEW_VALUE(soa1, py), @@ -100,6 +104,48 @@ GENERATE_SOA_CONST_VIEW(SoA1View2Gconst, ) ); +// Parameter reusing kernels. The disassembly will indicate whether the compiler uses the wanted cache hits and uses +// `restrict` hints avoid multiple reduce loads. +// The PTX can be obtained using -ptx insterad of -c when compiling. +template +__device__ void addAndMulTemplate ( + T soa, size_t size) { + auto idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size) return; + auto si = soa[idx]; + si.sum() = si.x() + si.y(); + si.prod() = si.x() * si.y(); + } + +__global__ void aAMDef(SoA1ViewTemplate soa, size_t size) { + addAndMulTemplate(soa, size); +} + +__global__ void aAMRestrict(SoA1ViewTemplate soa, size_t size) { + addAndMulTemplate(soa, size); +} + +__global__ void aAMNC(SoA1ViewTemplate soa, size_t size) { + addAndMulTemplate(soa, size); +} + +__global__ void aAMRestrict(SoA1ViewTemplate soa, size_t size) { + addAndMulTemplate(soa, size); +} + + const size_t size=10000; int main() { From 5a2d472dff65c8304c0cb2fbc40d3f99639a98b0 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 13 Jan 2022 09:28:55 +0100 Subject: [PATCH 36/50] [cudadev] Added automatic generation of trivially deducted view from layout definition. Changed the convention for the GENERATE_... macro family there the semicolon is now generated by the macro. Updated documentation. 
--- .../CUDADataFormats/SiPixelClustersCUDA.h | 6 +-- .../CUDADataFormats/SiPixelDigisCUDA.h | 10 ++--- .../CUDADataFormats/TrackingRecHit2DSOAView.h | 4 +- .../CondFormats/SiPixelROCsStatusAndMapping.h | 4 +- src/cudadev/DataFormats/SoA.md | 31 ++++++++++++-- src/cudadev/DataFormats/SoALayout.h | 2 +- src/cudadev/DataFormats/SoAView.h | 42 ++++++++++++++++++- src/cudadev/test/SoAStoreAndView_t.cu | 31 +++----------- 8 files changed, 86 insertions(+), 44 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h index 4414d4f80..714654098 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h @@ -18,7 +18,7 @@ class SiPixelClustersCUDA { // originally from rechits SOA_COLUMN(uint32_t, clusModuleStart) // index of the first cluster of each module - ); + ) // We use all defaults for the template parameters. using DeviceLayout = DeviceLayoutTemplate<>; @@ -33,7 +33,7 @@ class SiPixelClustersCUDA { // originally from rechits SOA_VIEW_VALUE(deviceLayout, clusModuleStart) // index of the first cluster of each module ) - ); + ) using DeviceView = DeviceViewTemplate<>; @@ -47,7 +47,7 @@ class SiPixelClustersCUDA { // originally from rechits SOA_VIEW_VALUE(deviceView, clusModuleStart) // index of the first cluster of each module ) - ); + ) using DeviceConstView = DeviceConstViewTemplate<>; diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index 6d6e60770..e334c102f 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -16,7 +16,7 @@ class SiPixelDigisCUDA { SOA_COLUMN(uint16_t, xx), /* local coordinates of each pixel */ SOA_COLUMN(uint16_t, yy), /* */ SOA_COLUMN(uint16_t, moduleInd) /* module id of each pixel */ - ); + ) using DeviceOnlyLayout = DeviceOnlyLayoutTemplate<>; @@ -28,7 +28,7 @@ class SiPixelDigisCUDA { /* 
separate product? */ SOA_COLUMN(uint32_t, pdigi), /* packed digi (row, col, adc) of each pixel */ SOA_COLUMN(uint32_t, rawIdArr) /* DetId of each pixel */ - ); + ) using HostDeviceLayout = HostDeviceLayoutTemplate<>; @@ -42,7 +42,7 @@ class SiPixelDigisCUDA { SOA_VIEW_VALUE(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ SOA_VIEW_VALUE(hostDevice, rawIdArr) /* DetId of each pixel */ ) - ); + ) using HostDeviceView = HostDeviceViewTemplate<>; @@ -60,7 +60,7 @@ class SiPixelDigisCUDA { SOA_VIEW_VALUE(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ SOA_VIEW_VALUE(hostDevice, rawIdArr) /* DetId of each pixel */ ) - ); + ) using DeviceFullView = DeviceFullViewTemplate<>; @@ -78,7 +78,7 @@ class SiPixelDigisCUDA { SOA_VIEW_VALUE(deviceFullView, adc), /* ADC of each pixel */ SOA_VIEW_VALUE(deviceFullView, clus) /* cluster id of each pixel */ ) - ); + ) using DevicePixelConstView = DevicePixelConstViewTemplate<>; diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h index aed7490cd..c983dc6e5 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h @@ -52,7 +52,7 @@ class TrackingRecHit2DSOAStore { // 16 bits section (and cluster properties immediately continued) SOA_COLUMN(int16_t, clusterSizeX), SOA_COLUMN(int16_t, clusterSizeY) - ); + ) // The hits layout does not use default alignment but a more relaxed one. 
using HitsLayout = HitsLayoutTemplate; @@ -76,7 +76,7 @@ class TrackingRecHit2DSOAStore { SOA_VIEW_VALUE(hitsLayout, clusterSizeX), SOA_VIEW_VALUE(hitsLayout, clusterSizeY) ) - ); + ) using HitsView = HitsViewTemplate<>; diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h index a2286721f..711675a5d 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h @@ -34,7 +34,7 @@ GENERATE_SOA_LAYOUT(SiPixelROCsStatusAndMappingLayoutTemplate, SOA_COLUMN(unsigned int, moduleId), SOA_COLUMN(unsigned char, badRocs), SOA_SCALAR(unsigned int, size) -); +) using SiPixelROCsStatusAndMappingLayout = SiPixelROCsStatusAndMappingLayoutTemplate<>; @@ -50,7 +50,7 @@ GENERATE_SOA_CONST_VIEW(SiPixelROCsStatusAndMappingConstViewTemplate, SOA_VIEW_VALUE(mappingLayout, badRocs), SOA_VIEW_VALUE(mappingLayout, size) ) -); +) // Slightly more complex than using, but allows forward declarations. 
struct SiPixelROCsStatusAndMappingConstView: public SiPixelROCsStatusAndMappingConstViewTemplate<> { diff --git a/src/cudadev/DataFormats/SoA.md b/src/cudadev/DataFormats/SoA.md index d6e169427..2a03e544b 100644 --- a/src/cudadev/DataFormats/SoA.md +++ b/src/cudadev/DataFormats/SoA.md @@ -121,6 +121,27 @@ for (size_t i=0; i < soaLayout.soaMetadata().size(); ++i) { soaView.someNumber() += i; } ``` +Any mixture of mutable and const views can also be defined automatically with the layout (for the trivially identical views) using one those macros `GENERATE_SOA_LAYOUT_VIEW_AND_CONST_VIEW`, `GENERATE_SOA_LAYOUT_AND_VIEW` and `GENERATE_SOA_LAYOUT_AND_CONST_VIEW`: + +```C++ +GENERATE_SOA_LAYOUT_VIEW_AND_CONST_VIEW(SoA1LayoutTemplate, SoA1ViewTemplate, SoA1ConstViewTemplate, + // columns: one value per element + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), + SOA_COLUMN(double, sum), + SOA_COLUMN(double, prod), + SOA_COLUMN(uint16_t, color), + SOA_COLUMN(int32_t, value), + SOA_COLUMN(double *, py), + SOA_COLUMN(uint32_t, count), + SOA_COLUMN(uint32_t, anotherCount), + + // scalars: one value for the whole structure + SOA_SCALAR(const char *, description), + SOA_SCALAR(uint32_t, someNumber) +) +``` ## Template parameters @@ -137,12 +158,16 @@ scenarios where only a subset of columns are used in a given GPU kernel. ## Current status and further improvements +### Available features + - The layout and views support scalars and columns, alignment and alignment enforcement and hinting. -- Cache access style will be added at in subsequent updates. -- `__restrict__` compiler hinting will be added later. +- Automatic `__restrict__` compiler hinting is supported. +- A shortcut alloCreate a mechanism to derive trivial views and const views from a single layout. 
+- Cache access style, which was explored will be removed as this not-yet-used feature interferes with `__restrict__` support (which is already in used in existing code) + +### Planned additions - Optional range checking will be added later. This implies adding support for size to views and will restrict views to columns of equal size. - Eigen access was validated with an earlier scheme, but will be ported back to the current one later. Some alignment information can be passed to Eigen strcutures. Const variants of access classes should be created to ensure we cannot leak mutable access to const products. - Improve `dump()` function and turn it into a more classic `operator<<()`. -- Create a mechanism to derive trivial views and const views from a single layout. diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 3b9f45c13..058d7d91a 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -343,6 +343,6 @@ size_t nElements_; \ size_t byteSize_; \ _ITERATE_ON_ALL(_DECLARE_SOA_DATA_MEMBER, ~, __VA_ARGS__) \ - } + }; #endif // ndef DataStructures_SoALayout_h diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 85368cee6..4168f6f85 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -399,7 +399,7 @@ struct ConstValueTraits { \ private: \ _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ - } + }; /* ---- CONST VIEW --------------------------------------------------------------------------------------------------------------------- */ @@ -491,6 +491,44 @@ struct ConstValueTraits { \ private: \ _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ - } + }; + +/** + * Helper macro turning layout field declaration into view field declaration. 
+ */ +#define _VIEW_FIELD_FROM_LAYOUT_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ + (DATA, NAME, NAME) + +#define _VIEW_FIELD_FROM_LAYOUT(R, DATA, VALUE_TYPE_NAME) \ + BOOST_PP_EXPAND ((_VIEW_FIELD_FROM_LAYOUT_IMPL BOOST_PP_TUPLE_PUSH_BACK(VALUE_TYPE_NAME, DATA))) + +/** + * A macro defining both layout and view(s) in one go. + */ + +#define GENERATE_SOA_LAYOUT_VIEW_AND_CONST_VIEW(LAYOUT_NAME, VIEW_NAME, CONST_VIEW_NAME, ... ) \ +GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ +using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME <>; \ +GENERATE_SOA_VIEW(VIEW_NAME, \ + SOA_VIEW_LAYOUT_LIST( (BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME)) ), \ + SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); \ +GENERATE_SOA_CONST_VIEW(CONST_VIEW_NAME, \ + SOA_VIEW_LAYOUT_LIST( (BOOST_PP_CAT(LAYOUT_NAME,_default), BOOST_PP_CAT(instance_, LAYOUT_NAME)) ), \ + SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); + +#define GENERATE_SOA_LAYOUT_AND_VIEW(LAYOUT_NAME, VIEW_NAME, ... ) \ +GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ +using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME <>; \ +GENERATE_SOA_VIEW(VIEW_NAME, \ + SOA_VIEW_LAYOUT_LIST( (BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME)) ), \ + SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); + + +#define GENERATE_SOA_LAYOUT_AND_CONST_VIEW(LAYOUT_NAME, CONST_VIEW_NAME, ... 
) \ +GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ +using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME <>; \ +GENERATE_SOA_CONST_VIEW(CONST_VIEW_NAME, \ + SOA_VIEW_LAYOUT_LIST( (BOOST_PP_CAT(LAYOUT_NAME,_default), BOOST_PP_CAT(instance_, LAYOUT_NAME)) ), \ + SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); #endif // ndef DataStructures_SoAView_h diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu index 1b216db17..ac454e766 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -9,8 +9,8 @@ // Multiple stores in a buffer // Scalars, Columns of scalars and of Eigen vectors // View to each of them, from one and multiple stores. - -GENERATE_SOA_LAYOUT(SoA1LayoutTemplate, + +GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, SoA1ViewTemplate, // predefined static scalars // size_t size; // size_t alignment; @@ -21,9 +21,10 @@ GENERATE_SOA_LAYOUT(SoA1LayoutTemplate, SOA_COLUMN(double, z), SOA_COLUMN(double, sum), SOA_COLUMN(double, prod), + /* Leave Eigen definitions out until support is complete. SOA_EIGEN_COLUMN(Eigen::Vector3d, a), SOA_EIGEN_COLUMN(Eigen::Vector3d, b), - SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + SOA_EIGEN_COLUMN(Eigen::Vector3d, r),*/ SOA_COLUMN(uint16_t, color), SOA_COLUMN(int32_t, value), SOA_COLUMN(double *, py), @@ -33,31 +34,9 @@ GENERATE_SOA_LAYOUT(SoA1LayoutTemplate, // scalars: one value for the whole structure SOA_SCALAR(const char *, description), SOA_SCALAR(uint32_t, someNumber) -); +) using SoA1Layout = SoA1LayoutTemplate<>; - -// A 1 to 1 view of the store (except for unsupported types). 
-GENERATE_SOA_VIEW(SoA1ViewTemplate, - SOA_VIEW_LAYOUT_LIST( - SOA_VIEW_LAYOUT(SoA1Layout, soa1) - ), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(soa1, x), - SOA_VIEW_VALUE(soa1, y), - SOA_VIEW_VALUE(soa1, z), - SOA_VIEW_VALUE(soa1, sum), - SOA_VIEW_VALUE(soa1, prod), - SOA_VIEW_VALUE(soa1, color), - SOA_VIEW_VALUE(soa1, value), - SOA_VIEW_VALUE(soa1, py), - SOA_VIEW_VALUE(soa1, count), - SOA_VIEW_VALUE(soa1, anotherCount), - SOA_VIEW_VALUE(soa1, description), - SOA_VIEW_VALUE(soa1, someNumber) - ) -); - using SoA1View = SoA1ViewTemplate<>; // A partial view (artificial mix of store and view) From f89caecd681fbd0c3c7ccb94107a627913664c1e Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 13 Jan 2022 10:27:51 +0100 Subject: [PATCH 37/50] [cudadev] Removed cache access style support code in SoA. The cache access function at the SoA level interfere with the __restrict__ support. It could still be re-introduced as a separate tool. --- src/cudadev/DataFormats/SoA.md | 2 +- src/cudadev/DataFormats/SoACommon.h | 25 ------------------------- src/cudadev/DataFormats/SoAView.h | 14 +++++--------- src/cudadev/test/SoAStoreAndView_t.cu | 17 ----------------- 4 files changed, 6 insertions(+), 52 deletions(-) diff --git a/src/cudadev/DataFormats/SoA.md b/src/cudadev/DataFormats/SoA.md index 2a03e544b..c990f5bc1 100644 --- a/src/cudadev/DataFormats/SoA.md +++ b/src/cudadev/DataFormats/SoA.md @@ -163,7 +163,7 @@ scenarios where only a subset of columns are used in a given GPU kernel. - The layout and views support scalars and columns, alignment and alignment enforcement and hinting. - Automatic `__restrict__` compiler hinting is supported. - A shortcut alloCreate a mechanism to derive trivial views and const views from a single layout. 
-- Cache access style, which was explored will be removed as this not-yet-used feature interferes with `__restrict__` support (which is already in used in existing code) +- Cache access style, which was explored, was abandoned as this not-yet-used feature interferes with `__restrict__` support (which is already in used in existing code). It could be made available as a separate tool that can be used directly by the module developer, orthogonally from SoA. ### Planned additions - Optional range checking will be added later. This implies adding support for size to views and will restrict views to columns of diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 317bc19f9..29d533f20 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -23,23 +23,10 @@ #define SOA_DEVICE_RESTRICT #endif -#if defined(__CUDACC__) && defined(__CUDA_ARCH__) -// Read a pointer content via read-only (non coherent) cache. -#define LOAD_NONCOHERENT(A) __ldg(A) -#define LOAD_STREAMING(A) __ldcs(A) -#define STORE_STREAMING(A, V) __stcs(A, V) -#else -#define LOAD_NONCOHERENT(A) *(A) -#define LOAD_STREAMING(A) *(A) -#define STORE_STREAMING(A, V) *(A) = (V) -#endif - // compile-time sized SoA namespace cms::soa { -enum class CacheAccessStyle : char { Default, NonCoherent, Streaming }; - enum class RestrictQualify : bool { Enabled, Disabled, Default = Disabled }; template @@ -65,21 +52,10 @@ struct add_restrict { typedef const T & ReferenceToConst; }; -template -SOA_HOST_DEVICE_INLINE T readWithCacheStyle (const T * addr) { - if constexpr (CACHE_ACCESS_STYLE == CacheAccessStyle::NonCoherent) { - return LOAD_INCOHERENT(addr); - } else if constexpr (CACHE_ACCESS_STYLE == CacheAccessStyle::Streaming) { - return LOAD_STREAMING(addr); - } - return *addr; -} - // Helper template managing the value within it column // The optional compile time alignment parameter enables informing the // compiler of alignment (enforced by 
caller). template class SoAValue { public: @@ -123,7 +99,6 @@ class SoAValue { // Helper template managing the value within it column template class SoAConstValue { public: diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 4168f6f85..c31b00411 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -291,7 +291,6 @@ struct ConstValueTraits { #define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ @@ -307,14 +306,13 @@ struct ConstValueTraits { constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ constexpr static size_t conditionalAlignment = \ alignmentEnforcement == AlignmentEnforcement::Enforced ? byteAlignment : 0; \ - constexpr static cms::soa::CacheAccessStyle cacheAccessStyle = CACHE_ACCESS_STYLE; \ constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ -/* Those typedefs avoid having commas in macros (which is problematic) */ \ +/* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ - using SoAValueWithConf = cms::soa::SoAValue; \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ template \ - using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ template \ using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ @@ -406,7 +404,6 @@ struct ConstValueTraits { #define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ @@ -422,14 +419,13 @@ struct ConstValueTraits { constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ constexpr static size_t conditionalAlignment = \ alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ - constexpr static cms::soa::CacheAccessStyle cacheAccessStyle = CACHE_ACCESS_STYLE; \ constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ - using SoAValueWithConf = cms::soa::SoAValue; \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ template \ - using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ template \ using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu index ac454e766..2f44b6daa 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -98,33 +98,16 @@ __device__ void addAndMulTemplate ( __global__ void aAMDef(SoA1ViewTemplate soa, size_t size) { addAndMulTemplate(soa, size); } __global__ void aAMRestrict(SoA1ViewTemplate soa, size_t size) { addAndMulTemplate(soa, size); } -__global__ void aAMNC(SoA1ViewTemplate soa, size_t size) { - addAndMulTemplate(soa, size); -} - -__global__ void aAMRestrict(SoA1ViewTemplate soa, size_t size) { - addAndMulTemplate(soa, size); -} - - const size_t size=10000; int main() { From 9ad2ba7c35742222901f5b7001da47b00df3e78b Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 13 Jan 2022 18:40:07 +0100 Subject: [PATCH 38/50] [cudadev] Added size support in views and range checking. Views can now only be defined from layouts and view of the same size (if multiple). 
--- .../SiPixelROCsStatusAndMappingWrapper.h | 1 + src/cudadev/DataFormats/SoACommon.h | 9 +++ src/cudadev/DataFormats/SoAView.h | 80 +++++++++++++++---- src/cudadev/test/SoAStoreAndView_t.cu | 10 +++ 4 files changed, 84 insertions(+), 16 deletions(-) diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h index 483088b67..61f0f5b3c 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h @@ -37,6 +37,7 @@ class SiPixelROCsStatusAndMappingWrapper { // Populate the view with individual column pointers auto & cmd = *cablingMapDevice; cablingMapDeviceView = SiPixelROCsStatusAndMappingConstView( + pixelgpudetails::MAX_SIZE, cmd.fed, // Those are array pointers (in device, but we won't dereference them here). cmd.link, cmd.roc, diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 29d533f20..cd3ac76cb 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -23,12 +23,21 @@ #define SOA_DEVICE_RESTRICT #endif +// Exception throwing (or willful crash in kernels) +#if defined(__CUDACC__) && defined(__CUDA_ARCH__) +#define SOA_THROW_OUT_OF_RANGE(A) { printf (A); *((char *)nullptr) = 0; } +#else +#define SOA_THROW_OUT_OF_RANGE(A) { throw std::out_of_range(A); } +#endif + // compile-time sized SoA namespace cms::soa { enum class RestrictQualify : bool { Enabled, Disabled, Default = Disabled }; +enum class RangeChecking: bool { Enabled, Disabled, Default = Disabled }; + template struct add_restrict {}; diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index c31b00411..6aee9d1f6 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -141,6 +141,22 @@ struct ConstValueTraits { #define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, LAYOUT_MEMBER_NAME) \ 
BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL LAYOUT_MEMBER_NAME) +/** + * Generator of size computation for constructor. + * This is the per-layout part of the lambda checking they all have the same size. + */ +#define _UPDATE_SIZE_OF_VIEW_IMPL(LAYOUT_TYPE, LAYOUT_NAME) \ + if (set) { \ + if (ret != LAYOUT_NAME.soaMetadata().size()) \ + throw std::out_of_range("In constructor by layout: different sizes from layouts."); \ + } else { \ + ret = LAYOUT_NAME.soaMetadata().size(); \ + set = true; \ + } + +#define _UPDATE_SIZE_OF_VIEW(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_UPDATE_SIZE_OF_VIEW_IMPL TYPE_NAME) + /** * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. @@ -291,7 +307,8 @@ struct ConstValueTraits { #define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ + cms::soa::RestrictQualify RESTRICT_QUALIFY = cms::soa::RestrictQualify::Disabled, \ + cms::soa::RangeChecking RANGE_CHECKING = cms::soa::RangeChecking::Disabled> \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ @@ -307,6 +324,7 @@ struct ConstValueTraits { constexpr static size_t conditionalAlignment = \ alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ + constexpr static cms::soa::RangeChecking rangeChecking = RANGE_CHECKING; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ using SoAValueWithConf = cms::soa::SoAValue; \ @@ -321,6 +339,7 @@ struct ConstValueTraits { */ \ struct SoAMetadata { \ friend CLASS; \ + SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ /* Alias layout or view types to name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ \ @@ -339,17 +358,24 @@ struct ConstValueTraits { SOA_HOST_DEVICE_INLINE SoAMetadata soaMetadata() { return SoAMetadata(*this); } \ \ /* Trivial constuctor */ \ - CLASS() : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ + CLASS() : nElements_(0), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ \ /* Constructor relying on user provided layouts or views */ \ SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), LAYOUTS_LIST)) \ - : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ + : nElements_( \ + [&]() -> size_t { \ + bool set = false; \ + size_t ret = 0; \ + _ITERATE_ON_ALL(_UPDATE_SIZE_OF_VIEW, BOOST_PP_EMPTY(), LAYOUTS_LIST) \ + return ret; \ + }() \ + ), \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ \ /* Constructor relying on individually provided column addresses */ \ - SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, \ - BOOST_PP_EMPTY(), \ - VALUE_LIST)) \ - : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ + SOA_HOST_ONLY CLASS(size_t nElements, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, BOOST_PP_EMPTY(), VALUE_LIST)) \ + : 
nElements_(nElements), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ \ struct const_element { \ SOA_HOST_DEVICE_INLINE \ @@ -378,12 +404,18 @@ struct ConstValueTraits { /* AoS-like accessor (non-const) */ \ SOA_HOST_DEVICE_INLINE \ element operator[](size_t index) { \ + if constexpr (rangeChecking == cms::soa::RangeChecking::Enabled) { \ + if (index >= nElements_) SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ + } \ return element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ } \ \ /* AoS-like accessor (const) */ \ SOA_HOST_DEVICE_INLINE \ - const_element operator[](size_t index) const { \ + const_element operator[](size_t index) const { \ + if constexpr (rangeChecking == cms::soa::RangeChecking::Enabled) { \ + if (index >= nElements_) SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ + } \ return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ } \ \ @@ -396,6 +428,7 @@ struct ConstValueTraits { SOA_HOST_ONLY friend void dump(); \ \ private: \ + size_t nElements_; \ _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ }; @@ -404,8 +437,9 @@ struct ConstValueTraits { #define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ - struct CLASS { \ + cms::soa::RestrictQualify RESTRICT_QUALIFY = cms::soa::RestrictQualify::Enabled, \ + cms::soa::RangeChecking RANGE_CHECKING = cms::soa::RangeChecking::Disabled> \ + struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ @@ -420,6 +454,7 @@ struct ConstValueTraits { constexpr static size_t conditionalAlignment = \ alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ + constexpr static cms::soa::RangeChecking rangeChecking = RANGE_CHECKING; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ using SoAValueWithConf = cms::soa::SoAValue; \ @@ -435,7 +470,8 @@ struct ConstValueTraits { */ \ struct SoAMetadata { \ friend CLASS; \ - /* Alias layout/view types to name-derived identifyer to allow simpler definitions */ \ + SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ + /* Alias layout/view types to name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ \ /* Alias member types to name-derived identifyer to allow simpler definitions */ \ @@ -451,15 +487,23 @@ struct ConstValueTraits { SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ \ /* Trivial constuctor */ \ - CLASS() : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ + CLASS() : nElements_(0), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ \ /* Constructor relying on user provided layouts or views */ \ SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, LAYOUTS_LIST)) \ - : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ + : nElements_( \ + [&]() -> size_t { \ + bool set = false; \ + size_t ret = 0; \ + _ITERATE_ON_ALL(_UPDATE_SIZE_OF_VIEW, BOOST_PP_EMPTY(), LAYOUTS_LIST) \ + return ret; \ + }() \ + ), \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ \ /* Constructor relying on individually provided column addresses */ \ - SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, const, VALUE_LIST)) \ - : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ + 
SOA_HOST_ONLY CLASS(size_t nElements, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, const, VALUE_LIST)) \ + : nElements_(nElements), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ \ struct const_element { \ SOA_HOST_DEVICE_INLINE \ @@ -474,7 +518,10 @@ struct ConstValueTraits { \ /* AoS-like accessor (const) */ \ SOA_HOST_DEVICE_INLINE \ - const_element operator[](size_t index) const { \ + const_element operator[](size_t index) const { \ + if constexpr (rangeChecking == cms::soa::RangeChecking::Enabled) { \ + if (index >= nElements_) SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ + } \ return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ } \ \ @@ -486,6 +533,7 @@ struct ConstValueTraits { SOA_HOST_ONLY friend void dump(); \ \ private: \ + size_t nElements_; \ _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ }; diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoAStoreAndView_t.cu index 2f44b6daa..6603ed947 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoAStoreAndView_t.cu @@ -167,4 +167,14 @@ int main() { assert(sv2gc.y() == 2.0 * i); assert(sv2gc.color() == i); } + + // Validation of range checking + try { + // Get a view like the default, except for range checking + SoA1ViewTemplate soa1viewRangeChecking(soa1); + // This should throw an exception + [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.soaMetadata().size()]; + assert(false); + } catch (const std::out_of_range &) {} } \ No newline at end of file From a2643bfa0b266054a07f37b393c40075dcb44819 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Fri, 14 Jan 2022 16:29:05 +0100 Subject: [PATCH 39/50] [cudadev] Renamed SoA test to match currrent nomenclature. 
--- .../test/{SoAStoreAndView_t.cu => SoALayoutAndView_t.cu} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/cudadev/test/{SoAStoreAndView_t.cu => SoALayoutAndView_t.cu} (99%) diff --git a/src/cudadev/test/SoAStoreAndView_t.cu b/src/cudadev/test/SoALayoutAndView_t.cu similarity index 99% rename from src/cudadev/test/SoAStoreAndView_t.cu rename to src/cudadev/test/SoALayoutAndView_t.cu index 6603ed947..f5fa1794e 100644 --- a/src/cudadev/test/SoAStoreAndView_t.cu +++ b/src/cudadev/test/SoALayoutAndView_t.cu @@ -55,7 +55,7 @@ GENERATE_SOA_VIEW(SoA1View2GTemplate, SOA_VIEW_VALUE(soa1v, description), SOA_VIEW_VALUE(soa1, someNumber) ) -); +) using SoA1View2G = SoA1View2GTemplate<>; @@ -81,7 +81,7 @@ GENERATE_SOA_CONST_VIEW(SoA1View2Gconst, SOA_VIEW_VALUE(soa1v, description), SOA_VIEW_VALUE(soa1, someNumber) ) -); +) // Parameter reusing kernels. The disassembly will indicate whether the compiler uses the wanted cache hits and uses // `restrict` hints avoid multiple reduce loads. From 24926af3b1ed19ac2b51e720e87d395d4d29e46a Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 17 Jan 2022 16:07:19 +0100 Subject: [PATCH 40/50] [cudadev] Ran clang-format to format code. 
--- .../CUDADataFormats/SiPixelClustersCUDA.cc | 5 +- .../CUDADataFormats/SiPixelClustersCUDA.h | 67 +-- .../CUDADataFormats/SiPixelDigisCUDA.cc | 44 +- .../CUDADataFormats/SiPixelDigisCUDA.h | 127 ++-- .../TrackingRecHit2DHeterogeneous.h | 40 +- .../TrackingRecHit2DHostSOAStore.cc | 15 +- .../TrackingRecHit2DHostSOAStore.h | 13 +- .../CUDADataFormats/TrackingRecHit2DSOAView.h | 170 +++--- .../CondFormats/SiPixelROCsStatusAndMapping.h | 40 +- .../SiPixelROCsStatusAndMappingWrapper.cc | 20 +- .../SiPixelROCsStatusAndMappingWrapper.h | 27 +- src/cudadev/DataFormats/SoACommon.h | 476 +++++++-------- src/cudadev/DataFormats/SoALayout.h | 89 +-- src/cudadev/DataFormats/SoAView.h | 553 +++++++++--------- .../SiPixelRawToClusterGPUKernel.cu | 3 +- .../SiPixelRawToClusterGPUKernel.h | 2 +- src/cudadev/test/SoALayoutAndView_t.cu | 150 +++-- 17 files changed, 919 insertions(+), 922 deletions(-) diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc index 90361b048..2c71cdabf 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.cc @@ -3,10 +3,9 @@ #include "CUDACore/host_unique_ptr.h" #include "CUDADataFormats/SiPixelClustersCUDA.h" -SiPixelClustersCUDA::SiPixelClustersCUDA(): data_d(), deviceLayout_(data_d.get(), 0), deviceView_(deviceLayout_) {} +SiPixelClustersCUDA::SiPixelClustersCUDA() : data_d(), deviceLayout_(data_d.get(), 0), deviceView_(deviceLayout_) {} SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream) : data_d(cms::cuda::make_device_unique(DeviceLayout::computeDataSize(maxModules), stream)), deviceLayout_(data_d.get(), maxModules), - deviceView_(deviceLayout_) -{} + deviceView_(deviceLayout_) {} diff --git a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h index 714654098..9f7451239 100644 --- a/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h +++ 
b/src/cudadev/CUDADataFormats/SiPixelClustersCUDA.h @@ -12,45 +12,40 @@ class SiPixelClustersCUDA { public: GENERATE_SOA_LAYOUT(DeviceLayoutTemplate, - SOA_COLUMN(uint32_t, moduleStart), // index of the first pixel of each module - SOA_COLUMN(uint32_t, clusInModule), // number of clusters found in each module - SOA_COLUMN(uint32_t, moduleId), // module id of each module - - // originally from rechits - SOA_COLUMN(uint32_t, clusModuleStart) // index of the first cluster of each module - ) - + SOA_COLUMN(uint32_t, moduleStart), // index of the first pixel of each module + SOA_COLUMN(uint32_t, clusInModule), // number of clusters found in each module + SOA_COLUMN(uint32_t, moduleId), // module id of each module + + // originally from rechits + SOA_COLUMN(uint32_t, clusModuleStart)) // index of the first cluster of each module + // We use all defaults for the template parameters. using DeviceLayout = DeviceLayoutTemplate<>; - GENERATE_SOA_VIEW(DeviceViewTemplate, - SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceLayout, deviceLayout)), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(deviceLayout, moduleStart), // index of the first pixel of each module - SOA_VIEW_VALUE(deviceLayout, clusInModule), // number of clusters found in each module - SOA_VIEW_VALUE(deviceLayout, moduleId), // module id of each module - - // originally from rechits - SOA_VIEW_VALUE(deviceLayout, clusModuleStart) // index of the first cluster of each module - ) - ) - + GENERATE_SOA_VIEW( + DeviceViewTemplate, + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceLayout, deviceLayout)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(deviceLayout, moduleStart), // index of the first pixel of each module + SOA_VIEW_VALUE(deviceLayout, clusInModule), // number of clusters found in each module + SOA_VIEW_VALUE(deviceLayout, moduleId), // module id of each module + + // originally from rechits + SOA_VIEW_VALUE(deviceLayout, clusModuleStart))) // index of the first cluster of each module + using DeviceView = DeviceViewTemplate<>; - - 
GENERATE_SOA_CONST_VIEW(DeviceConstViewTemplate, - SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceView, deviceView)), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(deviceView, moduleStart), // index of the first pixel of each module - SOA_VIEW_VALUE(deviceView, clusInModule), // number of clusters found in each module - SOA_VIEW_VALUE(deviceView, moduleId), // module id of each module - - // originally from rechits - SOA_VIEW_VALUE(deviceView, clusModuleStart) // index of the first cluster of each module - ) - ) - + + GENERATE_SOA_CONST_VIEW( + DeviceConstViewTemplate, + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceView, deviceView)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(deviceView, moduleStart), // index of the first pixel of each module + SOA_VIEW_VALUE(deviceView, clusInModule), // number of clusters found in each module + SOA_VIEW_VALUE(deviceView, moduleId), // module id of each module + + // originally from rechits + SOA_VIEW_VALUE(deviceView, clusModuleStart))) // index of the first cluster of each module + using DeviceConstView = DeviceConstViewTemplate<>; - + explicit SiPixelClustersCUDA(); explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream); ~SiPixelClustersCUDA() = default; @@ -77,10 +72,10 @@ class SiPixelClustersCUDA { DeviceConstView view() const { return DeviceConstView(deviceView_); } private: - cms::cuda::device::unique_ptr data_d; // Single SoA storage + cms::cuda::device::unique_ptr data_d; // Single SoA storage DeviceLayout deviceLayout_; DeviceView deviceView_; - + uint32_t nClusters_h = 0; }; diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc index a816806f8..55837fa92 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.cc @@ -6,28 +6,22 @@ SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) : data_d(cms::cuda::make_device_unique( - DeviceOnlyLayout::computeDataSize(maxFedWords) + - 
HostDeviceLayout::computeDataSize(maxFedWords), - stream)), + DeviceOnlyLayout::computeDataSize(maxFedWords) + HostDeviceLayout::computeDataSize(maxFedWords), stream)), deviceOnlyLayout_d(data_d.get(), maxFedWords), hostDeviceLayout_d(deviceOnlyLayout_d.soaMetadata().nextByte(), maxFedWords), deviceFullView_(deviceOnlyLayout_d, hostDeviceLayout_d), - devicePixelConstView_(deviceFullView_) -{} + devicePixelConstView_(deviceFullView_) {} SiPixelDigisCUDA::SiPixelDigisCUDA() - : data_d(),deviceOnlyLayout_d(), hostDeviceLayout_d(), deviceFullView_(), devicePixelConstView_() -{} + : data_d(), deviceOnlyLayout_d(), hostDeviceLayout_d(), deviceFullView_(), devicePixelConstView_() {} -SiPixelDigisCUDA::HostStore::HostStore() - : data_h(), hostLayout_(nullptr, 0), hostView_(hostLayout_) -{} +SiPixelDigisCUDA::HostStore::HostStore() : data_h(), hostLayout_(nullptr, 0), hostView_(hostLayout_) {} SiPixelDigisCUDA::HostStore::HostStore(size_t maxFedWords, cudaStream_t stream) - : data_h(cms::cuda::make_host_unique(SiPixelDigisCUDA::HostDeviceLayout::computeDataSize(maxFedWords), stream)), - hostLayout_(data_h.get(), maxFedWords), - hostView_(hostLayout_) -{} + : data_h(cms::cuda::make_host_unique(SiPixelDigisCUDA::HostDeviceLayout::computeDataSize(maxFedWords), + stream)), + hostLayout_(data_h.get(), maxFedWords), + hostView_(hostLayout_) {} void SiPixelDigisCUDA::HostStore::reset() { hostLayout_ = HostDeviceLayout(); @@ -38,7 +32,8 @@ void SiPixelDigisCUDA::HostStore::reset() { cms::cuda::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { auto ret = cms::cuda::make_host_unique(nDigis(), stream); // TODO: this is downgraded from cms::cuda::copyAsync as we copy data from within a block but not the full block. 
- cudaCheck(cudaMemcpyAsync(ret.get(), deviceFullView_.adc(), nDigis() * sizeof(decltype(ret[0])), cudaMemcpyDeviceToHost, stream)); + cudaCheck(cudaMemcpyAsync( + ret.get(), deviceFullView_.adc(), nDigis() * sizeof(decltype(ret[0])), cudaMemcpyDeviceToHost, stream)); return ret; } @@ -49,13 +44,20 @@ SiPixelDigisCUDA::HostStore SiPixelDigisCUDA::dataToHostAsync(cudaStream_t strea HostStore ret(nDigis(), stream); auto rhlsm = ret.hostLayout_.soaMetadata(); auto hdlsm_d = hostDeviceLayout_d.soaMetadata(); - cudaCheck(cudaMemcpyAsync(rhlsm.addressOf_adc(), hdlsm_d.addressOf_adc(), nDigis_h * sizeof(*rhlsm.addressOf_adc()), - cudaMemcpyDeviceToHost, stream)); + cudaCheck(cudaMemcpyAsync(rhlsm.addressOf_adc(), + hdlsm_d.addressOf_adc(), + nDigis_h * sizeof(*rhlsm.addressOf_adc()), + cudaMemcpyDeviceToHost, + stream)); // Copy the other columns, realigning the data in shorter arrays. clus is the first but all 3 columns (clus, pdigis, rawIdArr) have // the same geometry. - cudaCheck(cudaMemcpy2DAsync(rhlsm.addressOf_clus(), rhlsm.clusPitch(), - hdlsm_d.addressOf_clus(), hdlsm_d.clusPitch(), - 3 /* rows */, - nDigis() * sizeof(decltype (*ret.hostView_.clus())), cudaMemcpyDeviceToHost, stream)); + cudaCheck(cudaMemcpy2DAsync(rhlsm.addressOf_clus(), + rhlsm.clusPitch(), + hdlsm_d.addressOf_clus(), + hdlsm_d.clusPitch(), + 3 /* rows */, + nDigis() * sizeof(decltype(*ret.hostView_.clus())), + cudaMemcpyDeviceToHost, + stream)); return ret; } \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h index e334c102f..734b3631b 100644 --- a/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h +++ b/src/cudadev/CUDADataFormats/SiPixelDigisCUDA.h @@ -11,75 +11,71 @@ class SiPixelDigisCUDA { public: - GENERATE_SOA_LAYOUT(DeviceOnlyLayoutTemplate, - /* These are consumed by downstream device code */ - SOA_COLUMN(uint16_t, xx), /* local coordinates of each pixel */ - SOA_COLUMN(uint16_t, yy), /* */ - 
SOA_COLUMN(uint16_t, moduleInd) /* module id of each pixel */ + GENERATE_SOA_LAYOUT( + DeviceOnlyLayoutTemplate, + /* These are consumed by downstream device code */ + SOA_COLUMN(uint16_t, xx), /* local coordinates of each pixel */ + SOA_COLUMN(uint16_t, yy), /* */ + SOA_COLUMN(uint16_t, moduleInd) /* module id of each pixel */ ) - + using DeviceOnlyLayout = DeviceOnlyLayoutTemplate<>; - - GENERATE_SOA_LAYOUT(HostDeviceLayoutTemplate, - /* These are also transferred to host (see HostDataView) */ - SOA_COLUMN(uint16_t, adc), /* ADC of each pixel */ - SOA_COLUMN(int32_t, clus), /* cluster id of each pixel */ - /* These are for CPU output; should we (eventually) place them to a */ - /* separate product? */ - SOA_COLUMN(uint32_t, pdigi), /* packed digi (row, col, adc) of each pixel */ - SOA_COLUMN(uint32_t, rawIdArr) /* DetId of each pixel */ + + GENERATE_SOA_LAYOUT( + HostDeviceLayoutTemplate, + /* These are also transferred to host (see HostDataView) */ + SOA_COLUMN(uint16_t, adc), /* ADC of each pixel */ + SOA_COLUMN(int32_t, clus), /* cluster id of each pixel */ + /* These are for CPU output; should we (eventually) place them to a */ + /* separate product? 
*/ + SOA_COLUMN(uint32_t, pdigi), /* packed digi (row, col, adc) of each pixel */ + SOA_COLUMN(uint32_t, rawIdArr) /* DetId of each pixel */ ) - + using HostDeviceLayout = HostDeviceLayoutTemplate<>; - + GENERATE_SOA_VIEW(HostDeviceViewTemplate, - SOA_VIEW_LAYOUT_LIST( - SOA_VIEW_LAYOUT(HostDeviceLayout, hostDevice) - ), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(hostDevice, adc), /* ADC of each pixel */ - SOA_VIEW_VALUE(hostDevice, clus), /* cluster id of each pixel */ - SOA_VIEW_VALUE(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ - SOA_VIEW_VALUE(hostDevice, rawIdArr) /* DetId of each pixel */ - ) - ) - + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(HostDeviceLayout, hostDevice)), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(hostDevice, adc), /* ADC of each pixel */ + SOA_VIEW_VALUE(hostDevice, clus), /* cluster id of each pixel */ + SOA_VIEW_VALUE(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ + SOA_VIEW_VALUE(hostDevice, + rawIdArr) /* DetId of each pixel */ + )) + using HostDeviceView = HostDeviceViewTemplate<>; - - GENERATE_SOA_VIEW(DeviceFullViewTemplate, - SOA_VIEW_LAYOUT_LIST( - SOA_VIEW_LAYOUT(DeviceOnlyLayout, deviceOnly), - SOA_VIEW_LAYOUT(HostDeviceLayout, hostDevice) - ), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(deviceOnly, xx), /* local coordinates of each pixel */ - SOA_VIEW_VALUE(deviceOnly, yy), /* */ - SOA_VIEW_VALUE(deviceOnly, moduleInd),/* module id of each pixel */ - SOA_VIEW_VALUE(hostDevice, adc), /* ADC of each pixel */ - SOA_VIEW_VALUE(hostDevice, clus), /* cluster id of each pixel */ - SOA_VIEW_VALUE(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ - SOA_VIEW_VALUE(hostDevice, rawIdArr) /* DetId of each pixel */ - ) - ) - + + GENERATE_SOA_VIEW( + DeviceFullViewTemplate, + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceOnlyLayout, deviceOnly), + SOA_VIEW_LAYOUT(HostDeviceLayout, hostDevice)), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(deviceOnly, xx), /* local coordinates of each pixel */ + 
SOA_VIEW_VALUE(deviceOnly, yy), /* */ + SOA_VIEW_VALUE(deviceOnly, moduleInd), /* module id of each pixel */ + SOA_VIEW_VALUE(hostDevice, adc), /* ADC of each pixel */ + SOA_VIEW_VALUE(hostDevice, clus), /* cluster id of each pixel */ + SOA_VIEW_VALUE(hostDevice, pdigi), /* packed digi (row, col, adc) of each pixel */ + SOA_VIEW_VALUE(hostDevice, rawIdArr) /* DetId of each pixel */ + )) + using DeviceFullView = DeviceFullViewTemplate<>; /* Device pixel view: this is a second generation view (view from view) */ - GENERATE_SOA_CONST_VIEW(DevicePixelConstViewTemplate, - /* We get out data from the DeviceFullView */ - SOA_VIEW_LAYOUT_LIST( - SOA_VIEW_LAYOUT(DeviceFullView, deviceFullView) - ), - /* These are consumed by downstream device code */ - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(deviceFullView, xx), /* local coordinates of each pixel */ - SOA_VIEW_VALUE(deviceFullView, yy), /* */ - SOA_VIEW_VALUE(deviceFullView, moduleInd), /* module id of each pixel */ - SOA_VIEW_VALUE(deviceFullView, adc), /* ADC of each pixel */ - SOA_VIEW_VALUE(deviceFullView, clus) /* cluster id of each pixel */ - ) - ) - + GENERATE_SOA_CONST_VIEW( + DevicePixelConstViewTemplate, + /* We get out data from the DeviceFullView */ + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(DeviceFullView, deviceFullView)), + /* These are consumed by downstream device code */ + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(deviceFullView, xx), /* local coordinates of each pixel */ + SOA_VIEW_VALUE(deviceFullView, yy), /* */ + SOA_VIEW_VALUE(deviceFullView, moduleInd), /* module id of each pixel */ + SOA_VIEW_VALUE(deviceFullView, adc), /* ADC of each pixel */ + SOA_VIEW_VALUE(deviceFullView, clus) /* cluster id of each pixel */ + )) + using DevicePixelConstView = DevicePixelConstViewTemplate<>; explicit SiPixelDigisCUDA(); @@ -117,27 +113,28 @@ class SiPixelDigisCUDA { class HostStore { friend SiPixelDigisCUDA; + public: HostStore(); const SiPixelDigisCUDA::HostDeviceView view() { return hostView_; } void reset(); + private: 
HostStore(size_t maxFedWords, cudaStream_t stream); cms::cuda::host::unique_ptr data_h; HostDeviceLayout hostLayout_; HostDeviceView hostView_; - }; HostStore dataToHostAsync(cudaStream_t stream) const; - // Special copy for validation - cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; + // Special copy for validation + cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; - const DevicePixelConstView& pixelConstView() const { return devicePixelConstView_; } + const DevicePixelConstView &pixelConstView() const { return devicePixelConstView_; } private: // These are consumed by downstream device code - cms::cuda::device::unique_ptr data_d; // Single SoA storage + cms::cuda::device::unique_ptr data_d; // Single SoA storage DeviceOnlyLayout deviceOnlyLayout_d; HostDeviceLayout hostDeviceLayout_d; DeviceFullView deviceFullView_; diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h index 26a4ca75a..5294328a8 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHeterogeneous.h @@ -40,21 +40,21 @@ class TrackingRecHit2DHeterogeneous { // Transfer the local and global coordinates, charge and size TrackingRecHit2DHostSOAStore hitsToHostAsync(cudaStream_t stream) const; - + // apparently unused //cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; private: static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious - + unique_ptr m_PhiBinnerStore; //! unique_ptr m_AverageGeometryStore; //! unique_ptr m_store; //! uint32_t m_nHits; - - unique_ptr m_hitsSupportLayerStartStore; //! + + unique_ptr m_hitsSupportLayerStartStore; //! uint32_t const* m_hitsModuleStart; // needed for legacy, this is on GPU! 
@@ -101,41 +101,41 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH //m_store16 = Traits::template make_device_unique(nHits * n16, stream); //m_store32 = // Traits::template make_device_unique(nHits * n32 + phase1PixelTopology::numberOfLayers + 1, stream); - // We need to store all SoA rows for TrackingRecHit2DSOAView::HitsView(nHits) + + // We need to store all SoA rows for TrackingRecHit2DSOAView::HitsView(nHits) + // (phase1PixelTopology::numberOfLayers + 1) TrackingRecHit2DSOAView::PhiBinner::index_type. - // As mentioned above, alignment is not important, yet we want to have 32 bits + // As mentioned above, alignment is not important, yet we want to have 32 bits // (TrackingRecHit2DSOAView::PhiBinner::index_type exactly) alignement for the second part. // In order to simplify code, we align all to the minimum necessary size (sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type)). { // Simplify a bit following computations const size_t phiBinnerByteSize = - (phase1PixelTopology::numberOfLayers + 1) * sizeof (TrackingRecHit2DSOAStore::PhiBinner::index_type); + (phase1PixelTopology::numberOfLayers + 1) * sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type); // Allocate the buffer - m_hitsSupportLayerStartStore = Traits::template make_device_unique ( - TrackingRecHit2DSOAStore::HitsLayout::computeDataSize(m_nHits) + - TrackingRecHit2DSOAStore::SupportObjectsLayout::computeDataSize(m_nHits) + - phiBinnerByteSize, - stream); + m_hitsSupportLayerStartStore = Traits::template make_device_unique( + TrackingRecHit2DSOAStore::HitsLayout::computeDataSize(m_nHits) + + TrackingRecHit2DSOAStore::SupportObjectsLayout::computeDataSize(m_nHits) + phiBinnerByteSize, + stream); // Split the buffer in stores and array store->m_hitsLayout = TrackingRecHit2DSOAStore::HitsLayout(m_hitsSupportLayerStartStore.get(), nHits); - store->m_supportObjectsLayout = TrackingRecHit2DSOAStore::SupportObjectsLayout(store->m_hitsLayout.soaMetadata().nextByte(), 
nHits); - m_hitsLayerStart = store->m_hitsLayerStart = reinterpret_cast (store->m_supportObjectsLayout.soaMetadata().nextByte()); + store->m_supportObjectsLayout = + TrackingRecHit2DSOAStore::SupportObjectsLayout(store->m_hitsLayout.soaMetadata().nextByte(), nHits); + m_hitsLayerStart = store->m_hitsLayerStart = + reinterpret_cast(store->m_supportObjectsLayout.soaMetadata().nextByte()); // Record additional references - store->m_hitsAndSupportView = TrackingRecHit2DSOAStore::HitsAndSupportView( - store->m_hitsLayout, - store->m_supportObjectsLayout - ); + store->m_hitsAndSupportView = + TrackingRecHit2DSOAStore::HitsAndSupportView(store->m_hitsLayout, store->m_supportObjectsLayout); m_phiBinnerStorage = store->m_phiBinnerStorage = store->m_hitsAndSupportView.phiBinnerStorage(); m_iphi = store->m_hitsAndSupportView.iphi(); } m_PhiBinnerStore = Traits::template make_device_unique(stream); static_assert(sizeof(TrackingRecHit2DSOAStore::hindex_type) == sizeof(float)); - static_assert(sizeof(TrackingRecHit2DSOAStore::hindex_type) == sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type)); + static_assert(sizeof(TrackingRecHit2DSOAStore::hindex_type) == + sizeof(TrackingRecHit2DSOAStore::PhiBinner::index_type)); // copy all the pointers m_phiBinner = store->m_phiBinner = m_PhiBinnerStore.get(); - + // transfer view if constexpr (std::is_same::value) { cms::cuda::copyAsync(m_store, store, stream); diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc index 530daa600..6e2338f6c 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.cc @@ -1,16 +1,15 @@ #include "CUDADataFormats/TrackingRecHit2DHostSOAStore.h" -TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore(): - hitsLayout_(hits_h.get(), 0 /* size */, 1 /* byte alignement */) -{} +TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore() + : 
hitsLayout_(hits_h.get(), 0 /* size */, 1 /* byte alignement */) {} void TrackingRecHit2DHostSOAStore::reset() { hits_h.reset(); hitsLayout_ = TrackingRecHit2DSOAStore::HitsLayout(); } -TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore(size_t size, cudaStream_t stream): - hits_h(cms::cuda::make_host_unique(TrackingRecHit2DSOAStore::HitsLayout::computeDataSize(size), stream)), - hitsLayout_(hits_h.get(), size), - hitsView_(hitsLayout_) -{} +TrackingRecHit2DHostSOAStore::TrackingRecHit2DHostSOAStore(size_t size, cudaStream_t stream) + : hits_h(cms::cuda::make_host_unique(TrackingRecHit2DSOAStore::HitsLayout::computeDataSize(size), + stream)), + hitsLayout_(hits_h.get(), size), + hitsView_(hitsLayout_) {} diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h index f2b34e9bc..e587932d4 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DHostSOAStore.h @@ -6,16 +6,20 @@ #include "CUDACore/host_unique_ptr.h" template -class TrackingRecHit2DHeterogeneous; +class TrackingRecHit2DHeterogeneous; struct TrackingRecHit2DHostSOAStore { template friend class TrackingRecHit2DHeterogeneous; + public: TrackingRecHit2DHostSOAStore(); void reset(); - __device__ __forceinline__ const auto operator[](size_t i) const { return hitsView_[i]; } - __device__ __forceinline__ size_t size() { return /* TODO: move to view when view will embed size */hitsLayout_.soaMetadata().size(); } + __device__ __forceinline__ const auto operator[](size_t i) const { return hitsView_[i]; } + __device__ __forceinline__ size_t size() { + return /* TODO: move to view when view will embed size */ hitsLayout_.soaMetadata().size(); + } + private: TrackingRecHit2DHostSOAStore(size_t size, cudaStream_t stream); cms::cuda::host::unique_ptr hits_h; @@ -23,5 +27,4 @@ struct TrackingRecHit2DHostSOAStore { TrackingRecHit2DSOAStore::HitsView hitsView_; }; - -#endif // 
ndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostStore_h \ No newline at end of file +#endif // ndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAHostStore_h \ No newline at end of file diff --git a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h index c983dc6e5..7ee03c1ea 100644 --- a/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h +++ b/src/cudadev/CUDADataFormats/TrackingRecHit2DSOAView.h @@ -24,115 +24,103 @@ class TrackingRecHit2DSOAStore { template friend class TrackingRecHit2DHeterogeneous; - + __device__ __forceinline__ uint32_t nHits() const { return m_nHits; } // Our arrays do not require specific alignment as access will not be coalesced in the current implementation // Sill, we need the 32 bits integers to be aligned, so we simply declare the SoA with the 32 bits fields first - // and the 16 bits behind (as they have a looser alignment requirement. Then the SoA can be create with a byte + // and the 16 bits behind (as they have a looser alignment requirement. 
Then the SoA can be create with a byte // alignment of 1) GENERATE_SOA_LAYOUT(HitsLayoutTemplate, - // 32 bits section - // local coord - SOA_COLUMN(float, xLocal), - SOA_COLUMN(float, yLocal), - SOA_COLUMN(float, xerrLocal), - SOA_COLUMN(float, yerrLocal), - - // global coord - SOA_COLUMN(float, xGlobal), - SOA_COLUMN(float, yGlobal), - SOA_COLUMN(float, zGlobal), - SOA_COLUMN(float, rGlobal), - // global coordinates continue in the 16 bits section - - // cluster properties - SOA_COLUMN(int32_t, charge), - - // 16 bits section (and cluster properties immediately continued) - SOA_COLUMN(int16_t, clusterSizeX), - SOA_COLUMN(int16_t, clusterSizeY) - ) - + // 32 bits section + // local coord + SOA_COLUMN(float, xLocal), + SOA_COLUMN(float, yLocal), + SOA_COLUMN(float, xerrLocal), + SOA_COLUMN(float, yerrLocal), + + // global coord + SOA_COLUMN(float, xGlobal), + SOA_COLUMN(float, yGlobal), + SOA_COLUMN(float, zGlobal), + SOA_COLUMN(float, rGlobal), + // global coordinates continue in the 16 bits section + + // cluster properties + SOA_COLUMN(int32_t, charge), + + // 16 bits section (and cluster properties immediately continued) + SOA_COLUMN(int16_t, clusterSizeX), + SOA_COLUMN(int16_t, clusterSizeY)) + // The hits layout does not use default alignment but a more relaxed one. 
using HitsLayout = HitsLayoutTemplate; - + GENERATE_SOA_VIEW(HitsViewTemplate, - SOA_VIEW_LAYOUT_LIST( - SOA_VIEW_LAYOUT(HitsLayout, hitsLayout) - ), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(hitsLayout, xLocal), - SOA_VIEW_VALUE(hitsLayout, yLocal), - SOA_VIEW_VALUE(hitsLayout, xerrLocal), - SOA_VIEW_VALUE(hitsLayout, yerrLocal), - - SOA_VIEW_VALUE(hitsLayout, xGlobal), - SOA_VIEW_VALUE(hitsLayout, yGlobal), - SOA_VIEW_VALUE(hitsLayout, zGlobal), - SOA_VIEW_VALUE(hitsLayout, rGlobal), - - SOA_VIEW_VALUE(hitsLayout, charge), - SOA_VIEW_VALUE(hitsLayout, clusterSizeX), - SOA_VIEW_VALUE(hitsLayout, clusterSizeY) - ) - ) - + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(HitsLayout, hitsLayout)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(hitsLayout, xLocal), + SOA_VIEW_VALUE(hitsLayout, yLocal), + SOA_VIEW_VALUE(hitsLayout, xerrLocal), + SOA_VIEW_VALUE(hitsLayout, yerrLocal), + + SOA_VIEW_VALUE(hitsLayout, xGlobal), + SOA_VIEW_VALUE(hitsLayout, yGlobal), + SOA_VIEW_VALUE(hitsLayout, zGlobal), + SOA_VIEW_VALUE(hitsLayout, rGlobal), + + SOA_VIEW_VALUE(hitsLayout, charge), + SOA_VIEW_VALUE(hitsLayout, clusterSizeX), + SOA_VIEW_VALUE(hitsLayout, clusterSizeY))) + using HitsView = HitsViewTemplate<>; - + GENERATE_SOA_LAYOUT(SupportObjectsLayoutTemplate, - // This is the end of the data which is transferred to host. The following columns are supporting - // objects, not transmitted - - // Supporting data (32 bits aligned) - SOA_COLUMN(TrackingRecHit2DSOAStore::PhiBinner::index_type, phiBinnerStorage), - - // global coordinates (not transmitted) - SOA_COLUMN(int16_t, iphi), - - // cluster properties (not transmitted) - SOA_COLUMN(uint16_t, detectorIndex) - ); - + // This is the end of the data which is transferred to host. 
The following columns are supporting + // objects, not transmitted + + // Supporting data (32 bits aligned) + SOA_COLUMN(TrackingRecHit2DSOAStore::PhiBinner::index_type, phiBinnerStorage), + + // global coordinates (not transmitted) + SOA_COLUMN(int16_t, iphi), + + // cluster properties (not transmitted) + SOA_COLUMN(uint16_t, detectorIndex)) + // The support objects layouts also not use default alignment but a more relaxed one. using SupportObjectsLayout = SupportObjectsLayoutTemplate; - + GENERATE_SOA_VIEW(HitsAndSupportViewTemplate, - SOA_VIEW_LAYOUT_LIST( - SOA_VIEW_LAYOUT(HitsLayout, hitsLayout), - SOA_VIEW_LAYOUT(SupportObjectsLayout, supportObjectsLayout) - ), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(hitsLayout, xLocal), - SOA_VIEW_VALUE(hitsLayout, yLocal), - SOA_VIEW_VALUE(hitsLayout, xerrLocal), - SOA_VIEW_VALUE(hitsLayout, yerrLocal), - - SOA_VIEW_VALUE(hitsLayout, xGlobal), - SOA_VIEW_VALUE(hitsLayout, yGlobal), - SOA_VIEW_VALUE(hitsLayout, zGlobal), - SOA_VIEW_VALUE(hitsLayout, rGlobal), - - SOA_VIEW_VALUE(hitsLayout, charge), - SOA_VIEW_VALUE(hitsLayout, clusterSizeX), - SOA_VIEW_VALUE(hitsLayout, clusterSizeY), - - SOA_VIEW_VALUE(supportObjectsLayout, phiBinnerStorage), - SOA_VIEW_VALUE(supportObjectsLayout, iphi), - SOA_VIEW_VALUE(supportObjectsLayout, detectorIndex) - ) - ); - + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(HitsLayout, hitsLayout), + SOA_VIEW_LAYOUT(SupportObjectsLayout, supportObjectsLayout)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(hitsLayout, xLocal), + SOA_VIEW_VALUE(hitsLayout, yLocal), + SOA_VIEW_VALUE(hitsLayout, xerrLocal), + SOA_VIEW_VALUE(hitsLayout, yerrLocal), + + SOA_VIEW_VALUE(hitsLayout, xGlobal), + SOA_VIEW_VALUE(hitsLayout, yGlobal), + SOA_VIEW_VALUE(hitsLayout, zGlobal), + SOA_VIEW_VALUE(hitsLayout, rGlobal), + + SOA_VIEW_VALUE(hitsLayout, charge), + SOA_VIEW_VALUE(hitsLayout, clusterSizeX), + SOA_VIEW_VALUE(hitsLayout, clusterSizeY), + + SOA_VIEW_VALUE(supportObjectsLayout, phiBinnerStorage), + 
SOA_VIEW_VALUE(supportObjectsLayout, iphi), + SOA_VIEW_VALUE(supportObjectsLayout, detectorIndex))) + using HitsAndSupportView = HitsAndSupportViewTemplate; - + // Shortcut operator saving the explicit calls to view in usage. - __device__ __forceinline__ HitsAndSupportView::element operator[] (size_t index) { - return m_hitsAndSupportView[index]; + __device__ __forceinline__ HitsAndSupportView::element operator[](size_t index) { + return m_hitsAndSupportView[index]; } - __device__ __forceinline__ HitsAndSupportView::const_element operator[] (size_t index) const { + __device__ __forceinline__ HitsAndSupportView::const_element operator[](size_t index) const { return m_hitsAndSupportView[index]; } - + __device__ __forceinline__ pixelCPEforGPU::ParamsOnGPU const& cpeParams() const { return *m_cpeParams; } __device__ __forceinline__ uint32_t hitsModuleStart(int i) const { return __ldg(m_hitsModuleStart + i); } @@ -153,7 +141,7 @@ class TrackingRecHit2DSOAStore { SupportObjectsLayout m_supportObjectsLayout; // Global view simplifying usage HitsAndSupportView m_hitsAndSupportView; - + // individually defined supporting objects // m_averageGeometry is corrected for beam spot, not sure where to host it otherwise AverageGeometry* m_averageGeometry; // owned by TrackingRecHit2DHeterogeneous diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h index 711675a5d..af60caf9d 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMapping.h @@ -26,34 +26,30 @@ struct SiPixelROCsStatusAndMapping { }; GENERATE_SOA_LAYOUT(SiPixelROCsStatusAndMappingLayoutTemplate, - SOA_COLUMN(unsigned int, fed), - SOA_COLUMN(unsigned int, link), - SOA_COLUMN(unsigned int, roc), - SOA_COLUMN(unsigned int, rawId), - SOA_COLUMN(unsigned int, rocInDet), - SOA_COLUMN(unsigned int, moduleId), - SOA_COLUMN(unsigned char, badRocs), - SOA_SCALAR(unsigned int, size) -) + 
SOA_COLUMN(unsigned int, fed), + SOA_COLUMN(unsigned int, link), + SOA_COLUMN(unsigned int, roc), + SOA_COLUMN(unsigned int, rawId), + SOA_COLUMN(unsigned int, rocInDet), + SOA_COLUMN(unsigned int, moduleId), + SOA_COLUMN(unsigned char, badRocs), + SOA_SCALAR(unsigned int, size)) using SiPixelROCsStatusAndMappingLayout = SiPixelROCsStatusAndMappingLayoutTemplate<>; GENERATE_SOA_CONST_VIEW(SiPixelROCsStatusAndMappingConstViewTemplate, - SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SiPixelROCsStatusAndMappingLayout, mappingLayout)), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(mappingLayout, fed), - SOA_VIEW_VALUE(mappingLayout, link), - SOA_VIEW_VALUE(mappingLayout, roc), - SOA_VIEW_VALUE(mappingLayout, rawId), - SOA_VIEW_VALUE(mappingLayout, rocInDet), - SOA_VIEW_VALUE(mappingLayout, moduleId), - SOA_VIEW_VALUE(mappingLayout, badRocs), - SOA_VIEW_VALUE(mappingLayout, size) - ) -) + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SiPixelROCsStatusAndMappingLayout, mappingLayout)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(mappingLayout, fed), + SOA_VIEW_VALUE(mappingLayout, link), + SOA_VIEW_VALUE(mappingLayout, roc), + SOA_VIEW_VALUE(mappingLayout, rawId), + SOA_VIEW_VALUE(mappingLayout, rocInDet), + SOA_VIEW_VALUE(mappingLayout, moduleId), + SOA_VIEW_VALUE(mappingLayout, badRocs), + SOA_VIEW_VALUE(mappingLayout, size))) // Slightly more complex than using, but allows forward declarations. 
-struct SiPixelROCsStatusAndMappingConstView: public SiPixelROCsStatusAndMappingConstViewTemplate<> { +struct SiPixelROCsStatusAndMappingConstView : public SiPixelROCsStatusAndMappingConstViewTemplate<> { using SiPixelROCsStatusAndMappingConstViewTemplate<>::SiPixelROCsStatusAndMappingConstViewTemplate; }; diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc index 2b7cc79cd..c09ed1852 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.cc @@ -24,22 +24,22 @@ SiPixelROCsStatusAndMappingWrapper::SiPixelROCsStatusAndMappingWrapper(SiPixelRO std::copy(modToUnp.begin(), modToUnp.end(), modToUnpDefault.begin()); } -SiPixelROCsStatusAndMappingConstView SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync(cudaStream_t cudaStream) const { - const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, - [this](GPUData& data, cudaStream_t stream) { - // allocate - data.allocate(stream); - // transfer - cms::cuda::copyAsync(data.cablingMapDevice, this->cablingMapHost, stream); - } - ); +SiPixelROCsStatusAndMappingConstView SiPixelROCsStatusAndMappingWrapper::getGPUProductAsync( + cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { + // allocate + data.allocate(stream); + // transfer + cms::cuda::copyAsync(data.cablingMapDevice, this->cablingMapHost, stream); + }); return data.cablingMapDeviceView; } const unsigned char* SiPixelROCsStatusAndMappingWrapper::getModToUnpAllAsync(cudaStream_t cudaStream) const { const auto& data = modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cudaStream_t stream) { - data.modToUnpDefault = cms::cuda::make_device_unique(pixelgpudetails::MAX_SIZE_BYTE_BOOL, stream); + data.modToUnpDefault = + 
cms::cuda::make_device_unique(pixelgpudetails::MAX_SIZE_BYTE_BOOL, stream); cudaCheck(cudaMemcpyAsync(data.modToUnpDefault.get(), this->modToUnpDefault.data(), this->modToUnpDefault.size() * sizeof(unsigned char), diff --git a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h index 61f0f5b3c..120ce4d29 100644 --- a/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h +++ b/src/cudadev/CondFormats/SiPixelROCsStatusAndMappingWrapper.h @@ -15,7 +15,7 @@ class SiPixelROCsStatusAndMappingWrapper { public: /* This is using a layout as the size is needed. TODO: use views when views start embedding size. */ explicit SiPixelROCsStatusAndMappingWrapper(SiPixelROCsStatusAndMapping const &cablingMap, - std::vector modToUnp); + std::vector modToUnp); bool hasQuality() const { return hasQuality_; } @@ -35,27 +35,26 @@ class SiPixelROCsStatusAndMappingWrapper { void allocate(cudaStream_t stream) { cablingMapDevice = cms::cuda::make_device_unique(stream); // Populate the view with individual column pointers - auto & cmd = *cablingMapDevice; + auto &cmd = *cablingMapDevice; cablingMapDeviceView = SiPixelROCsStatusAndMappingConstView( - pixelgpudetails::MAX_SIZE, - cmd.fed, // Those are array pointers (in device, but we won't dereference them here). - cmd.link, - cmd.roc, - cmd.rawId, - cmd.rocInDet, - cmd.moduleId, - cmd.badRocs, - &cmd.size // This is a scalar, we need the address-of operator + pixelgpudetails::MAX_SIZE, + cmd.fed, // Those are array pointers (in device, but we won't dereference them here). 
+ cmd.link, + cmd.roc, + cmd.rawId, + cmd.rocInDet, + cmd.moduleId, + cmd.badRocs, + &cmd.size // This is a scalar, we need the address-of operator ); } cms::cuda::device::unique_ptr cablingMapDevice; - SiPixelROCsStatusAndMappingConstView cablingMapDeviceView; // map struct in GPU - + SiPixelROCsStatusAndMappingConstView cablingMapDeviceView; // map struct in GPU }; cms::cuda::ESProduct gpuData_; struct ModulesToUnpack { - cms::cuda::device::unique_ptr modToUnpDefault; // pointer to GPU + cms::cuda::device::unique_ptr modToUnpDefault; // pointer to GPU }; cms::cuda::ESProduct modToUnp_; }; diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index cd3ac76cb..28727319e 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -25,181 +25,184 @@ // Exception throwing (or willful crash in kernels) #if defined(__CUDACC__) && defined(__CUDA_ARCH__) -#define SOA_THROW_OUT_OF_RANGE(A) { printf (A); *((char *)nullptr) = 0; } +#define SOA_THROW_OUT_OF_RANGE(A) \ + { \ + printf(A); \ + *((char*)nullptr) = 0; \ + } #else -#define SOA_THROW_OUT_OF_RANGE(A) { throw std::out_of_range(A); } +#define SOA_THROW_OUT_OF_RANGE(A) \ + { throw std::out_of_range(A); } #endif // compile-time sized SoA namespace cms::soa { -enum class RestrictQualify : bool { Enabled, Disabled, Default = Disabled }; - -enum class RangeChecking: bool { Enabled, Disabled, Default = Disabled }; - -template -struct add_restrict {}; - -template -struct add_restrict { - typedef T Value; - typedef T * __restrict__ Pointer; - typedef T & __restrict__ Reference; - typedef const T ConstValue; - typedef const T * __restrict__ PointerToConst; - typedef const T & __restrict__ ReferenceToConst; -}; - -template -struct add_restrict { - typedef T Value; - typedef T * Pointer; - typedef T & Reference; - typedef const T ConstValue; - typedef const T * PointerToConst; - typedef const T & ReferenceToConst; -}; - -// Helper template managing the value 
within it column -// The optional compile time alignment parameter enables informing the -// compiler of alignment (enforced by caller). -template -class SoAValue { -public: - typedef add_restrict Restr; - typedef typename Restr::Value Val; - typedef typename Restr::Pointer Ptr; - typedef typename Restr::Reference Ref; - typedef typename Restr::PointerToConst PtrToConst; - typedef typename Restr::ReferenceToConst RefToConst; - SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T* col) : idx_(i), col_(col) {} - /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ - SOA_HOST_DEVICE_INLINE Ref operator()() { - // Ptr type will add the restrict qualifyer if needed - Ptr col = alignedCol(); - return col[idx_]; - } - SOA_HOST_DEVICE_INLINE RefToConst operator()() const { - // PtrToConst type will add the restrict qualifyer if needed - PtrToConst col = alignedCol(); - return col[idx_]; - } - SOA_HOST_DEVICE_INLINE Ptr operator&() { return &alignedCol()[idx_]; } - SOA_HOST_DEVICE_INLINE PtrToConst operator&() const { return &alignedCol()[idx_]; } - template - SOA_HOST_DEVICE_INLINE Ref operator=(const T2& v) { - return alignedCol()[idx_] = v; - } - typedef Val valueType; - static constexpr auto valueSize = sizeof(T); + enum class RestrictQualify : bool { Enabled, Disabled, Default = Disabled }; + + enum class RangeChecking : bool { Enabled, Disabled, Default = Disabled }; + + template + struct add_restrict {}; + + template + struct add_restrict { + typedef T Value; + typedef T* __restrict__ Pointer; + typedef T& __restrict__ Reference; + typedef const T ConstValue; + typedef const T* __restrict__ PointerToConst; + typedef const T& __restrict__ ReferenceToConst; + }; + + template + struct add_restrict { + typedef T Value; + typedef T* Pointer; + typedef T& Reference; + typedef const T ConstValue; + typedef const T* PointerToConst; + typedef const T& ReferenceToConst; + }; -private: - SOA_HOST_DEVICE_INLINE Ptr alignedCol() const { - if constexpr (ALIGNMENT) { - return 
reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); + // Helper template managing the value within it column + // The optional compile time alignment parameter enables informing the + // compiler of alignment (enforced by caller). + template + class SoAValue { + public: + typedef add_restrict Restr; + typedef typename Restr::Value Val; + typedef typename Restr::Pointer Ptr; + typedef typename Restr::Reference Ref; + typedef typename Restr::PointerToConst PtrToConst; + typedef typename Restr::ReferenceToConst RefToConst; + SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T* col) : idx_(i), col_(col) {} + /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ + SOA_HOST_DEVICE_INLINE Ref operator()() { + // Ptr type will add the restrict qualifyer if needed + Ptr col = alignedCol(); + return col[idx_]; } - return reinterpret_cast(col_); - } - size_t idx_; - T* col_; -}; - -// Helper template managing the value within it column -template -class SoAConstValue { -public: - typedef add_restrict Restr; - typedef typename Restr::Value Val; - typedef typename Restr::Pointer Ptr; - typedef typename Restr::Reference Ref; - typedef typename Restr::PointerToConst PtrToConst; - typedef typename Restr::ReferenceToConst RefToConst; - SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, const T* col) : idx_(i), col_(col) {} - /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ - SOA_HOST_DEVICE_INLINE RefToConst operator()() const { - // Ptr type will add the restrict qualifyer if needed - PtrToConst col = alignedCol(); - return col[idx_]; - } - SOA_HOST_DEVICE_INLINE const T* operator&() const { return &alignedCol()[idx_]; } - typedef T valueType; - static constexpr auto valueSize = sizeof(T); - -private: - SOA_HOST_DEVICE_INLINE PtrToConst alignedCol() const { - if constexpr (ALIGNMENT) { - return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); + SOA_HOST_DEVICE_INLINE RefToConst operator()() const { + // PtrToConst type will add the restrict 
qualifyer if needed + PtrToConst col = alignedCol(); + return col[idx_]; } - return reinterpret_cast(col_) ; - } - size_t idx_; - const T* col_; -}; - -// Helper template managing the value within it column -// TODO Create a const variant to avoid leaking mutable access. -template -class SoAEigenValue { -public: - typedef C Type; - typedef Eigen::Map> MapType; - typedef Eigen::Map> CMapType; - SOA_HOST_DEVICE_INLINE SoAEigenValue(size_t i, typename C::Scalar* col, size_t stride) - : val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), - crCol_(col), - cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), - stride_(stride) {} - SOA_HOST_DEVICE_INLINE MapType& operator()() { return val_; } - SOA_HOST_DEVICE_INLINE const CMapType& operator()() const { return cVal_; } - SOA_HOST_DEVICE_INLINE operator C() { return val_; } - SOA_HOST_DEVICE_INLINE operator const C() const { return cVal_; } - SOA_HOST_DEVICE_INLINE C* operator&() { return &val_; } - SOA_HOST_DEVICE_INLINE const C* operator&() const { return &cVal_; } - template - SOA_HOST_DEVICE_INLINE MapType& operator=(const C2& v) { - return val_ = v; - } - typedef typename C::Scalar ValueType; - static constexpr auto valueSize = sizeof(C::Scalar); - SOA_HOST_DEVICE_INLINE size_t stride() { return stride_; } - -private: - MapType val_; - const typename C::Scalar* __restrict__ crCol_; - CMapType cVal_; - size_t stride_; -}; - -// Helper template to avoid commas in macro -template -struct EigenConstMapMaker { - typedef Eigen::Map> Type; - class DataHolder { + SOA_HOST_DEVICE_INLINE Ptr operator&() { return &alignedCol()[idx_]; } + SOA_HOST_DEVICE_INLINE PtrToConst operator&() const { return &alignedCol()[idx_]; } + template + SOA_HOST_DEVICE_INLINE Ref operator=(const T2& v) { + return alignedCol()[idx_] = v; + } + typedef Val valueType; + static constexpr auto valueSize = sizeof(T); + + private: + SOA_HOST_DEVICE_INLINE Ptr alignedCol() const { + if 
constexpr (ALIGNMENT) { + return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); + } + return reinterpret_cast(col_); + } + size_t idx_; + T* col_; + }; + + // Helper template managing the value within it column + template + class SoAConstValue { public: - DataHolder(const typename C::Scalar* data) : data_(data) {} - EigenConstMapMaker::Type withStride(size_t stride) { - return EigenConstMapMaker::Type( - data_, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)); + typedef add_restrict Restr; + typedef typename Restr::Value Val; + typedef typename Restr::Pointer Ptr; + typedef typename Restr::Reference Ref; + typedef typename Restr::PointerToConst PtrToConst; + typedef typename Restr::ReferenceToConst RefToConst; + SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, const T* col) : idx_(i), col_(col) {} + /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ + SOA_HOST_DEVICE_INLINE RefToConst operator()() const { + // Ptr type will add the restrict qualifyer if needed + PtrToConst col = alignedCol(); + return col[idx_]; } + SOA_HOST_DEVICE_INLINE const T* operator&() const { return &alignedCol()[idx_]; } + typedef T valueType; + static constexpr auto valueSize = sizeof(T); private: - const typename C::Scalar* const data_; + SOA_HOST_DEVICE_INLINE PtrToConst alignedCol() const { + if constexpr (ALIGNMENT) { + return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); + } + return reinterpret_cast(col_); + } + size_t idx_; + const T* col_; }; - static DataHolder withData(const typename C::Scalar* data) { return DataHolder(data); } -}; -// Helper function to compute aligned size -inline size_t alignSize(size_t size, size_t alignment = 128) { - if (size) - return ((size - 1) / alignment + 1) * alignment; - else - return 0; -} + // Helper template managing the value within it column + // TODO Create a const variant to avoid leaking mutable access. 
+ template + class SoAEigenValue { + public: + typedef C Type; + typedef Eigen::Map> MapType; + typedef Eigen::Map> CMapType; + SOA_HOST_DEVICE_INLINE SoAEigenValue(size_t i, typename C::Scalar* col, size_t stride) + : val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + crCol_(col), + cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + stride_(stride) {} + SOA_HOST_DEVICE_INLINE MapType& operator()() { return val_; } + SOA_HOST_DEVICE_INLINE const CMapType& operator()() const { return cVal_; } + SOA_HOST_DEVICE_INLINE operator C() { return val_; } + SOA_HOST_DEVICE_INLINE operator const C() const { return cVal_; } + SOA_HOST_DEVICE_INLINE C* operator&() { return &val_; } + SOA_HOST_DEVICE_INLINE const C* operator&() const { return &cVal_; } + template + SOA_HOST_DEVICE_INLINE MapType& operator=(const C2& v) { + return val_ = v; + } + typedef typename C::Scalar ValueType; + static constexpr auto valueSize = sizeof(C::Scalar); + SOA_HOST_DEVICE_INLINE size_t stride() { return stride_; } -} // namespace cms::soa + private: + MapType val_; + const typename C::Scalar* __restrict__ crCol_; + CMapType cVal_; + size_t stride_; + }; + + // Helper template to avoid commas in macro + template + struct EigenConstMapMaker { + typedef Eigen::Map> Type; + class DataHolder { + public: + DataHolder(const typename C::Scalar* data) : data_(data) {} + EigenConstMapMaker::Type withStride(size_t stride) { + return EigenConstMapMaker::Type( + data_, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)); + } + + private: + const typename C::Scalar* const data_; + }; + static DataHolder withData(const typename C::Scalar* data) { return DataHolder(data); } + }; + + // Helper function to compute aligned size + inline size_t alignSize(size_t size, size_t alignment = 128) { + if (size) + return ((size - 1) / alignment + 1) * alignment; + else + return 0; + } + +} // namespace cms::soa /* declare 
"scalars" (one value shared across the whole SoA) and "columns" (one value per element) */ #define _VALUE_TYPE_SCALAR 0 @@ -208,9 +211,13 @@ inline size_t alignSize(size_t size, size_t alignment = 128) { namespace cms::soa { -enum class SoAColumnType { scalar = _VALUE_TYPE_SCALAR, column = _VALUE_TYPE_COLUMN, eigen = _VALUE_TYPE_EIGEN_COLUMN }; + enum class SoAColumnType { + scalar = _VALUE_TYPE_SCALAR, + column = _VALUE_TYPE_COLUMN, + eigen = _VALUE_TYPE_EIGEN_COLUMN + }; -} // namespace cms::soa +} // namespace cms::soa #define SOA_SCALAR(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) #define SOA_COLUMN(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) @@ -234,83 +241,90 @@ enum class SoAColumnType { scalar = _VALUE_TYPE_SCALAR, column = _VALUE_TYPE_COL namespace cms::soa { -/* Column accessors: templates implementing the global accesors (soa::x() and soa::x(index) */ -enum class SoAAccessType: bool { mutableAccess, constAccess }; - -template -struct SoAColumnAccessorsImpl {}; - - -// Todo: add alignment support. -// Sfinae based const/non const variants. 
-// Column -template -struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T * baseAddress): baseAddress_(baseAddress) {} - SOA_HOST_DEVICE_INLINE T * operator()() { return baseAddress_; } - SOA_HOST_DEVICE_INLINE T & operator()(size_t index) { return baseAddress_[index]; } -private: - T * baseAddress_; -}; - -// Const column -template -struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const T * baseAddress): baseAddress_(baseAddress) {} - SOA_HOST_DEVICE_INLINE const T * operator()() const { return baseAddress_; } - SOA_HOST_DEVICE_INLINE T operator()(size_t index) const { return baseAddress_[index]; } -private: - const T * baseAddress_; -}; - -// Scalar -template -struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T * baseAddress): baseAddress_(baseAddress) {} - SOA_HOST_DEVICE_INLINE T & operator() () { return *baseAddress_; } - SOA_HOST_DEVICE_INLINE void operator() (size_t index) const { assert (false && "Indexed access impossible for SoA scalars."); } -private: - T * baseAddress_; -}; - -// Const scalar -template -struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const T * baseAddress): baseAddress_(baseAddress) {} - SOA_HOST_DEVICE_INLINE T operator() () const { return *baseAddress_; } - SOA_HOST_DEVICE_INLINE void operator() (size_t index) const { assert (false && "Indexed access impossible for SoA scalars."); } -private: - const T * baseAddress_; -}; - -/* A helper template stager avoiding comma in macros */ -template -struct SoAAccessors{ - using myInt = int; - template - struct ColumnType { + /* Column accessors: templates implementing the global accesors (soa::x() and soa::x(index) */ + enum class SoAAccessType : bool { mutableAccess, constAccess }; + + template + struct SoAColumnAccessorsImpl {}; + + // Todo: add alignment support. + // Sfinae based const/non const variants. 
+ // Column + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T* baseAddress) : baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE T* operator()() { return baseAddress_; } + SOA_HOST_DEVICE_INLINE T& operator()(size_t index) { return baseAddress_[index]; } + + private: + T* baseAddress_; + }; + + // Const column + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const T* baseAddress) : baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE const T* operator()() const { return baseAddress_; } + SOA_HOST_DEVICE_INLINE T operator()(size_t index) const { return baseAddress_[index]; } + + private: + const T* baseAddress_; + }; + + // Scalar + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T* baseAddress) : baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE T& operator()() { return *baseAddress_; } + SOA_HOST_DEVICE_INLINE void operator()(size_t index) const { + assert(false && "Indexed access impossible for SoA scalars."); + } + + private: + T* baseAddress_; + }; + + // Const scalar + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const T* baseAddress) : baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE T operator()() const { return *baseAddress_; } + SOA_HOST_DEVICE_INLINE void operator()(size_t index) const { + assert(false && "Indexed access impossible for SoA scalars."); + } + + private: + const T* baseAddress_; + }; + + /* A helper template stager avoiding comma in macros */ + template + struct SoAAccessors { using myInt = int; - template - struct AccessType: public SoAColumnAccessorsImpl { + template + struct ColumnType { using myInt = int; - using SoAColumnAccessorsImpl::SoAColumnAccessorsImpl; + template + struct AccessType : public SoAColumnAccessorsImpl { + using myInt = int; + using SoAColumnAccessorsImpl::SoAColumnAccessorsImpl; + }; }; }; -}; -/* Enum parameters allowing 
templated control of layout/view behaviors */ -/* Alignement enforcement verifies every column is aligned, and + /* Enum parameters allowing templated control of layout/view behaviors */ + /* Alignement enforcement verifies every column is aligned, and * hints the compiler that it can expect column pointers to be aligned */ -enum class AlignmentEnforcement : bool { Relaxed, Enforced }; - -struct CacheLineSize { - static constexpr size_t NvidiaGPU = 128; - static constexpr size_t IntelCPU = 64; - static constexpr size_t AMDCPU = 64; - static constexpr size_t ARMCPU = 64; - static constexpr size_t defaultSize = NvidiaGPU; -}; + enum class AlignmentEnforcement : bool { Relaxed, Enforced }; + + struct CacheLineSize { + static constexpr size_t NvidiaGPU = 128; + static constexpr size_t IntelCPU = 64; + static constexpr size_t AMDCPU = 64; + static constexpr size_t ARMCPU = 64; + static constexpr size_t defaultSize = NvidiaGPU; + }; -} // namespace cms::soa +} // namespace cms::soa #endif // ndef DataStructures_SoACommon_h diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 058d7d91a..0f17adde7 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -92,37 +92,33 @@ /** * SoAMetadata member computing column pitch */ -#define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE( \ - VALUE_TYPE, \ - /* Scalar */ \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * ParentClass::byteAlignment; \ - } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::scalar; \ - CPP_TYPE const * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ - CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); } \ - , \ - /* Column */ \ - CPP_TYPE const * 
BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ - CPP_TYPE * BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); } \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ - ParentClass::byteAlignment; \ - } \ - typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::column; \ - , \ - /* Eigen column */ \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ - ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ - } \ - typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::eigen; \ - CPP_TYPE::Scalar const * BOOST_PP_CAT(addressOf_, NAME)() const { return parent_.BOOST_PP_CAT(NAME, _); } \ - CPP_TYPE::Scalar * BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); } \ - ) +#define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, /* Scalar */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * ParentClass::byteAlignment; \ + } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::scalar; \ + CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() const { \ + return parent_.BOOST_PP_CAT(NAME, _); \ + } CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); }, /* Column */ \ + CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() \ + const { return parent_.BOOST_PP_CAT(NAME, _); } CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { \ + return parent_.BOOST_PP_CAT(NAME, _); \ 
+ } size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ + ParentClass::byteAlignment; \ + } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::column; \ + , /* Eigen column */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ + ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::eigen; \ + CPP_TYPE::Scalar const* BOOST_PP_CAT(addressOf_, NAME)() const { \ + return parent_.BOOST_PP_CAT(NAME, _); \ + } CPP_TYPE::Scalar* BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); }) #define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME @@ -227,18 +223,19 @@ /* * A macro defining a SoA layout (collection of scalars and columns of equal lengths) */ -#define GENERATE_SOA_LAYOUT(CLASS, ...) \ +// clang-format off +#define GENERATE_SOA_LAYOUT(CLASS, ...) \ template \ + cms::soa::AlignmentEnforcement ALIGNMENT_ENFORCEMENT = cms::soa::AlignmentEnforcement::Relaxed> \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ \ - /* For CUDA applications, we align to the 128 bytes of the cache lines. \ - * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ - * up to compute capability 8.X. \ - */ \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. 
\ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ constexpr static size_t defaultAlignment = 128; \ constexpr static size_t byteAlignment = ALIGNMENT; \ constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ @@ -271,22 +268,25 @@ return ret; \ } \ \ - /** \ - * Helper/friend class allowing SoA introspection. \ - */ \ + /** \ + * Helper/friend class allowing SoA introspection. \ + */ \ struct SoAMetadata { \ friend CLASS; \ SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ SOA_HOST_DEVICE_INLINE size_t byteSize() const { return parent_.byteSize_; } \ SOA_HOST_DEVICE_INLINE size_t byteAlignment() const { return CLASS::byteAlignment; } \ - SOA_HOST_DEVICE_INLINE std::byte* data() { return parent_.mem_; } \ + SOA_HOST_DEVICE_INLINE std::byte* data() { return parent_.mem_; } \ SOA_HOST_DEVICE_INLINE const std::byte* data() const { return parent_.mem_; } \ SOA_HOST_DEVICE_INLINE std::byte* nextByte() const { return parent_.mem_ + parent_.byteSize_; } \ - SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) const { return CLASS(addr, parent_.nElements_); } \ + SOA_HOST_DEVICE_INLINE CLASS cloneToNewAddress(std::byte* addr) const { \ + return CLASS(addr, parent_.nElements_); \ + } \ _ITERATE_ON_ALL(_DEFINE_METADATA_MEMBERS, ~, __VA_ARGS__) \ \ - SoAMetadata & operator=(const SoAMetadata &) = delete; \ - SoAMetadata(const SoAMetadata &) = delete; \ + SoAMetadata& operator=(const SoAMetadata&) = delete; \ + SoAMetadata(const SoAMetadata&) = delete; \ + \ private: \ SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ const CLASS& parent_; \ @@ -344,5 +344,6 @@ size_t byteSize_; \ _ITERATE_ON_ALL(_DECLARE_SOA_DATA_MEMBER, ~, __VA_ARGS__) \ }; +// clang-format on #endif // ndef DataStructures_SoALayout_h diff --git a/src/cudadev/DataFormats/SoAView.h 
b/src/cudadev/DataFormats/SoAView.h index 6aee9d1f6..4a34cf759 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -40,31 +40,33 @@ namespace cms::soa { -/* Traits for the different column type scenarios */ -/* Value traits passes the class as is in the case of column type and return + /* Traits for the different column type scenarios */ + /* Value traits passes the class as is in the case of column type and return * an empty class with functions returning non-scalar as accessors. */ -template -struct ConstValueTraits {}; + template + struct ConstValueTraits {}; -template -struct ConstValueTraits : public C { using C::C; }; + template + struct ConstValueTraits : public C { + using C::C; + }; -template -struct ConstValueTraits { - // Just take to SoAValue type to generate the right constructor. - SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::valueType *) {} - // Any attempt to do anything with the "scalar" value a const element will fail. -}; + template + struct ConstValueTraits { + // Just take to SoAValue type to generate the right constructor. + SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::valueType*) {} + // Any attempt to do anything with the "scalar" value a const element will fail. + }; -template -struct ConstValueTraits { - // Just take to SoAValue type to generate the right constructor. - SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::valueType *) {} - // TODO: implement - // Any attempt to do anything with the eigen value a const element will fail. -}; + template + struct ConstValueTraits { + // Just take to SoAValue type to generate the right constructor. + SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::valueType*) {} + // TODO: implement + // Any attempt to do anything with the eigen value a const element will fail. 
+ }; -} // namespace cms::soa; +} // namespace cms::soa #include /* @@ -84,14 +86,14 @@ struct ConstValueTraits { */ #define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ typedef typename BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(TypeOf_, LAYOUT_MEMBER) \ - BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ + BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, LAYOUT_MEMBER); \ - SOA_HOST_DEVICE_INLINE \ - DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ - return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ - }; \ - static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != cms::soa::SoAColumnType::eigen, \ + SOA_HOST_DEVICE_INLINE \ + DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ + return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ + }; \ + static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != cms::soa::SoAColumnType::eigen, \ "Eigen columns not supported in views."); #define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -121,21 +123,22 @@ struct ConstValueTraits { (DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME) #define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS(R, DATA, LAYOUT_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + BOOST_PP_EXPAND( \ + _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) /** * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. 
*/ -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(LAYOUT, MEMBER, NAME) \ - (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ - static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != cms::soa::SoAColumnType::eigen, \ - "Eigen values not supported in views"); \ - auto addr = LAYOUT.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ - if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ - if (reinterpret_cast(addr) % byteAlignment) \ - throw std::out_of_range("In constructor by layout: misaligned column: " #NAME); \ - return addr; \ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(LAYOUT, MEMBER, NAME) \ + (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ + static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != cms::soa::SoAColumnType::eigen, \ + "Eigen values not supported in views"); \ + auto addr = LAYOUT.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (reinterpret_cast(addr) % byteAlignment) \ + throw std::out_of_range("In constructor by layout: misaligned column: " #NAME); \ + return addr; \ }())) #define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -145,28 +148,27 @@ struct ConstValueTraits { * Generator of size computation for constructor. * This is the per-layout part of the lambda checking they all have the same size. 
*/ -#define _UPDATE_SIZE_OF_VIEW_IMPL(LAYOUT_TYPE, LAYOUT_NAME) \ - if (set) { \ - if (ret != LAYOUT_NAME.soaMetadata().size()) \ - throw std::out_of_range("In constructor by layout: different sizes from layouts."); \ - } else { \ - ret = LAYOUT_NAME.soaMetadata().size(); \ - set = true; \ +#define _UPDATE_SIZE_OF_VIEW_IMPL(LAYOUT_TYPE, LAYOUT_NAME) \ + if (set) { \ + if (ret != LAYOUT_NAME.soaMetadata().size()) \ + throw std::out_of_range("In constructor by layout: different sizes from layouts."); \ + } else { \ + ret = LAYOUT_NAME.soaMetadata().size(); \ + set = true; \ } -#define _UPDATE_SIZE_OF_VIEW(R, DATA, TYPE_NAME) \ - BOOST_PP_EXPAND(_UPDATE_SIZE_OF_VIEW_IMPL TYPE_NAME) +#define _UPDATE_SIZE_OF_VIEW(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_UPDATE_SIZE_OF_VIEW_IMPL TYPE_NAME) /** * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. */ -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL(LAYOUT, MEMBER, NAME) \ - (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ - if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ - if (reinterpret_cast(NAME) % byteAlignment) \ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL(LAYOUT, MEMBER, NAME) \ + (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (reinterpret_cast(NAME) % byteAlignment) \ throw std::out_of_range("In constructor by column: misaligned column: " #NAME); \ - return NAME; \ + return NAME; \ }())) #define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -208,9 +210,11 @@ struct ConstValueTraits { /** * Declaration of the members accessors of the const element subclass */ -#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - SOA_HOST_DEVICE_INLINE typename SoAConstValueWithConf::RefToConst LOCAL_NAME() const { \ - return BOOST_PP_CAT(LOCAL_NAME, _)(); \ +#define 
_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + SOA_HOST_DEVICE_INLINE \ + typename SoAConstValueWithConf::RefToConst LOCAL_NAME() \ + const { \ + return BOOST_PP_CAT(LOCAL_NAME, _)(); \ } #define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -219,11 +223,10 @@ struct ConstValueTraits { /** * Declaration of the private members of the const element subclass */ -#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - const cms::soa::ConstValueTraits< \ - SoAConstValueWithConf, \ - BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, LOCAL_NAME) \ - > BOOST_PP_CAT(LOCAL_NAME, _); +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + const cms::soa::ConstValueTraits, \ + BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, LOCAL_NAME)> \ + BOOST_PP_CAT(LOCAL_NAME, _); #define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL LAYOUT_MEMBER_NAME @@ -231,8 +234,8 @@ struct ConstValueTraits { /** * Generator of the member-by-member copy operator of the element subclass. 
*/ -#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - if constexpr (SoAMetadata:: BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != cms::soa::SoAColumnType::scalar) \ +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + if constexpr (SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != cms::soa::SoAColumnType::scalar) \ LOCAL_NAME() = other.LOCAL_NAME(); #define _DECLARE_VIEW_ELEMENT_VALUE_COPY(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -258,18 +261,18 @@ struct ConstValueTraits { /** * Direct access to column pointer and indexed access */ -#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - /* Column or scalar */ \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() { \ - return typename cms::soa::SoAAccessors:: \ - template ColumnType:: \ - template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(); \ - } \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) { \ - return typename cms::soa::SoAAccessors:: \ - template ColumnType:: \ - template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ - } +#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + /* Column or scalar */ \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::mutableAccess>(BOOST_PP_CAT(LOCAL_NAME, _))(); \ + } \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::mutableAccess>(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ + } #define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_ACCESSOR_IMPL LAYOUT_MEMBER_NAME) @@ -277,18 +280,18 @@ struct ConstValueTraits { /** * Direct access to column pointer (const) and indexed access. 
*/ -#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - /* Column or scalar */ \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() const { \ - return typename cms::soa::SoAAccessors:: \ - template ColumnType:: \ - template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(); \ - } \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) const { \ - return typename cms::soa::SoAAccessors:: \ - template ColumnType:: \ - template AccessType(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ - } +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + /* Column or scalar */ \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() const { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::constAccess>(BOOST_PP_CAT(LOCAL_NAME, _))(); \ + } \ + SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) const { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::constAccess>(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ + } #define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL LAYOUT_MEMBER_NAME) @@ -303,21 +306,155 @@ struct ConstValueTraits { BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) /* ---- MUTABLE VIEW -------------------------------------------------------------------------------------------------------------------- */ +// clang-format off +#define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ + template \ + struct CLASS { \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CLASS; \ + typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. 
\ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ + constexpr static size_t defaultAlignment = cms::soa::CacheLineSize::defaultSize; \ + constexpr static size_t byteAlignment = ALIGNMENT; \ + constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ + constexpr static size_t conditionalAlignment = \ + alignmentEnforcement == AlignmentEnforcement::Enforced ? byteAlignment : 0; \ + constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ + constexpr static cms::soa::RangeChecking rangeChecking = RANGE_CHECKING; \ + /* Those typedefs avoid having commas in macros (which is problematic) */ \ + template \ + using SoAValueWithConf = cms::soa::SoAValue; \ + \ + template \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + \ + template \ + using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ + /** \ + * Helper/friend class allowing SoA introspection. 
\ + */ \ + struct SoAMetadata { \ + friend CLASS; \ + SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ + /* Alias layout or view types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ + \ + /* Alias member types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, BOOST_PP_EMPTY(), VALUE_LIST) \ + \ + /* Forbid copying to avoid const correctness evasion */ \ + SoAMetadata& operator=(const SoAMetadata&) = delete; \ + SoAMetadata(const SoAMetadata&) = delete; \ + \ + private: \ + SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ + const CLASS& parent_; \ + }; \ + friend SoAMetadata; \ + SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ + SOA_HOST_DEVICE_INLINE SoAMetadata soaMetadata() { return SoAMetadata(*this); } \ + \ + /* Trivial constuctor */ \ + CLASS() : nElements_(0), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ + \ + /* Constructor relying on user provided layouts or views */ \ + SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), LAYOUTS_LIST)) \ + : nElements_([&]() -> size_t { \ + bool set = false; \ + size_t ret = 0; \ + _ITERATE_ON_ALL(_UPDATE_SIZE_OF_VIEW, BOOST_PP_EMPTY(), LAYOUTS_LIST) \ + return ret; \ + }()), \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ + \ + /* Constructor relying on individually provided column addresses */ \ + SOA_HOST_ONLY CLASS(size_t nElements, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, \ + BOOST_PP_EMPTY(), \ + VALUE_LIST)) \ + : nElements_(nElements), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ + \ + struct const_element { \ + SOA_HOST_DEVICE_INLINE \ + const_element(size_t index, /* Declare 
parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + struct element { \ + SOA_HOST_DEVICE_INLINE \ + element(size_t index, /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, BOOST_PP_EMPTY(), VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ + SOA_HOST_DEVICE_INLINE \ + element& operator=(const element& other) { \ + _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_COPY, ~, VALUE_LIST) \ + return *this; \ + } \ + _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + /* AoS-like accessor (non-const) */ \ + SOA_HOST_DEVICE_INLINE \ + element operator[](size_t index) { \ + if constexpr (rangeChecking == cms::soa::RangeChecking::Enabled) { \ + if (index >= nElements_) \ + SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ + } \ + return element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ + } \ + \ + /* AoS-like accessor (const) */ \ + SOA_HOST_DEVICE_INLINE \ + const_element operator[](size_t index) const { \ + if constexpr (rangeChecking == cms::soa::RangeChecking::Enabled) { \ + if (index >= nElements_) \ + SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ + } \ + return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ + } \ + \ + /* accessors */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_ACCESSOR, ~, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ + \ + /* dump the SoA internal structure */ \ + template \ + SOA_HOST_ONLY friend void dump(); \ + \ + private: \ + size_t nElements_; \ + 
_ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ + }; +// clang-format on -#define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ +/* ---- CONST VIEW --------------------------------------------------------------------------------------------------------------------- */ +// clang-format off +#define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ template \ + cms::soa::AlignmentEnforcement ALIGNMENT_ENFORCEMENT = cms::soa::AlignmentEnforcement::Relaxed, \ + cms::soa::RestrictQualify RESTRICT_QUALIFY = cms::soa::RestrictQualify::Enabled, \ + cms::soa::RangeChecking RANGE_CHECKING = cms::soa::RangeChecking::Disabled> \ struct CLASS { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ - \ - /* For CUDA applications, we align to the 128 bytes of the cache lines. \ - * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ - * up to compute capability 8.X. \ - */ \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. \ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ constexpr static size_t defaultAlignment = cms::soa::CacheLineSize::defaultSize; \ constexpr static size_t byteAlignment = ALIGNMENT; \ constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ @@ -325,7 +462,7 @@ struct ConstValueTraits { alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ constexpr static cms::soa::RangeChecking rangeChecking = RANGE_CHECKING; \ -/* Those typedefs avoid having commas in macros (which is problematic) */ \ + /* Those typedefs avoid having commas in macros (which is problematic) */ \ template \ using SoAValueWithConf = cms::soa::SoAValue; \ \ @@ -334,47 +471,45 @@ struct ConstValueTraits { \ template \ using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ + \ /** \ * Helper/friend class allowing SoA introspection. \ - */ \ + */ \ struct SoAMetadata { \ friend CLASS; \ - SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ - /* Alias layout or view types to name-derived identifyer to allow simpler definitions */ \ + SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ + /* Alias layout/view types to name-derived identifyer to allow simpler definitions */ \ _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ \ /* Alias member types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, BOOST_PP_EMPTY(), VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, const, VALUE_LIST) \ + \ + SoAMetadata& operator=(const SoAMetadata&) = delete; \ + SoAMetadata(const SoAMetadata&) = delete; \ \ - /* Forbid copying to avoid const correctness evasion */ \ - SoAMetadata & operator=(const SoAMetadata &) = delete; \ - SoAMetadata(const SoAMetadata &) = delete; \ private: \ SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ const CLASS& parent_; \ }; \ friend SoAMetadata; \ SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ - SOA_HOST_DEVICE_INLINE SoAMetadata soaMetadata() { return SoAMetadata(*this); } \ \ /* Trivial constuctor */ \ CLASS() : nElements_(0), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) 
{} \ \ /* Constructor relying on user provided layouts or views */ \ - SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), LAYOUTS_LIST)) \ - : nElements_( \ - [&]() -> size_t { \ - bool set = false; \ - size_t ret = 0; \ - _ITERATE_ON_ALL(_UPDATE_SIZE_OF_VIEW, BOOST_PP_EMPTY(), LAYOUTS_LIST) \ - return ret; \ - }() \ - ), \ + SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, LAYOUTS_LIST)) \ + : nElements_([&]() -> size_t { \ + bool set = false; \ + size_t ret = 0; \ + _ITERATE_ON_ALL(_UPDATE_SIZE_OF_VIEW, BOOST_PP_EMPTY(), LAYOUTS_LIST) \ + return ret; \ + }()), \ _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ \ /* Constructor relying on individually provided column addresses */ \ SOA_HOST_ONLY CLASS(size_t nElements, \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, BOOST_PP_EMPTY(), VALUE_LIST)) \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, const, VALUE_LIST)) \ : nElements_(nElements), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ \ struct const_element { \ @@ -388,39 +523,17 @@ struct ConstValueTraits { _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ }; \ \ - struct element { \ - SOA_HOST_DEVICE_INLINE \ - element(size_t index, /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, BOOST_PP_EMPTY(), VALUE_LIST)) \ - : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ - SOA_HOST_DEVICE_INLINE \ - element& operator=(const element& other) { \ - _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_COPY, ~, VALUE_LIST) \ - return *this; \ - } \ - _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ - }; \ - \ - /* AoS-like accessor (non-const) */ \ - SOA_HOST_DEVICE_INLINE \ - element operator[](size_t index) { \ - if constexpr (rangeChecking == 
cms::soa::RangeChecking::Enabled) { \ - if (index >= nElements_) SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ - } \ - return element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ - } \ - \ /* AoS-like accessor (const) */ \ SOA_HOST_DEVICE_INLINE \ const_element operator[](size_t index) const { \ if constexpr (rangeChecking == cms::soa::RangeChecking::Enabled) { \ - if (index >= nElements_) SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ + if (index >= nElements_) \ + SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ } \ return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ } \ \ /* accessors */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_ACCESSOR, ~, VALUE_LIST) \ _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ \ /* dump the SoA internal structure */ \ @@ -429,150 +542,50 @@ struct ConstValueTraits { \ private: \ size_t nElements_; \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ - }; - -/* ---- CONST VIEW --------------------------------------------------------------------------------------------------------------------- */ - -#define GENERATE_SOA_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ - template \ - struct CLASS { \ - /* these could be moved to an external type trait to free up the symbol names */ \ - using self_type = CLASS; \ - typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ - \ - /* For CUDA applications, we align to the 128 bytes of the cache lines. \ - * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ - * up to compute capability 8.X. 
\ - */ \ - constexpr static size_t defaultAlignment = cms::soa::CacheLineSize::defaultSize; \ - constexpr static size_t byteAlignment = ALIGNMENT; \ - constexpr static AlignmentEnforcement alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ - constexpr static size_t conditionalAlignment = \ - alignmentEnforcement == AlignmentEnforcement::Enforced ? byteAlignment : 0; \ - constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ - constexpr static cms::soa::RangeChecking rangeChecking = RANGE_CHECKING; \ - /* Those typedefs avoid having commas in macros (which is problematic) */ \ - template \ - using SoAValueWithConf = cms::soa::SoAValue; \ - \ - template \ - using SoAConstValueWithConf = cms::soa::SoAConstValue; \ - \ - template \ - using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ - \ - /** \ - * Helper/friend class allowing SoA introspection. \ - */ \ - struct SoAMetadata { \ - friend CLASS; \ - SOA_HOST_DEVICE_INLINE size_t size() const { return parent_.nElements_; } \ - /* Alias layout/view types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ - \ - /* Alias member types to name-derived identifyer to allow simpler definitions */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, const, VALUE_LIST) \ - \ - SoAMetadata & operator=(const SoAMetadata &) = delete; \ - SoAMetadata(const SoAMetadata &) = delete; \ - private: \ - SOA_HOST_DEVICE_INLINE SoAMetadata(const CLASS& parent) : parent_(parent) {} \ - const CLASS& parent_; \ - }; \ - friend SoAMetadata; \ - SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ - \ - /* Trivial constuctor */ \ - CLASS() : nElements_(0), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ - \ - /* Constructor relying on user provided layouts or views */ \ - SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, 
LAYOUTS_LIST)) \ - : nElements_( \ - [&]() -> size_t { \ - bool set = false; \ - size_t ret = 0; \ - _ITERATE_ON_ALL(_UPDATE_SIZE_OF_VIEW, BOOST_PP_EMPTY(), LAYOUTS_LIST) \ - return ret; \ - }() \ - ), \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ - \ - /* Constructor relying on individually provided column addresses */ \ - SOA_HOST_ONLY CLASS(size_t nElements, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, const, VALUE_LIST)) \ - : nElements_(nElements), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ - \ - struct const_element { \ - SOA_HOST_DEVICE_INLINE \ - const_element(size_t index, /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST)) \ - : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ - _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ - \ - private: \ - _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ - }; \ - \ - /* AoS-like accessor (const) */ \ - SOA_HOST_DEVICE_INLINE \ - const_element operator[](size_t index) const { \ - if constexpr (rangeChecking == cms::soa::RangeChecking::Enabled) { \ - if (index >= nElements_) SOA_THROW_OUT_OF_RANGE("Out of range index in " #CLASS "::operator[]") \ - } \ - return const_element(index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)); \ - } \ - \ - /* accessors */ \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ - \ - /* dump the SoA internal structure */ \ - template \ - SOA_HOST_ONLY friend void dump(); \ - \ - private: \ - size_t nElements_; \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ - }; + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ +}; +// clang-format on /** * Helper macro turning layout field declaration into view field declaration. 
*/ -#define _VIEW_FIELD_FROM_LAYOUT_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) \ - (DATA, NAME, NAME) +#define _VIEW_FIELD_FROM_LAYOUT_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) (DATA, NAME, NAME) #define _VIEW_FIELD_FROM_LAYOUT(R, DATA, VALUE_TYPE_NAME) \ - BOOST_PP_EXPAND ((_VIEW_FIELD_FROM_LAYOUT_IMPL BOOST_PP_TUPLE_PUSH_BACK(VALUE_TYPE_NAME, DATA))) + BOOST_PP_EXPAND((_VIEW_FIELD_FROM_LAYOUT_IMPL BOOST_PP_TUPLE_PUSH_BACK(VALUE_TYPE_NAME, DATA))) /** * A macro defining both layout and view(s) in one go. */ -#define GENERATE_SOA_LAYOUT_VIEW_AND_CONST_VIEW(LAYOUT_NAME, VIEW_NAME, CONST_VIEW_NAME, ... ) \ -GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ -using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME <>; \ -GENERATE_SOA_VIEW(VIEW_NAME, \ - SOA_VIEW_LAYOUT_LIST( (BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME)) ), \ - SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); \ -GENERATE_SOA_CONST_VIEW(CONST_VIEW_NAME, \ - SOA_VIEW_LAYOUT_LIST( (BOOST_PP_CAT(LAYOUT_NAME,_default), BOOST_PP_CAT(instance_, LAYOUT_NAME)) ), \ - SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); - -#define GENERATE_SOA_LAYOUT_AND_VIEW(LAYOUT_NAME, VIEW_NAME, ... ) \ -GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ -using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME <>; \ -GENERATE_SOA_VIEW(VIEW_NAME, \ - SOA_VIEW_LAYOUT_LIST( (BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME)) ), \ - SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); - - -#define GENERATE_SOA_LAYOUT_AND_CONST_VIEW(LAYOUT_NAME, CONST_VIEW_NAME, ... 
) \ -GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ -using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME <>; \ -GENERATE_SOA_CONST_VIEW(CONST_VIEW_NAME, \ - SOA_VIEW_LAYOUT_LIST( (BOOST_PP_CAT(LAYOUT_NAME,_default), BOOST_PP_CAT(instance_, LAYOUT_NAME)) ), \ - SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); +#define GENERATE_SOA_LAYOUT_VIEW_AND_CONST_VIEW(LAYOUT_NAME, VIEW_NAME, CONST_VIEW_NAME, ...) \ + GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ + using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME<>; \ + GENERATE_SOA_VIEW(VIEW_NAME, \ + SOA_VIEW_LAYOUT_LIST((BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME))), \ + SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA( \ + _VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); \ + GENERATE_SOA_CONST_VIEW( \ + CONST_VIEW_NAME, \ + SOA_VIEW_LAYOUT_LIST((BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME))), \ + SOA_VIEW_VALUE_LIST( \ + _ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); + +#define GENERATE_SOA_LAYOUT_AND_VIEW(LAYOUT_NAME, VIEW_NAME, ...) \ + GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ + using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME<>; \ + GENERATE_SOA_VIEW(VIEW_NAME, \ + SOA_VIEW_LAYOUT_LIST((BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME))), \ + SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA( \ + _VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); + +#define GENERATE_SOA_LAYOUT_AND_CONST_VIEW(LAYOUT_NAME, CONST_VIEW_NAME, ...) 
\ + GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ + using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME<>; \ + GENERATE_SOA_CONST_VIEW( \ + CONST_VIEW_NAME, \ + SOA_VIEW_LAYOUT_LIST((BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME))), \ + SOA_VIEW_VALUE_LIST( \ + _ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); #endif // ndef DataStructures_SoAView_h diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu index 8711740a3..331874b4f 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.cu @@ -62,7 +62,6 @@ namespace pixelgpudetails { return (PixelSubdetector::PixelBarrel == ((rawId >> DetId::kSubdetOffset) & DetId::kSubdetMask)); } - //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 // Convert local pixel to pixelgpudetails::global pixel @@ -494,7 +493,7 @@ namespace pixelgpudetails { // Interface to outside void SiPixelRawToClusterGPUKernel::makeClustersAsync(bool isRun2, const SiPixelClusterThresholds clusterThresholds, - SiPixelROCsStatusAndMappingConstView & cablingMap, + SiPixelROCsStatusAndMappingConstView &cablingMap, const unsigned char *modToUnp, const SiPixelGainForHLTonGPU *gains, const WordFedAppender &wordFed, diff --git a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h index 2297c296e..c3ff57103 100644 --- a/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h +++ b/src/cudadev/plugin-SiPixelClusterizer/SiPixelRawToClusterGPUKernel.h @@ -135,7 +135,7 @@ namespace pixelgpudetails { void makeClustersAsync(bool 
isRun2, const SiPixelClusterThresholds clusterThresholds, - SiPixelROCsStatusAndMappingConstView & cablingMap, + SiPixelROCsStatusAndMappingConstView& cablingMap, const unsigned char* modToUnp, const SiPixelGainForHLTonGPU* gains, const WordFedAppender& wordFed, diff --git a/src/cudadev/test/SoALayoutAndView_t.cu b/src/cudadev/test/SoALayoutAndView_t.cu index f5fa1794e..71c401063 100644 --- a/src/cudadev/test/SoALayoutAndView_t.cu +++ b/src/cudadev/test/SoALayoutAndView_t.cu @@ -9,118 +9,106 @@ // Multiple stores in a buffer // Scalars, Columns of scalars and of Eigen vectors // View to each of them, from one and multiple stores. - -GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, SoA1ViewTemplate, - // predefined static scalars - // size_t size; - // size_t alignment; - // columns: one value per element - SOA_COLUMN(double, x), - SOA_COLUMN(double, y), - SOA_COLUMN(double, z), - SOA_COLUMN(double, sum), - SOA_COLUMN(double, prod), - /* Leave Eigen definitions out until support is complete. +GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, + SoA1ViewTemplate, + // predefined static scalars + // size_t size; + // size_t alignment; + + // columns: one value per element + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), + SOA_COLUMN(double, sum), + SOA_COLUMN(double, prod), + /* Leave Eigen definitions out until support is complete. 
SOA_EIGEN_COLUMN(Eigen::Vector3d, a), SOA_EIGEN_COLUMN(Eigen::Vector3d, b), SOA_EIGEN_COLUMN(Eigen::Vector3d, r),*/ - SOA_COLUMN(uint16_t, color), - SOA_COLUMN(int32_t, value), - SOA_COLUMN(double *, py), - SOA_COLUMN(uint32_t, count), - SOA_COLUMN(uint32_t, anotherCount), + SOA_COLUMN(uint16_t, color), + SOA_COLUMN(int32_t, value), + SOA_COLUMN(double *, py), + SOA_COLUMN(uint32_t, count), + SOA_COLUMN(uint32_t, anotherCount), - // scalars: one value for the whole structure - SOA_SCALAR(const char *, description), - SOA_SCALAR(uint32_t, someNumber) -) + // scalars: one value for the whole structure + SOA_SCALAR(const char *, description), + SOA_SCALAR(uint32_t, someNumber)) using SoA1Layout = SoA1LayoutTemplate<>; using SoA1View = SoA1ViewTemplate<>; // A partial view (artificial mix of store and view) GENERATE_SOA_VIEW(SoA1View2GTemplate, - SOA_VIEW_LAYOUT_LIST( - SOA_VIEW_LAYOUT(SoA1Layout, soa1), - SOA_VIEW_LAYOUT(SoA1View, soa1v) - ), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(soa1, x), - SOA_VIEW_VALUE(soa1v, y), - SOA_VIEW_VALUE(soa1, color), - SOA_VIEW_VALUE(soa1v, value), - SOA_VIEW_VALUE(soa1v, count), - SOA_VIEW_VALUE(soa1, anotherCount), - SOA_VIEW_VALUE(soa1v, description), - SOA_VIEW_VALUE(soa1, someNumber) - ) -) + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoA1Layout, soa1), SOA_VIEW_LAYOUT(SoA1View, soa1v)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soa1, x), + SOA_VIEW_VALUE(soa1v, y), + SOA_VIEW_VALUE(soa1, color), + SOA_VIEW_VALUE(soa1v, value), + SOA_VIEW_VALUE(soa1v, count), + SOA_VIEW_VALUE(soa1, anotherCount), + SOA_VIEW_VALUE(soa1v, description), + SOA_VIEW_VALUE(soa1, someNumber))) using SoA1View2G = SoA1View2GTemplate<>; - - // Same partial view, yet const. GENERATE_SOA_CONST_VIEW(SoA1View2Gconst, - SOA_VIEW_LAYOUT_LIST( - SOA_VIEW_LAYOUT(SoA1Layout, soa1), - SOA_VIEW_LAYOUT(SoA1View, soa1v) - ), - SOA_VIEW_VALUE_LIST( - SOA_VIEW_VALUE(soa1, x), - SOA_VIEW_VALUE(soa1v, y), -/* Eigen columns are not supported in views. 
+ SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoA1Layout, soa1), SOA_VIEW_LAYOUT(SoA1View, soa1v)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soa1, x), + SOA_VIEW_VALUE(soa1v, y), + /* Eigen columns are not supported in views. SoA_view_value(soa1, a, a), SoA_view_value(soa1, b, b), SoA_view_value(soa1, r, r), */ - SOA_VIEW_VALUE(soa1, color), - SOA_VIEW_VALUE(soa1v, value), - SOA_VIEW_VALUE(soa1v, count), - SOA_VIEW_VALUE(soa1, anotherCount), - SOA_VIEW_VALUE(soa1v, description), - SOA_VIEW_VALUE(soa1, someNumber) - ) -) + SOA_VIEW_VALUE(soa1, color), + SOA_VIEW_VALUE(soa1v, value), + SOA_VIEW_VALUE(soa1v, count), + SOA_VIEW_VALUE(soa1, anotherCount), + SOA_VIEW_VALUE(soa1v, description), + SOA_VIEW_VALUE(soa1, someNumber))) // Parameter reusing kernels. The disassembly will indicate whether the compiler uses the wanted cache hits and uses // `restrict` hints avoid multiple reduce loads. // The PTX can be obtained using -ptx insterad of -c when compiling. template -__device__ void addAndMulTemplate ( - T soa, size_t size) { - auto idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= size) return; - auto si = soa[idx]; - si.sum() = si.x() + si.y(); - si.prod() = si.x() * si.y(); - } +__device__ void addAndMulTemplate(T soa, size_t size) { + auto idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size) + return; + auto si = soa[idx]; + si.sum() = si.x() + si.y(); + si.prod() = si.x() * si.y(); +} __global__ void aAMDef(SoA1ViewTemplate soa, size_t size) { + cms::soa::AlignmentEnforcement::Relaxed, + cms::soa::RestrictQualify::Disabled> soa, + size_t size) { addAndMulTemplate(soa, size); } __global__ void aAMRestrict(SoA1ViewTemplate soa, size_t size) { + cms::soa::AlignmentEnforcement::Relaxed, + cms::soa::RestrictQualify::Enabled> soa, + size_t size) { addAndMulTemplate(soa, size); } -const size_t size=10000; +const size_t size = 10000; int main() { // Allocate buffer std::unique_ptr buffer( - static_cast(std::aligned_alloc(SoA1Layout::defaultAlignment, 
SoA1Layout::computeDataSize(size))), - std::free); + static_cast(std::aligned_alloc(SoA1Layout::defaultAlignment, SoA1Layout::computeDataSize(size))), + std::free); SoA1Layout soa1(buffer.get(), size); - SoA1View soa1view (soa1); - SoA1View2G soa1v2g (soa1, soa1view); - SoA1View2Gconst soa1v2gconst (soa1, soa1view); + SoA1View soa1view(soa1); + SoA1View2G soa1v2g(soa1, soa1view); + SoA1View2Gconst soa1v2gconst(soa1, soa1view); // Write to view - for (size_t i=0; i < size; i++) { + for (size_t i = 0; i < size; i++) { auto s = soa1view[i]; s.x = 1.0 * i; s.y = 2.0 * i; @@ -136,7 +124,7 @@ int main() { s.r() = s.a().cross(s.b());*/ } // Check direct read back - for (size_t i=0; i < size; i++) { + for (size_t i = 0; i < size; i++) { auto s = soa1view[i]; assert(s.x() == 1.0 * i); assert(s.y() == 2.0 * i); @@ -152,7 +140,7 @@ int main() { assert(s.r() == s.a().cross(s.b()));*/ } // Check readback through other views - for (size_t i=0; i < size; i++) { + for (size_t i = 0; i < size; i++) { auto sv = soa1view[i]; auto sv2g = soa1v2g[i]; auto sv2gc = soa1v2gconst[i]; @@ -167,14 +155,18 @@ int main() { assert(sv2gc.y() == 2.0 * i); assert(sv2gc.color() == i); } - + // Validation of range checking try { // Get a view like the default, except for range checking - SoA1ViewTemplate soa1viewRangeChecking(soa1); + SoA1ViewTemplate + soa1viewRangeChecking(soa1); // This should throw an exception [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.soaMetadata().size()]; assert(false); - } catch (const std::out_of_range &) {} + } catch (const std::out_of_range &) { + } } \ No newline at end of file From f1e7f054ee41bbf294ffd33fdb6f0df0bcd3a512 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 27 Jan 2022 13:57:40 +0100 Subject: [PATCH 41/50] [cudadev] Re-added support for Eigen columns Re-enabled Eigen parts of the test. 
--- src/cudadev/DataFormats/SoACommon.h | 271 ++++++++++++++++++++----- src/cudadev/DataFormats/SoALayout.h | 92 ++++++--- src/cudadev/DataFormats/SoAView.h | 188 ++++++++--------- src/cudadev/bin/Source.h | 7 +- src/cudadev/test/SoALayoutAndView_t.cu | 26 ++- 5 files changed, 394 insertions(+), 190 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 28727319e..0a1ed906e 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -35,10 +35,18 @@ { throw std::out_of_range(A); } #endif -// compile-time sized SoA +/* declare "scalars" (one value shared across the whole SoA) and "columns" (one value per element) */ +#define _VALUE_TYPE_SCALAR 0 +#define _VALUE_TYPE_COLUMN 1 +#define _VALUE_TYPE_EIGEN_COLUMN 2 namespace cms::soa { + enum class SoAColumnType { + scalar = _VALUE_TYPE_SCALAR, + column = _VALUE_TYPE_COLUMN, + eigen = _VALUE_TYPE_EIGEN_COLUMN + }; enum class RestrictQualify : bool { Enabled, Disabled, Default = Disabled }; enum class RangeChecking : bool { Enabled, Disabled, Default = Disabled }; @@ -65,12 +73,133 @@ namespace cms::soa { typedef const T* PointerToConst; typedef const T& ReferenceToConst; }; + template + struct SoAParametersImpl; + + // Templated parameter sets for scalar columns and Eigen columns + template + struct SoAConstParametersImpl { + static const SoAColumnType columnType = COLUMN_TYPE; + typedef T ValueType; + typedef const ValueType* TupleOrPointerType; + const ValueType* addr_ = nullptr; + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl(const ValueType* addr) : addr_(addr) {} + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl(const SoAConstParametersImpl& o) { addr_ = o.addr_; } + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl(const SoAParametersImpl& o) { + addr_ = o.addr_; + } + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl() {} + static bool checkAlignement(ValueType* addr, size_t byteAlignment) { + return reinterpret_cast(addr) % byteAlignment; 
+ } + }; + + template + struct SoAConstParametersImpl { + static const SoAColumnType columnType = SoAColumnType::eigen; + typedef T ValueType; + typedef typename T::Scalar ScalarType; + typedef std::tuple TupleOrPointerType; + const ScalarType* addr_ = nullptr; + size_t stride_ = 0; + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl(const ScalarType* addr, size_t stride) + : addr_(addr), stride_(stride) {} + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl(const TupleOrPointerType tuple) + : addr_(std::get<0>(tuple)), stride_(std::get<1>(tuple)) {} + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl(const ScalarType* addr) : addr_(addr) {} + // Trick setter + return self-reference allowing commat-free 2-stage construction in macro contexts (in combination with the + // addr-only constructor. + SoAConstParametersImpl& setStride(size_t stride) { + stride_ = stride; + return *this; + } + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl(const SoAConstParametersImpl& o) { + addr_ = o.addr_; + stride_ = o.stride_; + } + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl(const SoAParametersImpl& o) { + addr_ = o.addr_; + stride_ = o.stride_; + } + SOA_HOST_DEVICE_INLINE SoAConstParametersImpl() {} + static bool checkAlignement(const TupleOrPointerType tuple, size_t byteAlignment) { + const auto& [addr, stride] = tuple; + return reinterpret_cast(addr) % byteAlignment; + } + }; + + // Matryoshka template to avoiding commas in macros + template + struct SoAConstParameters_ColumnType { + template + struct DataType : public SoAConstParametersImpl { + using SoAConstParametersImpl::SoAConstParametersImpl; + }; + }; + + // Templated parameter sets for scalar columns and Eigen columns + template + struct SoAParametersImpl { + static const SoAColumnType columnType = COLUMN_TYPE; + typedef T ValueType; + typedef const ValueType* TupleOrPointerType; + typedef SoAConstParametersImpl ConstType; + friend ConstType; + ValueType* addr_ = nullptr; + SOA_HOST_DEVICE_INLINE SoAParametersImpl(ValueType* addr) 
: addr_(addr) {} + SOA_HOST_DEVICE_INLINE SoAParametersImpl() {} + static bool checkAlignement(ValueType* addr, size_t byteAlignment) { + return reinterpret_cast(addr) % byteAlignment; + } + }; + + template + struct SoAParametersImpl { + static const SoAColumnType columnType = SoAColumnType::eigen; + typedef T ValueType; + typedef SoAConstParametersImpl ConstType; + friend ConstType; + typedef typename T::Scalar ScalarType; + typedef std::tuple TupleOrPointerType; + ScalarType* addr_ = nullptr; + size_t stride_ = 0; + SOA_HOST_DEVICE_INLINE SoAParametersImpl(ScalarType* addr, size_t stride) : addr_(addr), stride_(stride) {} + SOA_HOST_DEVICE_INLINE SoAParametersImpl(const TupleOrPointerType tuple) + : addr_(std::get<0>(tuple)), stride_(std::get<1>(tuple)) {} + SOA_HOST_DEVICE_INLINE SoAParametersImpl() {} + SOA_HOST_DEVICE_INLINE SoAParametersImpl(ScalarType* addr) : addr_(addr) {} + // Trick setter + return self-reference allowing commat-free 2-stage construction in macro contexts (in combination with the + // addr-only constructor. + SoAParametersImpl& setStride(size_t stride) { + stride_ = stride; + return *this; + } + static bool checkAlignement(const TupleOrPointerType tuple, size_t byteAlignment) { + const auto& [addr, stride] = tuple; + return reinterpret_cast(addr) % byteAlignment; + } + }; + + // Matryoshka template to avoiding commas in macros + template + struct SoAParameters_ColumnType { + template + struct DataType : public SoAParametersImpl { + using SoAParametersImpl::SoAParametersImpl; + }; + }; // Helper template managing the value within it column // The optional compile time alignment parameter enables informing the // compiler of alignment (enforced by caller). 
- template + template class SoAValue { + // Eigen is implemented in a specialization + static_assert(COLUMN_TYPE != SoAColumnType::eigen); + public: typedef add_restrict Restr; typedef typename Restr::Value Val; @@ -79,6 +208,7 @@ namespace cms::soa { typedef typename Restr::PointerToConst PtrToConst; typedef typename Restr::ReferenceToConst RefToConst; SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T* col) : idx_(i), col_(col) {} + SOA_HOST_DEVICE_INLINE SoAValue(size_t i, SoAParametersImpl params) : idx_(i), col_(params.addr_) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ SOA_HOST_DEVICE_INLINE Ref operator()() { // Ptr type will add the restrict qualifyer if needed @@ -111,8 +241,59 @@ namespace cms::soa { }; // Helper template managing the value within it column - template + // TODO Create a const variant to avoid leaking mutable access. + template + class SoAValue { + public: + typedef C Type; + typedef Eigen::Map> MapType; + typedef Eigen::Map> CMapType; + SOA_HOST_DEVICE_INLINE SoAValue(size_t i, typename C::Scalar* col, size_t stride) + : val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + crCol_(col), + cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + stride_(stride) {} + SOA_HOST_DEVICE_INLINE SoAValue(size_t i, SoAParametersImpl params) + : val_(params.addr_ + i, + C::RowsAtCompileTime, + C::ColsAtCompileTime, + Eigen::InnerStride(params.stride_)), + crCol_(params.addr_), + cVal_(crCol_ + i, + C::RowsAtCompileTime, + C::ColsAtCompileTime, + Eigen::InnerStride(params.stride_)), + stride_(params.stride_) {} + SOA_HOST_DEVICE_INLINE MapType& operator()() { return val_; } + SOA_HOST_DEVICE_INLINE const CMapType& operator()() const { return cVal_; } + SOA_HOST_DEVICE_INLINE operator C() { return val_; } + SOA_HOST_DEVICE_INLINE operator const C() const { return cVal_; } + SOA_HOST_DEVICE_INLINE C* operator&() { return &val_; } + SOA_HOST_DEVICE_INLINE const C* 
operator&() const { return &cVal_; } + template + SOA_HOST_DEVICE_INLINE MapType& operator=(const C2& v) { + return val_ = v; + } + typedef typename C::Scalar ValueType; + static constexpr auto valueSize = sizeof(C::Scalar); + SOA_HOST_DEVICE_INLINE size_t stride() const { return stride_; } + + private: + MapType val_; + const typename C::Scalar* __restrict__ crCol_; + CMapType cVal_; + size_t stride_; + }; + + // Helper template managing the value within it column + template class SoAConstValue { + // Eigen is implemented in a specialization + static_assert(COLUMN_TYPE != SoAColumnType::eigen); + public: typedef add_restrict Restr; typedef typename Restr::Value Val; @@ -120,7 +301,13 @@ namespace cms::soa { typedef typename Restr::Reference Ref; typedef typename Restr::PointerToConst PtrToConst; typedef typename Restr::ReferenceToConst RefToConst; + typedef SoAParametersImpl Params; + typedef SoAConstParametersImpl ConstParams; SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, const T* col) : idx_(i), col_(col) {} + SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, SoAParametersImpl params) + : idx_(i), col_(params.addr_) {} + SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, SoAConstParametersImpl params) + : idx_(i), col_(params.addr_) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ SOA_HOST_DEVICE_INLINE RefToConst operator()() const { // Ptr type will add the restrict qualifyer if needed @@ -144,33 +331,32 @@ namespace cms::soa { // Helper template managing the value within it column // TODO Create a const variant to avoid leaking mutable access. 
- template - class SoAEigenValue { + template + class SoAConstValue { public: typedef C Type; - typedef Eigen::Map> MapType; typedef Eigen::Map> CMapType; - SOA_HOST_DEVICE_INLINE SoAEigenValue(size_t i, typename C::Scalar* col, size_t stride) - : val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), - crCol_(col), + typedef CMapType& RefToConst; + typedef SoAConstParametersImpl ConstParams; + SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, typename C::Scalar* col, size_t stride) + : crCol_(col), cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), stride_(stride) {} - SOA_HOST_DEVICE_INLINE MapType& operator()() { return val_; } + SOA_HOST_DEVICE_INLINE SoAConstValue(size_t i, SoAConstParametersImpl params) + : crCol_(params.addr_), + cVal_(crCol_ + i, + C::RowsAtCompileTime, + C::ColsAtCompileTime, + Eigen::InnerStride(params.stride_)), + stride_(params.stride_) {} SOA_HOST_DEVICE_INLINE const CMapType& operator()() const { return cVal_; } - SOA_HOST_DEVICE_INLINE operator C() { return val_; } SOA_HOST_DEVICE_INLINE operator const C() const { return cVal_; } - SOA_HOST_DEVICE_INLINE C* operator&() { return &val_; } SOA_HOST_DEVICE_INLINE const C* operator&() const { return &cVal_; } - template - SOA_HOST_DEVICE_INLINE MapType& operator=(const C2& v) { - return val_ = v; - } typedef typename C::Scalar ValueType; static constexpr auto valueSize = sizeof(C::Scalar); - SOA_HOST_DEVICE_INLINE size_t stride() { return stride_; } + SOA_HOST_DEVICE_INLINE size_t stride() const { return stride_; } private: - MapType val_; const typename C::Scalar* __restrict__ crCol_; CMapType cVal_; size_t stride_; @@ -204,21 +390,6 @@ namespace cms::soa { } // namespace cms::soa -/* declare "scalars" (one value shared across the whole SoA) and "columns" (one value per element) */ -#define _VALUE_TYPE_SCALAR 0 -#define _VALUE_TYPE_COLUMN 1 -#define _VALUE_TYPE_EIGEN_COLUMN 2 - -namespace cms::soa { - - enum class 
SoAColumnType { - scalar = _VALUE_TYPE_SCALAR, - column = _VALUE_TYPE_COLUMN, - eigen = _VALUE_TYPE_EIGEN_COLUMN - }; - -} // namespace cms::soa - #define SOA_SCALAR(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) #define SOA_COLUMN(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) #define SOA_EIGEN_COLUMN(TYPE, NAME) (_VALUE_TYPE_EIGEN_COLUMN, TYPE, NAME) @@ -252,54 +423,60 @@ namespace cms::soa { // Column template struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T* baseAddress) : baseAddress_(baseAddress) {} - SOA_HOST_DEVICE_INLINE T* operator()() { return baseAddress_; } - SOA_HOST_DEVICE_INLINE T& operator()(size_t index) { return baseAddress_[index]; } + //SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T* baseAddress) : baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE_INLINE T* operator()() { return params_.addr_; } + SOA_HOST_DEVICE_INLINE T& operator()(size_t index) { return params_.addr_[index]; } private: - T* baseAddress_; + SoAParametersImpl params_; }; // Const column template struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const T* baseAddress) : baseAddress_(baseAddress) {} - SOA_HOST_DEVICE_INLINE const T* operator()() const { return baseAddress_; } - SOA_HOST_DEVICE_INLINE T operator()(size_t index) const { return baseAddress_[index]; } + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE_INLINE const T* operator()() const { return params_.addr_; } + SOA_HOST_DEVICE_INLINE T operator()(size_t index) const { return params_.addr_[index]; } private: - const T* baseAddress_; + SoAConstParametersImpl params_; }; // Scalar template struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T* baseAddress) : baseAddress_(baseAddress) {} - SOA_HOST_DEVICE_INLINE T& operator()() { return *baseAddress_; } + 
SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE_INLINE T& operator()() { return *params_.addr_; } SOA_HOST_DEVICE_INLINE void operator()(size_t index) const { assert(false && "Indexed access impossible for SoA scalars."); } private: - T* baseAddress_; + SoAParametersImpl params_; }; // Const scalar template struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const T* baseAddress) : baseAddress_(baseAddress) {} - SOA_HOST_DEVICE_INLINE T operator()() const { return *baseAddress_; } + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE_INLINE T operator()() const { return *params_.addr_; } SOA_HOST_DEVICE_INLINE void operator()(size_t index) const { assert(false && "Indexed access impossible for SoA scalars."); } private: - const T* baseAddress_; + SoAConstParametersImpl params_; }; /* A helper template stager avoiding comma in macros */ template struct SoAAccessors { + // TODO: useful? 
using myInt = int; template struct ColumnType { diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 0f17adde7..6257ad05c 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -92,34 +92,61 @@ /** * SoAMetadata member computing column pitch */ -#define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE( \ - VALUE_TYPE, /* Scalar */ \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * ParentClass::byteAlignment; \ - } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::scalar; \ - CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() const { \ - return parent_.BOOST_PP_CAT(NAME, _); \ - } CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); }, /* Column */ \ - CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() \ - const { return parent_.BOOST_PP_CAT(NAME, _); } CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { \ - return parent_.BOOST_PP_CAT(NAME, _); \ - } size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ - ParentClass::byteAlignment; \ - } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::column; \ - , /* Eigen column */ \ - size_t BOOST_PP_CAT(NAME, Pitch()) const { \ - return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ - ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ - } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ - constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::eigen; \ - CPP_TYPE::Scalar const* BOOST_PP_CAT(addressOf_, NAME)() const { \ - 
return parent_.BOOST_PP_CAT(NAME, _); \ - } CPP_TYPE::Scalar* BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); }) - +// clang-format off +#define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * ParentClass::byteAlignment; \ + } \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::scalar; \ + CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() const { \ + return parent_.BOOST_PP_CAT(NAME, _); \ + } \ + typedef cms::soa::SoAParameters_ColumnType::DataType \ + BOOST_PP_CAT(ParametersTypeOf_, NAME); \ + BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ + return BOOST_PP_CAT(ParametersTypeOf_, NAME) (parent_.BOOST_PP_CAT(NAME, _)); \ + } \ + CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); }, \ + /* Column */ \ + typedef cms::soa::SoAParameters_ColumnType::DataType \ + BOOST_PP_CAT(ParametersTypeOf_, NAME); \ + BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ + return BOOST_PP_CAT(ParametersTypeOf_, NAME) (parent_.BOOST_PP_CAT(NAME, _)); \ + } \ + CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() const { \ + return parent_.BOOST_PP_CAT(NAME, _); \ + } \ + CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { \ + return parent_.BOOST_PP_CAT(NAME, _); \ + } \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ + ParentClass::byteAlignment; \ + } \ + typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::column;, \ + /* Eigen column */ \ + typedef cms::soa::SoAParameters_ColumnType::DataType \ + 
BOOST_PP_CAT(ParametersTypeOf_, NAME); \ + BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ + return BOOST_PP_CAT(ParametersTypeOf_, NAME) ( \ + parent_.BOOST_PP_CAT(NAME, _), \ + parent_.BOOST_PP_CAT(NAME, Stride_)); \ + } \ + size_t BOOST_PP_CAT(NAME, Pitch()) const { \ + return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ + ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::eigen; \ + CPP_TYPE::Scalar const* BOOST_PP_CAT(addressOf_, NAME)() const { \ + return parent_.BOOST_PP_CAT(NAME, _); \ + } \ + CPP_TYPE::Scalar* BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); } \ +) +// clang-format on #define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME /** @@ -242,14 +269,13 @@ constexpr static size_t conditionalAlignment = \ alignmentEnforcement == AlignmentEnforcement::Enforced ? 
byteAlignment : 0; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ - template \ - using SoAValueWithConf = cms::soa::SoAValue; \ + template \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ - template \ - using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + template \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ template \ - using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ /* dump the SoA internal structure */ \ SOA_HOST_ONLY \ static void dump(size_t nElements) { \ diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 4a34cf759..7e4629841 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -44,10 +44,7 @@ namespace cms::soa { /* Value traits passes the class as is in the case of column type and return * an empty class with functions returning non-scalar as accessors. */ template - struct ConstValueTraits {}; - - template - struct ConstValueTraits : public C { + struct ConstValueTraits : public C { using C::C; }; @@ -55,17 +52,11 @@ namespace cms::soa { struct ConstValueTraits { // Just take to SoAValue type to generate the right constructor. SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::valueType*) {} + SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::Params&) {} + SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::ConstParams&) {} // Any attempt to do anything with the "scalar" value a const element will fail. }; - template - struct ConstValueTraits { - // Just take to SoAValue type to generate the right constructor. - SOA_HOST_DEVICE_INLINE ConstValueTraits(size_t, const typename C::valueType*) {} - // TODO: implement - // Any attempt to do anything with the eigen value a const element will fail. 
- }; - } // namespace cms::soa #include @@ -84,30 +75,24 @@ namespace cms::soa { /** * Member types aliasing for referencing by name */ -#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ - typedef typename BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(TypeOf_, LAYOUT_MEMBER) \ - BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ - constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ - BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, LAYOUT_MEMBER); \ - SOA_HOST_DEVICE_INLINE \ - DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ - return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ - }; \ - static_assert(BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != cms::soa::SoAColumnType::eigen, \ - "Eigen columns not supported in views."); +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + typedef typename BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(TypeOf_, LAYOUT_MEMBER) \ + BOOST_PP_CAT(TypeOf_, LOCAL_NAME); \ + typedef typename BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(ParametersTypeOf_, LAYOUT_MEMBER) \ + BOOST_PP_CAT(ParametersTypeOf_, LOCAL_NAME); \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ + BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, LAYOUT_MEMBER); \ + SOA_HOST_DEVICE_INLINE \ + DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ + return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ + }; \ + DATA BOOST_PP_CAT(ParametersTypeOf_, LOCAL_NAME) BOOST_PP_CAT(parametersOf_, LOCAL_NAME)() const { \ + return parent_.BOOST_PP_CAT(LOCAL_NAME, Parameters_); \ + }; #define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) -/** - * Member 
assignment for trivial constructor - */ -#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - (BOOST_PP_CAT(LOCAL_NAME, _)(nullptr)) - -#define _DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, LAYOUT_MEMBER_NAME) \ - BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION_IMPL LAYOUT_MEMBER_NAME) - /** * Generator of parameters (layouts/views) for constructor by layouts/views. */ @@ -120,7 +105,7 @@ namespace cms::soa { * Generator of parameters for constructor by column. */ #define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ - (DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME) + (DATA typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME)::TupleOrPointerType LOCAL_NAME) #define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND( \ @@ -130,15 +115,13 @@ namespace cms::soa { * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. 
*/ -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(LAYOUT, MEMBER, NAME) \ - (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ - static_assert(BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, NAME) != cms::soa::SoAColumnType::eigen, \ - "Eigen values not supported in views"); \ - auto addr = LAYOUT.soaMetadata().BOOST_PP_CAT(addressOf_, MEMBER)(); \ - if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ - if (reinterpret_cast(addr) % byteAlignment) \ - throw std::out_of_range("In constructor by layout: misaligned column: " #NAME); \ - return addr; \ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(LAYOUT, MEMBER, NAME) \ + (BOOST_PP_CAT(NAME, Parameters_)([&]() -> auto { \ + auto params = LAYOUT.soaMetadata().BOOST_PP_CAT(parametersOf_, MEMBER)(); \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (reinterpret_cast(params.addr_) % byteAlignment) \ + throw std::out_of_range("In constructor by layout: misaligned column: " #NAME); \ + return params; \ }())) #define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -163,13 +146,17 @@ namespace cms::soa { * Generator of member initialization from constructor. * We use a lambda with auto return type to handle multiple possible return types. 
*/ -#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL(LAYOUT, MEMBER, NAME) \ - (BOOST_PP_CAT(NAME, _)([&]() -> auto { \ - if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ - if (reinterpret_cast(NAME) % byteAlignment) \ - throw std::out_of_range("In constructor by column: misaligned column: " #NAME); \ - return NAME; \ - }())) +// clang-format off +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL(LAYOUT, MEMBER, NAME) \ + ( \ + BOOST_PP_CAT(NAME, Parameters_)([&]() -> auto { \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::Enforced) \ + if (SoAMetadata:: BOOST_PP_CAT(ParametersTypeOf_, NAME)::checkAlignment(NAME, byteAlignment)) \ + throw std::out_of_range("In constructor by column: misaligned column: " #NAME); \ + return NAME; \ + }()) \ + ) +// clang-format on #define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL LAYOUT_MEMBER_NAME) @@ -192,11 +179,20 @@ namespace cms::soa { * Generator of parameters for (non-const) element subclass (expanded comma separated). */ #define _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ - (DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * LOCAL_NAME) + (DATA typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME) LOCAL_NAME) #define _DECLARE_VIEW_ELEMENT_VALUE_ARG(R, DATA, LAYOUT_MEMBER_NAME) \ _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA) +/** + * Generator of parameters for (const) element subclass (expanded comma separated). 
+ */ +#define _DECLARE_CONST_VIEW_ELEMENT_VALUE_ARG_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + (DATA typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME)::ConstType LOCAL_NAME) + +#define _DECLARE_CONST_VIEW_ELEMENT_VALUE_ARG(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_CONST_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA) + /** * Generator of member initialization for constructor of element subclass */ @@ -210,11 +206,12 @@ namespace cms::soa { /** * Declaration of the members accessors of the const element subclass */ -#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - SOA_HOST_DEVICE_INLINE \ - typename SoAConstValueWithConf::RefToConst LOCAL_NAME() \ - const { \ - return BOOST_PP_CAT(LOCAL_NAME, _)(); \ +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + SOA_HOST_DEVICE_INLINE \ + typename SoAConstValueWithConf::RefToConst \ + LOCAL_NAME() const { \ + return BOOST_PP_CAT(LOCAL_NAME, _)(); \ } #define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -224,7 +221,8 @@ namespace cms::soa { * Declaration of the private members of the const element subclass */ #define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - const cms::soa::ConstValueTraits, \ + const cms::soa::ConstValueTraits, \ BOOST_PP_CAT(SoAMetadata::ColumnTypeOf_, LOCAL_NAME)> \ BOOST_PP_CAT(LOCAL_NAME, _); @@ -245,7 +243,9 @@ namespace cms::soa { * Declaration of the private members of the const element subclass */ #define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - SoAValueWithConf LOCAL_NAME; + SoAValueWithConf \ + LOCAL_NAME; #define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL LAYOUT_MEMBER_NAME @@ -253,7 +253,8 @@ namespace cms::soa { /** * Parameters passed to element subclass constructor in 
operator[] */ -#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) (BOOST_PP_CAT(LOCAL_NAME, _)) +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + (BOOST_PP_CAT(LOCAL_NAME, Parameters_)) #define _DECLARE_VIEW_ELEMENT_CONSTR_CALL(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL LAYOUT_MEMBER_NAME) @@ -266,12 +267,12 @@ namespace cms::soa { SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() { \ return typename cms::soa::SoAAccessors:: \ template ColumnType::template AccessType< \ - cms::soa::SoAAccessType::mutableAccess>(BOOST_PP_CAT(LOCAL_NAME, _))(); \ + cms::soa::SoAAccessType::mutableAccess>(BOOST_PP_CAT(LOCAL_NAME, Parameters_))(); \ } \ SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) { \ return typename cms::soa::SoAAccessors:: \ template ColumnType::template AccessType< \ - cms::soa::SoAAccessType::mutableAccess>(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ + cms::soa::SoAAccessType::mutableAccess>(BOOST_PP_CAT(LOCAL_NAME, Parameters_))(index); \ } #define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -285,26 +286,37 @@ namespace cms::soa { SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() const { \ return typename cms::soa::SoAAccessors:: \ template ColumnType::template AccessType< \ - cms::soa::SoAAccessType::constAccess>(BOOST_PP_CAT(LOCAL_NAME, _))(); \ + cms::soa::SoAAccessType::constAccess>(BOOST_PP_CAT(LOCAL_NAME, Parameters_))(); \ } \ SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) const { \ return typename cms::soa::SoAAccessors:: \ template ColumnType::template AccessType< \ - cms::soa::SoAAccessType::constAccess>(BOOST_PP_CAT(LOCAL_NAME, _))(index); \ + cms::soa::SoAAccessType::constAccess>(BOOST_PP_CAT(LOCAL_NAME, Parameters_))(index); \ } #define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL LAYOUT_MEMBER_NAME) /** - * SoA class member declaration (column 
pointers). + * SoA class member declaration (column pointers and parameters). */ -#define _DECLARE_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ - DATA typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _) = nullptr; +#define _DECLARE_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _disabled) = nullptr; \ + typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME) BOOST_PP_CAT(LOCAL_NAME, Parameters_); #define _DECLARE_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) +/** + * Const SoA class member declaration (column pointers and parameters). + */ +#define _DECLARE_CONST_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + const typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _disabled) = nullptr; \ + typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME)::ConstType BOOST_PP_CAT(LOCAL_NAME, Parameters_); + +#define _DECLARE_CONST_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_CONST_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + /* ---- MUTABLE VIEW -------------------------------------------------------------------------------------------------------------------- */ // clang-format off #define GENERATE_SOA_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ @@ -329,14 +341,12 @@ namespace cms::soa { constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ constexpr static cms::soa::RangeChecking rangeChecking = RANGE_CHECKING; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ - template \ - using SoAValueWithConf = cms::soa::SoAValue; \ + template \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ - template \ - using SoAConstValueWithConf = 
cms::soa::SoAConstValue; \ + template \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ - template \ - using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ /** \ * Helper/friend class allowing SoA introspection. \ */ \ @@ -362,7 +372,7 @@ namespace cms::soa { SOA_HOST_DEVICE_INLINE SoAMetadata soaMetadata() { return SoAMetadata(*this); } \ \ /* Trivial constuctor */ \ - CLASS() : nElements_(0), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ + CLASS() {} \ \ /* Constructor relying on user provided layouts or views */ \ SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), LAYOUTS_LIST)) \ @@ -434,7 +444,7 @@ namespace cms::soa { SOA_HOST_ONLY friend void dump(); \ \ private: \ - size_t nElements_; \ + size_t nElements_ = 0; \ _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, BOOST_PP_EMPTY(), VALUE_LIST) \ }; // clang-format on @@ -463,15 +473,11 @@ namespace cms::soa { constexpr static cms::soa::RestrictQualify restrictQualify = RESTRICT_QUALIFY; \ constexpr static cms::soa::RangeChecking rangeChecking = RANGE_CHECKING; \ /* Those typedefs avoid having commas in macros (which is problematic) */ \ - template \ - using SoAValueWithConf = cms::soa::SoAValue; \ - \ - template \ - using SoAConstValueWithConf = cms::soa::SoAConstValue; \ - \ - template \ - using SoAEigenValueWithConf = cms::soa::SoAEigenValue; \ + template \ + using SoAValueWithConf = cms::soa::SoAValue; \ \ + template \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ /** \ * Helper/friend class allowing SoA introspection. 
\ */ \ @@ -495,7 +501,7 @@ namespace cms::soa { SOA_HOST_DEVICE_INLINE const SoAMetadata soaMetadata() const { return SoAMetadata(*this); } \ \ /* Trivial constuctor */ \ - CLASS() : nElements_(0), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_TRIVIAL_CONSTRUCTION, ~, VALUE_LIST) {} \ + CLASS() {} \ \ /* Constructor relying on user provided layouts or views */ \ SOA_HOST_ONLY CLASS(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, LAYOUTS_LIST)) \ @@ -515,7 +521,7 @@ namespace cms::soa { struct const_element { \ SOA_HOST_DEVICE_INLINE \ const_element(size_t index, /* Declare parameters */ \ - _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST)) \ + _ITERATE_ON_ALL_COMMA(_DECLARE_CONST_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST)) \ : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ \ @@ -541,8 +547,8 @@ namespace cms::soa { SOA_HOST_ONLY friend void dump(); \ \ private: \ - size_t nElements_; \ - _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_MEMBER, const, VALUE_LIST) \ + size_t nElements_ = 0; \ + _ITERATE_ON_ALL(_DECLARE_CONST_VIEW_SOA_MEMBER, const, VALUE_LIST) \ }; // clang-format on @@ -559,17 +565,17 @@ namespace cms::soa { */ #define GENERATE_SOA_LAYOUT_VIEW_AND_CONST_VIEW(LAYOUT_NAME, VIEW_NAME, CONST_VIEW_NAME, ...) 
\ - GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ + GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__) \ using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME<>; \ GENERATE_SOA_VIEW(VIEW_NAME, \ SOA_VIEW_LAYOUT_LIST((BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME))), \ SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA( \ - _VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); \ + _VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))) \ GENERATE_SOA_CONST_VIEW( \ CONST_VIEW_NAME, \ SOA_VIEW_LAYOUT_LIST((BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME))), \ SOA_VIEW_VALUE_LIST( \ - _ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); + _ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))) #define GENERATE_SOA_LAYOUT_AND_VIEW(LAYOUT_NAME, VIEW_NAME, ...) \ GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ @@ -577,15 +583,15 @@ namespace cms::soa { GENERATE_SOA_VIEW(VIEW_NAME, \ SOA_VIEW_LAYOUT_LIST((BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME))), \ SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA( \ - _VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); + _VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))) #define GENERATE_SOA_LAYOUT_AND_CONST_VIEW(LAYOUT_NAME, CONST_VIEW_NAME, ...) 
\ - GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__); \ + GENERATE_SOA_LAYOUT(LAYOUT_NAME, __VA_ARGS__) \ using BOOST_PP_CAT(LAYOUT_NAME, _default) = LAYOUT_NAME<>; \ GENERATE_SOA_CONST_VIEW( \ CONST_VIEW_NAME, \ SOA_VIEW_LAYOUT_LIST((BOOST_PP_CAT(LAYOUT_NAME, _default), BOOST_PP_CAT(instance_, LAYOUT_NAME))), \ SOA_VIEW_VALUE_LIST( \ - _ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))); + _ITERATE_ON_ALL_COMMA(_VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, LAYOUT_NAME), __VA_ARGS__))) #endif // ndef DataStructures_SoAView_h diff --git a/src/cudadev/bin/Source.h b/src/cudadev/bin/Source.h index 69d54d336..c29685c07 100644 --- a/src/cudadev/bin/Source.h +++ b/src/cudadev/bin/Source.h @@ -17,11 +17,8 @@ namespace edm { class Source { public: - explicit Source(int maxEvents, - int runForMinutes, - ProductRegistry& reg, - std::filesystem::path const& datadir, - bool validation); + explicit Source( + int maxEvents, int runForMinutes, ProductRegistry& reg, std::filesystem::path const& datadir, bool validation); void startProcessing(); diff --git a/src/cudadev/test/SoALayoutAndView_t.cu b/src/cudadev/test/SoALayoutAndView_t.cu index 71c401063..3f1e58f43 100644 --- a/src/cudadev/test/SoALayoutAndView_t.cu +++ b/src/cudadev/test/SoALayoutAndView_t.cu @@ -10,6 +10,7 @@ // Scalars, Columns of scalars and of Eigen vectors // View to each of them, from one and multiple stores. +#if 1 GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, SoA1ViewTemplate, // predefined static scalars @@ -22,10 +23,9 @@ GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, SOA_COLUMN(double, z), SOA_COLUMN(double, sum), SOA_COLUMN(double, prod), - /* Leave Eigen definitions out until support is complete. 
- SOA_EIGEN_COLUMN(Eigen::Vector3d, a), - SOA_EIGEN_COLUMN(Eigen::Vector3d, b), - SOA_EIGEN_COLUMN(Eigen::Vector3d, r),*/ + SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + SOA_EIGEN_COLUMN(Eigen::Vector3d, b), + SOA_EIGEN_COLUMN(Eigen::Vector3d, r), SOA_COLUMN(uint16_t, color), SOA_COLUMN(int32_t, value), SOA_COLUMN(double *, py), @@ -35,6 +35,7 @@ GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, // scalars: one value for the whole structure SOA_SCALAR(const char *, description), SOA_SCALAR(uint32_t, someNumber)) +#endif using SoA1Layout = SoA1LayoutTemplate<>; using SoA1View = SoA1ViewTemplate<>; @@ -58,10 +59,9 @@ GENERATE_SOA_CONST_VIEW(SoA1View2Gconst, SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoA1Layout, soa1), SOA_VIEW_LAYOUT(SoA1View, soa1v)), SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soa1, x), SOA_VIEW_VALUE(soa1v, y), - /* Eigen columns are not supported in views. - SoA_view_value(soa1, a, a), - SoA_view_value(soa1, b, b), - SoA_view_value(soa1, r, r), */ + SOA_VIEW_VALUE(soa1, a), + SOA_VIEW_VALUE(soa1, b), + SOA_VIEW_VALUE(soa1, r), SOA_VIEW_VALUE(soa1, color), SOA_VIEW_VALUE(soa1v, value), SOA_VIEW_VALUE(soa1v, count), @@ -114,14 +114,13 @@ int main() { s.y = 2.0 * i; s.z = 3.0 * i; s.color() = i; - // TODO: re-enable when support of eigen is added to views. - /*s.a()(0) = 1.0 * i; + s.a()(0) = 1.0 * i; s.a()(1) = 2.0 * i; s.a()(2) = 3.0 * i; s.b()(0) = 3.0 * i; s.b()(1) = 2.0 * i; s.b()(2) = 1.0 * i; - s.r() = s.a().cross(s.b());*/ + s.r() = s.a().cross(s.b()); } // Check direct read back for (size_t i = 0; i < size; i++) { @@ -130,14 +129,13 @@ int main() { assert(s.y() == 2.0 * i); assert(s.z() == 3.0 * i); assert(s.color() == i); - // TODO: re-enable when support of eigen is added to views. 
- /*assert(s.a()(0) == 1.0 * i); + assert(s.a()(0) == 1.0 * i); assert(s.a()(1) == 2.0 * i); assert(s.a()(2) == 3.0 * i); assert(s.b()(0) == 3.0 * i); assert(s.b()(1) == 2.0 * i); assert(s.b()(2) == 1.0 * i); - assert(s.r() == s.a().cross(s.b()));*/ + assert(s.r() == s.a().cross(s.b())); } // Check readback through other views for (size_t i = 0; i < size; i++) { From bb0d02804fca69d6832a21eb77d6df8c6956591e Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Thu, 27 Jan 2022 14:22:21 +0100 Subject: [PATCH 42/50] [cudadev] Updated SoA status in MD file. --- src/cudadev/DataFormats/SoA.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cudadev/DataFormats/SoA.md b/src/cudadev/DataFormats/SoA.md index c990f5bc1..d136fb9f1 100644 --- a/src/cudadev/DataFormats/SoA.md +++ b/src/cudadev/DataFormats/SoA.md @@ -163,11 +163,12 @@ scenarios where only a subset of columns are used in a given GPU kernel. - The layout and views support scalars and columns, alignment and alignment enforcement and hinting. - Automatic `__restrict__` compiler hinting is supported. - A shortcut alloCreate a mechanism to derive trivial views and const views from a single layout. -- Cache access style, which was explored, was abandoned as this not-yet-used feature interferes with `__restrict__` support (which is already in used in existing code). It could be made available as a separate tool that can be used directly by the module developer, orthogonally from SoA. +- Cache access style, which was explored, was abandoned as this not-yet-used feature interferes with `__restrict__` support (which is +already in used in existing code). It could be made available as a separate tool that can be used directly by the module developer, +orthogonally from SoA. +- Optional (compile time) range checking validates the index of every column access, throwing an exception on the CPU side and forcing +a segmentation fault to halt kernels. 
When not enabled, it has no impact on performance (code not compiled) +- Eigen columns are also supported, with both const and non-const flavors. ### Planned additions -- Optional range checking will be added later. This implies adding support for size to views and will restrict views to columns of -equal size. -- Eigen access was validated with an earlier scheme, but will be ported back to the current one later. Some alignment information can be -passed to Eigen strcutures. Const variants of access classes should be created to ensure we cannot leak mutable access to const products. - Improve `dump()` function and turn it into a more classic `operator<<()`. From c7a5f593440d63f38222294f47a2e969c8df3075 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Fri, 28 Jan 2022 15:23:08 +0100 Subject: [PATCH 43/50] [cudadev] Fixed hardcoded restrict qualifier for Eigen SoA columns Removed unused macro. --- src/cudadev/DataFormats/SoACommon.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 0a1ed906e..d0a8e8364 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -14,13 +14,11 @@ #define SOA_DEVICE_ONLY __device__ #define SOA_HOST_DEVICE __host__ __device__ #define SOA_HOST_DEVICE_INLINE __host__ __device__ __forceinline__ -#define SOA_DEVICE_RESTRICT __restrict__ #else #define SOA_HOST_ONLY #define SOA_DEVICE_ONLY #define SOA_HOST_DEVICE #define SOA_HOST_DEVICE_INLINE inline -#define SOA_DEVICE_RESTRICT #endif // Exception throwing (or willful crash in kernels) @@ -248,6 +246,12 @@ namespace cms::soa { typedef C Type; typedef Eigen::Map> MapType; typedef Eigen::Map> CMapType; + typedef add_restrict Restr; + typedef typename Restr::Value Val; + typedef typename Restr::Pointer Ptr; + typedef typename Restr::Reference Ref; + typedef typename Restr::PointerToConst PtrToConst; + typedef typename Restr::ReferenceToConst RefToConst; 
SOA_HOST_DEVICE_INLINE SoAValue(size_t i, typename C::Scalar* col, size_t stride) : val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), crCol_(col), @@ -280,7 +284,7 @@ namespace cms::soa { private: MapType val_; - const typename C::Scalar* __restrict__ crCol_; + const Ptr crCol_; CMapType cVal_; size_t stride_; }; From 4f0817942800831311e1dd6fb9292511b5e16de1 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Fri, 28 Jan 2022 16:20:43 +0100 Subject: [PATCH 44/50] [cudadev] Removed superseded variable in SoA views. --- src/cudadev/DataFormats/SoAView.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 7e4629841..355933a5b 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -301,7 +301,6 @@ namespace cms::soa { * SoA class member declaration (column pointers and parameters). */ #define _DECLARE_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ - typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _disabled) = nullptr; \ typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME) BOOST_PP_CAT(LOCAL_NAME, Parameters_); #define _DECLARE_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -311,7 +310,6 @@ namespace cms::soa { * Const SoA class member declaration (column pointers and parameters). 
*/ #define _DECLARE_CONST_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ - const typename BOOST_PP_CAT(SoAMetadata::TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(LOCAL_NAME, _disabled) = nullptr; \ typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME)::ConstType BOOST_PP_CAT(LOCAL_NAME, Parameters_); #define _DECLARE_CONST_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ From 218f925f9183c78f6b6cc367d4596bcba14656db Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 31 Jan 2022 10:25:28 +0100 Subject: [PATCH 45/50] [cudadev] Cleaned up testing #if --- src/cudadev/test/SoALayoutAndView_t.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cudadev/test/SoALayoutAndView_t.cu b/src/cudadev/test/SoALayoutAndView_t.cu index 3f1e58f43..511806bf2 100644 --- a/src/cudadev/test/SoALayoutAndView_t.cu +++ b/src/cudadev/test/SoALayoutAndView_t.cu @@ -10,7 +10,6 @@ // Scalars, Columns of scalars and of Eigen vectors // View to each of them, from one and multiple stores. -#if 1 GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, SoA1ViewTemplate, // predefined static scalars @@ -35,7 +34,6 @@ GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, // scalars: one value for the whole structure SOA_SCALAR(const char *, description), SOA_SCALAR(uint32_t, someNumber)) -#endif using SoA1Layout = SoA1LayoutTemplate<>; using SoA1View = SoA1ViewTemplate<>; From 2be25c5a733ae585ea326bf5886b957223e6757b Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 31 Jan 2022 14:46:43 +0100 Subject: [PATCH 46/50] [cudadev] Made Eigen headers inclusion optional for SoA. Eigen/Core should be included before the SoA headers to enable support. 
--- src/cudadev/DataFormats/SoACommon.h | 26 +++++++++++++++++++++++--- src/cudadev/test/SoALayoutAndView_t.cu | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index d0a8e8364..fb45a1692 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -6,7 +6,8 @@ #define DataStructures_SoACommon_h #include "boost/preprocessor.hpp" -#include +#include +#include // CUDA attributes #ifdef __CUDACC__ @@ -240,6 +241,7 @@ namespace cms::soa { // Helper template managing the value within it column // TODO Create a const variant to avoid leaking mutable access. +#ifdef EIGEN_WORLD_VERSION template class SoAValue { public: @@ -288,7 +290,12 @@ namespace cms::soa { CMapType cVal_; size_t stride_; }; - +#else + template + class SoAValue { + // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. + }; +#endif // Helper template managing the value within it column template @@ -365,8 +373,15 @@ namespace cms::soa { CMapType cVal_; size_t stride_; }; +#else + template + class SoAConstValue { + // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. + }; +#endif // Helper template to avoid commas in macro +#ifdef EIGEN_WORLD_VERSION template struct EigenConstMapMaker { typedef Eigen::Map> Type; @@ -383,7 +398,12 @@ namespace cms::soa { }; static DataHolder withData(const typename C::Scalar* data) { return DataHolder(data); } }; - +#else + template + struct EigenConstMapMaker { + // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. 
+ }; +#endif // Helper function to compute aligned size inline size_t alignSize(size_t size, size_t alignment = 128) { if (size) diff --git a/src/cudadev/test/SoALayoutAndView_t.cu b/src/cudadev/test/SoALayoutAndView_t.cu index 511806bf2..b3f6b20d2 100644 --- a/src/cudadev/test/SoALayoutAndView_t.cu +++ b/src/cudadev/test/SoALayoutAndView_t.cu @@ -1,3 +1,4 @@ +#include #include "DataFormats/SoALayout.h" #include "DataFormats/SoAView.h" #include From 3abe4d5fbb68d4e521f28dbb011d499c6243d7b8 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Mon, 31 Jan 2022 15:13:29 +0100 Subject: [PATCH 47/50] [cudadev] Improved compilation errors when Eigen is missing for SoA. --- src/cudadev/DataFormats/SoACommon.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index fb45a1692..209e511af 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -294,6 +294,7 @@ namespace cms::soa { template class SoAValue { // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. + static_assert(!sizeof(C), "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); }; #endif // Helper template managing the value within it column @@ -377,6 +378,7 @@ namespace cms::soa { template class SoAConstValue { // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. + static_assert(!sizeof(C), "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); }; #endif @@ -402,6 +404,7 @@ namespace cms::soa { template struct EigenConstMapMaker { // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. 
+ static_assert(!sizeof(C), "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); }; #endif // Helper function to compute aligned size From 8b50bc38e5fdae43869a1e2b1deec211cc3355da Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Wed, 2 Feb 2022 10:37:11 +0100 Subject: [PATCH 48/50] [cudadev] Replaced .dump() member function with operator<<() for SoA layout This also changes the function from a static to a non static one. A sample output is: SoA1LayoutTemplate(10000 elements, byte alignement= 128, @0x7f7a8d525080): sizeof(SoA1LayoutTemplate): 168 Column x at offset 0 has size 80000 and padding 0 Column y at offset 80000 has size 80000 and padding 0 Column z at offset 160000 has size 80000 and padding 0 Column sum at offset 240000 has size 80000 and padding 0 Column prod at offset 320000 has size 80000 and padding 0 Eigen value a at offset 400000 has dimension (3 x 1) and per column size 80000 and padding 0 Eigen value b at offset 640000 has dimension (3 x 1) and per column size 80000 and padding 0 Eigen value r at offset 880000 has dimension (3 x 1) and per column size 80000 and padding 0 Column color at offset 1120000 has size 20000 and padding 96 Column value at offset 1140096 has size 40000 and padding 64 Column py at offset 1180160 has size 80000 and padding 0 Column count at offset 1260160 has size 40000 and padding 64 Column anotherCount at offset 1300224 has size 40000 and padding 64 Scalar description at offset 1340288 has size 8 and padding 120 Scalar someNumber at offset 1340416 has size 4 and padding 124 Final offset = 1340544 computeDataSize(...): 1340544 --- src/cudadev/DataFormats/SoACommon.h | 19 ++++++- src/cudadev/DataFormats/SoALayout.h | 76 +++++++++++++------------- src/cudadev/DataFormats/SoAView.h | 4 +- src/cudadev/test/SoALayoutAndView_t.cu | 3 + 4 files changed, 58 insertions(+), 44 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 
209e511af..579d140da 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -8,6 +8,7 @@ #include "boost/preprocessor.hpp" #include #include +#include // CUDA attributes #ifdef __CUDACC__ @@ -294,7 +295,8 @@ namespace cms::soa { template class SoAValue { // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. - static_assert(!sizeof(C), "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); + static_assert(!sizeof(C), + "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); }; #endif // Helper template managing the value within it column @@ -378,7 +380,8 @@ namespace cms::soa { template class SoAConstValue { // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. - static_assert(!sizeof(C), "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); + static_assert(!sizeof(C), + "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); }; #endif @@ -404,7 +407,8 @@ namespace cms::soa { template struct EigenConstMapMaker { // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. - static_assert(!sizeof(C), "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); + static_assert(!sizeof(C), + "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); }; #endif // Helper function to compute aligned size @@ -529,6 +533,15 @@ namespace cms::soa { static constexpr size_t defaultSize = NvidiaGPU; }; + // An empty shell class to restrict the scope of templated operator<<(ostream, soa). 
+ struct BaseLayout {}; } // namespace cms::soa +// Small wrapper for stream insertion of SoA printing +template ::value, SOA>::type> +SOA_HOST_ONLY std::ostream& operator<<(std::ostream& os, const SOA& soa) { + soa.toStream(os); + return os; +} #endif // ndef DataStructures_SoACommon_h diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 6257ad05c..9f5d171ea 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -56,38 +56,36 @@ * */ -#define _DECLARE_SOA_DUMP_INFO_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE( \ - VALUE_TYPE, /* Dump scalar */ \ - std::cout << " Scalar " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset << " has size " << sizeof(CPP_TYPE) << " and padding " \ - << ((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment - sizeof(CPP_TYPE) \ - << std::endl; \ - offset += ((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment; \ - , /* Dump column */ \ - std::cout \ - << " Column " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset << " has size " << sizeof(CPP_TYPE) * nElements \ - << " and padding " \ - << (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * \ - byteAlignment - \ - (sizeof(CPP_TYPE) * nElements) \ - << std::endl; \ - offset += (((nElements * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ - , /* Dump Eigen column */ \ - std::cout \ - << " Eigen value " BOOST_PP_STRINGIZE(NAME) "_ at offset " << offset << " has dimension (" << CPP_TYPE::RowsAtCompileTime << " x " \ - << CPP_TYPE::ColsAtCompileTime \ - << ")" \ - << " and per column size " \ - << sizeof(CPP_TYPE::Scalar) * nElements \ - << " and padding " \ - << (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * \ - byteAlignment - \ - (sizeof(CPP_TYPE::Scalar) * nElements) \ - << std::endl; \ - offset += (((nElements * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment * \ +// clang-format off +#define 
_DECLARE_SOA_STREAM_INFO_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, \ + /* Dump scalar */ \ + os << " Scalar " BOOST_PP_STRINGIZE(NAME) " at offset " << offset << " has size " << sizeof(CPP_TYPE) \ + << " and padding " << ((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment - sizeof(CPP_TYPE) \ + << std::endl; \ + offset += ((sizeof(CPP_TYPE) - 1) / byteAlignment + 1) * byteAlignment; \ + , /* Dump column */ \ + os << " Column " BOOST_PP_STRINGIZE(NAME) " at offset " << offset << " has size " << sizeof(CPP_TYPE) * nElements_ \ + << " and padding " \ + << (((nElements_ * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment - (sizeof(CPP_TYPE) * nElements_) \ + << std::endl; \ + offset += (((nElements_ * sizeof(CPP_TYPE) - 1) / byteAlignment) + 1) * byteAlignment; \ + , /* Dump Eigen column */ \ + os << " Eigen value " BOOST_PP_STRINGIZE(NAME) " at offset " << offset << " has dimension (" \ + << CPP_TYPE::RowsAtCompileTime << " x " << CPP_TYPE::ColsAtCompileTime \ + << ")" \ + << " and per column size " \ + << sizeof(CPP_TYPE::Scalar) * nElements_ \ + << " and padding " \ + << (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment - \ + (sizeof(CPP_TYPE::Scalar) * nElements_) \ + << std::endl; \ + offset += (((nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / byteAlignment) + 1) * byteAlignment * \ CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime;) +// clang-format on -#define _DECLARE_SOA_DUMP_INFO(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_DUMP_INFO_IMPL TYPE_NAME) +#define _DECLARE_SOA_STREAM_INFO(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_STREAM_INFO_IMPL TYPE_NAME) /** * SoAMetadata member computing column pitch @@ -254,7 +252,7 @@ #define GENERATE_SOA_LAYOUT(CLASS, ...) 
\ template \ - struct CLASS { \ + struct CLASS: public cms::soa::BaseLayout { \ /* these could be moved to an external type trait to free up the symbol names */ \ using self_type = CLASS; \ typedef cms::soa::AlignmentEnforcement AlignmentEnforcement; \ @@ -275,18 +273,18 @@ template \ using SoAConstValueWithConf = cms::soa::SoAConstValue; \ \ - template \ /* dump the SoA internal structure */ \ SOA_HOST_ONLY \ - static void dump(size_t nElements) { \ - std::cout << #CLASS "(" << nElements << ", " << ALIGNMENT << "): " << std::endl; \ - std::cout << " sizeof(" #CLASS "): " << sizeof(CLASS) << std::endl; \ + void toStream(std::ostream & os) const { \ + os << #CLASS "(" << nElements_ << " elements, byte alignement= " << byteAlignment << ", @"<< mem_ <<"): " << std::endl; \ + os << " sizeof(" #CLASS "): " << sizeof(CLASS) << std::endl; \ size_t offset = 0; \ - _ITERATE_ON_ALL(_DECLARE_SOA_DUMP_INFO, ~, __VA_ARGS__) \ - std::cout << "Final offset = " << offset << " computeDataSize(...): " << computeDataSize(nElements) \ - << std::endl; \ - std::cout << std::endl; \ + _ITERATE_ON_ALL(_DECLARE_SOA_STREAM_INFO, ~, __VA_ARGS__) \ + os << "Final offset = " << offset << " computeDataSize(...): " << computeDataSize(nElements_) \ + << std::endl; \ + os << std::endl; \ } \ + \ /* Helper function used by caller to externally allocate the storage */ \ static size_t computeDataSize(size_t nElements) { \ size_t ret = 0; \ diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 355933a5b..39823b021 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -300,7 +300,7 @@ namespace cms::soa { /** * SoA class member declaration (column pointers and parameters). 
*/ -#define _DECLARE_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ +#define _DECLARE_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME) BOOST_PP_CAT(LOCAL_NAME, Parameters_); #define _DECLARE_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -309,7 +309,7 @@ namespace cms::soa { /** * Const SoA class member declaration (column pointers and parameters). */ -#define _DECLARE_CONST_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ +#define _DECLARE_CONST_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ typename BOOST_PP_CAT(SoAMetadata::ParametersTypeOf_, LOCAL_NAME)::ConstType BOOST_PP_CAT(LOCAL_NAME, Parameters_); #define _DECLARE_CONST_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ diff --git a/src/cudadev/test/SoALayoutAndView_t.cu b/src/cudadev/test/SoALayoutAndView_t.cu index b3f6b20d2..8be77ddd7 100644 --- a/src/cudadev/test/SoALayoutAndView_t.cu +++ b/src/cudadev/test/SoALayoutAndView_t.cu @@ -166,4 +166,7 @@ int main() { assert(false); } catch (const std::out_of_range &) { } + + // Print out the layout + std::cout << soa1 << std::endl; } \ No newline at end of file From 352eee9b5a255163e2c477bf7b8171b68745b80e Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 5 Apr 2022 15:58:35 +0200 Subject: [PATCH 49/50] [cudadev] Caught up with SoA developments in alpaka. 
--- src/cudadev/DataFormats/SoACommon.h | 49 +++++++++++++++++----- src/cudadev/DataFormats/SoALayout.h | 63 ++++++++++++++++++----------- src/cudadev/DataFormats/SoAView.h | 26 +++++++----- 3 files changed, 94 insertions(+), 44 deletions(-) diff --git a/src/cudadev/DataFormats/SoACommon.h b/src/cudadev/DataFormats/SoACommon.h index 579d140da..88afb07d6 100644 --- a/src/cudadev/DataFormats/SoACommon.h +++ b/src/cudadev/DataFormats/SoACommon.h @@ -27,7 +27,7 @@ #if defined(__CUDACC__) && defined(__CUDA_ARCH__) #define SOA_THROW_OUT_OF_RANGE(A) \ { \ - printf(A); \ + printf(A "\n"); \ *((char*)nullptr) = 0; \ } #else @@ -163,7 +163,8 @@ namespace cms::soa { typedef std::tuple TupleOrPointerType; ScalarType* addr_ = nullptr; size_t stride_ = 0; - SOA_HOST_DEVICE_INLINE SoAParametersImpl(ScalarType* addr, size_t stride) : addr_(addr), stride_(stride) {} + SOA_HOST_DEVICE_INLINE SoAParametersImpl(ScalarType* addr, size_t stride) + : addr_(addr), stride_(stride) {} SOA_HOST_DEVICE_INLINE SoAParametersImpl(const TupleOrPointerType tuple) : addr_(std::get<0>(tuple)), stride_(std::get<1>(tuple)) {} SOA_HOST_DEVICE_INLINE SoAParametersImpl() {} @@ -208,7 +209,8 @@ namespace cms::soa { typedef typename Restr::PointerToConst PtrToConst; typedef typename Restr::ReferenceToConst RefToConst; SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T* col) : idx_(i), col_(col) {} - SOA_HOST_DEVICE_INLINE SoAValue(size_t i, SoAParametersImpl params) : idx_(i), col_(params.addr_) {} + SOA_HOST_DEVICE_INLINE SoAValue(size_t i, SoAParametersImpl params) + : idx_(i), col_(params.addr_) {} /* SOA_HOST_DEVICE_INLINE operator T&() { return col_[idx_]; } */ SOA_HOST_DEVICE_INLINE Ref operator()() { // Ptr type will add the restrict qualifyer if needed @@ -458,6 +460,7 @@ namespace cms::soa { SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAParametersImpl& params) : params_(params) {} SOA_HOST_DEVICE_INLINE T* operator()() { return params_.addr_; } + typedef T* NoParamReturnType; 
SOA_HOST_DEVICE_INLINE T& operator()(size_t index) { return params_.addr_[index]; } private: @@ -467,9 +470,11 @@ namespace cms::soa { // Const column template struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) + SOA_HOST_DEVICE_INLINE + SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) : params_(params) {} SOA_HOST_DEVICE_INLINE const T* operator()() const { return params_.addr_; } + typedef T* NoParamReturnType; SOA_HOST_DEVICE_INLINE T operator()(size_t index) const { return params_.addr_[index]; } private: @@ -482,6 +487,7 @@ namespace cms::soa { SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAParametersImpl& params) : params_(params) {} SOA_HOST_DEVICE_INLINE T& operator()() { return *params_.addr_; } + typedef T& NoParamReturnType; SOA_HOST_DEVICE_INLINE void operator()(size_t index) const { assert(false && "Indexed access impossible for SoA scalars."); } @@ -493,9 +499,11 @@ namespace cms::soa { // Const scalar template struct SoAColumnAccessorsImpl { - SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) + SOA_HOST_DEVICE_INLINE + SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) : params_(params) {} SOA_HOST_DEVICE_INLINE T operator()() const { return *params_.addr_; } + typedef T NoParamReturnType; SOA_HOST_DEVICE_INLINE void operator()(size_t index) const { assert(false && "Indexed access impossible for SoA scalars."); } @@ -504,17 +512,40 @@ namespace cms::soa { SoAConstParametersImpl params_; }; + template + struct SoAColumnAccessorsImpl { + //SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(T* baseAddress) : baseAddress_(baseAddress) {} + SOA_HOST_DEVICE_INLINE SoAColumnAccessorsImpl(const SoAParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE_INLINE typename T::Scalar* operator()() { return params_.addr_; } + typedef typename T::Scalar* NoParamReturnType; + //SOA_HOST_DEVICE_INLINE T& operator()(size_t index) { 
return params_.addr_[index]; } + + private: + SoAParametersImpl params_; + }; + + // Const column + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE_INLINE + SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE_INLINE const typename T::Scalar* operator()() const { return params_.addr_; } + typedef typename T::Scalar* NoParamReturnType; + //SOA_HOST_DEVICE_INLINE T operator()(size_t index) const { return params_.addr_[index]; } + + private: + SoAConstParametersImpl params_; + }; + /* A helper template stager avoiding comma in macros */ template struct SoAAccessors { - // TODO: useful? - using myInt = int; template struct ColumnType { - using myInt = int; template struct AccessType : public SoAColumnAccessorsImpl { - using myInt = int; using SoAColumnAccessorsImpl::SoAColumnAccessorsImpl; }; }; diff --git a/src/cudadev/DataFormats/SoALayout.h b/src/cudadev/DataFormats/SoALayout.h index 9f5d171ea..036f1cb4f 100644 --- a/src/cudadev/DataFormats/SoALayout.h +++ b/src/cudadev/DataFormats/SoALayout.h @@ -99,27 +99,36 @@ } \ typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::scalar; \ + SOA_HOST_DEVICE_INLINE \ CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() const { \ - return parent_.BOOST_PP_CAT(NAME, _); \ + return parent_.soaMetadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ } \ typedef cms::soa::SoAParameters_ColumnType::DataType \ BOOST_PP_CAT(ParametersTypeOf_, NAME); \ + SOA_HOST_DEVICE_INLINE \ BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ return BOOST_PP_CAT(ParametersTypeOf_, NAME) (parent_.BOOST_PP_CAT(NAME, _)); \ } \ - CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); }, \ + SOA_HOST_DEVICE_INLINE \ + CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { \ + return parent_.soaMetadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ + }, 
\ /* Column */ \ typedef cms::soa::SoAParameters_ColumnType::DataType \ BOOST_PP_CAT(ParametersTypeOf_, NAME); \ + SOA_HOST_DEVICE_INLINE \ BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ return BOOST_PP_CAT(ParametersTypeOf_, NAME) (parent_.BOOST_PP_CAT(NAME, _)); \ } \ + SOA_HOST_DEVICE_INLINE \ CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() const { \ - return parent_.BOOST_PP_CAT(NAME, _); \ + return parent_.soaMetadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ } \ + SOA_HOST_DEVICE_INLINE \ CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { \ - return parent_.BOOST_PP_CAT(NAME, _); \ + return parent_.soaMetadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ } \ + SOA_HOST_DEVICE_INLINE \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE) - 1) / ParentClass::byteAlignment) + 1) * \ ParentClass::byteAlignment; \ @@ -129,20 +138,26 @@ /* Eigen column */ \ typedef cms::soa::SoAParameters_ColumnType::DataType \ BOOST_PP_CAT(ParametersTypeOf_, NAME); \ + SOA_HOST_DEVICE_INLINE \ BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ return BOOST_PP_CAT(ParametersTypeOf_, NAME) ( \ parent_.BOOST_PP_CAT(NAME, _), \ parent_.BOOST_PP_CAT(NAME, Stride_)); \ } \ + SOA_HOST_DEVICE_INLINE \ size_t BOOST_PP_CAT(NAME, Pitch()) const { \ return (((parent_.nElements_ * sizeof(CPP_TYPE::Scalar) - 1) / ParentClass::byteAlignment) + 1) * \ ParentClass::byteAlignment * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ } typedef CPP_TYPE BOOST_PP_CAT(TypeOf_, NAME); \ constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::eigen; \ + SOA_HOST_DEVICE_INLINE \ CPP_TYPE::Scalar const* BOOST_PP_CAT(addressOf_, NAME)() const { \ - return parent_.BOOST_PP_CAT(NAME, _); \ + return parent_.soaMetadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ + } \ + SOA_HOST_DEVICE_INLINE \ + CPP_TYPE::Scalar* BOOST_PP_CAT(addressOf_, 
NAME)() { \ + return parent_.soaMetadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ } \ - CPP_TYPE::Scalar* BOOST_PP_CAT(addressOf_, NAME)() { return parent_.BOOST_PP_CAT(NAME, _); } \ ) // clang-format on #define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME @@ -197,14 +212,14 @@ /** * Direct access to column pointer and indexed access */ -#define _DECLARE_SOA_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE( \ - VALUE_TYPE, /* Scalar */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME() { return *BOOST_PP_CAT(NAME, _); }, /* Column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE* NAME() { \ - return BOOST_PP_CAT(NAME, _); \ - } SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME(size_t index) { return BOOST_PP_CAT(NAME, _)[index]; }, \ - /* Eigen column */ /* Unsupported for the moment TODO */ \ +#define _DECLARE_SOA_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, /* Scalar */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME() { return *BOOST_PP_CAT(NAME, _); }, /* Column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE* NAME() { \ + return BOOST_PP_CAT(NAME, _); \ + } SOA_HOST_DEVICE_INLINE CPP_TYPE& NAME(size_t index) { return BOOST_PP_CAT(NAME, _)[index]; }, \ + /* Eigen column */ /* Unsupported for the moment TODO */ \ BOOST_PP_EMPTY()) #define _DECLARE_SOA_ACCESSOR(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_ACCESSOR_IMPL TYPE_NAME) @@ -212,16 +227,16 @@ /** * Direct access to column pointer (const) and indexed access. 
*/ -#define _DECLARE_SOA_CONST_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ - _SWITCH_ON_TYPE( \ - VALUE_TYPE, /* Scalar */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return *(BOOST_PP_CAT(NAME, _)); }, /* Column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE const* NAME() \ - const { return BOOST_PP_CAT(NAME, _); } SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) \ - const { return *(BOOST_PP_CAT(NAME, _) + index); }, /* Eigen column */ \ - SOA_HOST_DEVICE_INLINE CPP_TYPE::Scalar const* NAME() const { \ - return BOOST_PP_CAT(NAME, _); \ - } SOA_HOST_DEVICE_INLINE size_t BOOST_PP_CAT(NAME, Stride)() { return BOOST_PP_CAT(NAME, Stride_); }) +#define _DECLARE_SOA_CONST_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, /* Scalar */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE NAME() const { return *(BOOST_PP_CAT(NAME, _)); }, /* Column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE const* NAME() \ + const { return BOOST_PP_CAT(NAME, _); } SOA_HOST_DEVICE_INLINE CPP_TYPE NAME(size_t index) \ + const { return *(BOOST_PP_CAT(NAME, _) + index); }, /* Eigen column */ \ + SOA_HOST_DEVICE_INLINE CPP_TYPE::Scalar const* NAME() \ + const { return BOOST_PP_CAT(NAME, _); } SOA_HOST_DEVICE_INLINE size_t BOOST_PP_CAT( \ + NAME, Stride)() { return BOOST_PP_CAT(NAME, Stride_); }) #define _DECLARE_SOA_CONST_ACCESSOR(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_CONST_ACCESSOR_IMPL TYPE_NAME) diff --git a/src/cudadev/DataFormats/SoAView.h b/src/cudadev/DataFormats/SoAView.h index 39823b021..5b691c579 100644 --- a/src/cudadev/DataFormats/SoAView.h +++ b/src/cudadev/DataFormats/SoAView.h @@ -82,10 +82,10 @@ namespace cms::soa { BOOST_PP_CAT(ParametersTypeOf_, LOCAL_NAME); \ constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::SoAMetadata::BOOST_PP_CAT(ColumnTypeOf_, LAYOUT_MEMBER); \ - SOA_HOST_DEVICE_INLINE \ - DATA BOOST_PP_CAT(TypeOf_, LOCAL_NAME) * BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ - 
return parent_.BOOST_PP_CAT(LOCAL_NAME, _); \ + SOA_HOST_DEVICE_INLINE DATA auto* BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ + return parent_.soaMetadata().BOOST_PP_CAT(parametersOf_, LOCAL_NAME)().addr_; \ }; \ + SOA_HOST_DEVICE_INLINE \ DATA BOOST_PP_CAT(ParametersTypeOf_, LOCAL_NAME) BOOST_PP_CAT(parametersOf_, LOCAL_NAME)() const { \ return parent_.BOOST_PP_CAT(LOCAL_NAME, Parameters_); \ }; @@ -206,12 +206,12 @@ namespace cms::soa { /** * Declaration of the members accessors of the const element subclass */ -#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ - SOA_HOST_DEVICE_INLINE \ - typename SoAConstValueWithConf::RefToConst \ - LOCAL_NAME() const { \ - return BOOST_PP_CAT(LOCAL_NAME, _)(); \ +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + SOA_HOST_DEVICE_INLINE \ + typename SoAConstValueWithConf::RefToConst \ + LOCAL_NAME() const { \ + return BOOST_PP_CAT(LOCAL_NAME, _)(); \ } #define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ @@ -264,12 +264,16 @@ namespace cms::soa { */ #define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ /* Column or scalar */ \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME() { \ + SOA_HOST_DEVICE_INLINE \ + typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::mutableAccess>::NoParamReturnType \ + LOCAL_NAME() { \ return typename cms::soa::SoAAccessors:: \ template ColumnType::template AccessType< \ cms::soa::SoAAccessType::mutableAccess>(BOOST_PP_CAT(LOCAL_NAME, Parameters_))(); \ } \ - SOA_HOST_DEVICE_INLINE auto LOCAL_NAME(size_t index) { \ + SOA_HOST_DEVICE_INLINE auto& LOCAL_NAME(size_t index) { \ return typename cms::soa::SoAAccessors:: \ template ColumnType::template AccessType< \ cms::soa::SoAAccessType::mutableAccess>(BOOST_PP_CAT(LOCAL_NAME, Parameters_))(index); \ From 54d0e3a772f495cd3e669a0d9a268e5c1f34a40f Mon Sep 17 00:00:00 
2001 From: Eric Cano Date: Tue, 5 Apr 2022 18:02:16 +0200 Subject: [PATCH 50/50] [cudadev] Backported the kernel based SoA test from alpaka to cudadev --- src/cudadev/test/SoALayoutAndView_t.cu | 356 ++++++++++++++++--------- 1 file changed, 229 insertions(+), 127 deletions(-) diff --git a/src/cudadev/test/SoALayoutAndView_t.cu b/src/cudadev/test/SoALayoutAndView_t.cu index 8be77ddd7..edc848cfa 100644 --- a/src/cudadev/test/SoALayoutAndView_t.cu +++ b/src/cudadev/test/SoALayoutAndView_t.cu @@ -1,6 +1,7 @@ #include #include "DataFormats/SoALayout.h" #include "DataFormats/SoAView.h" +#include "CUDACore/cudaCheck.h" #include #include #include @@ -11,8 +12,8 @@ // Scalars, Columns of scalars and of Eigen vectors // View to each of them, from one and multiple stores. -GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, - SoA1ViewTemplate, +GENERATE_SOA_LAYOUT_AND_VIEW(SoAHostDeviceLayoutTemplate, + SoAHostDeviceViewTemplate, // predefined static scalars // size_t size; // size_t alignment; @@ -21,152 +22,253 @@ GENERATE_SOA_LAYOUT_AND_VIEW(SoA1LayoutTemplate, SOA_COLUMN(double, x), SOA_COLUMN(double, y), SOA_COLUMN(double, z), - SOA_COLUMN(double, sum), - SOA_COLUMN(double, prod), SOA_EIGEN_COLUMN(Eigen::Vector3d, a), SOA_EIGEN_COLUMN(Eigen::Vector3d, b), SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + // scalars: one value for the whole structure + SOA_SCALAR(const char*, description), + SOA_SCALAR(uint32_t, someNumber)) + +using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>; +using SoAHostDeviceView = + SoAHostDeviceViewTemplate; + +GENERATE_SOA_LAYOUT_AND_VIEW(SoADeviceOnlyLayoutTemplate, + SoADeviceOnlyViewTemplate, SOA_COLUMN(uint16_t, color), - SOA_COLUMN(int32_t, value), - SOA_COLUMN(double *, py), + SOA_COLUMN(double, value), + SOA_COLUMN(double*, py), SOA_COLUMN(uint32_t, count), - SOA_COLUMN(uint32_t, anotherCount), + SOA_COLUMN(uint32_t, anotherCount)) - // scalars: one value for the whole structure - SOA_SCALAR(const char *, description), - SOA_SCALAR(uint32_t, 
someNumber)) +using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>; +using SoADeviceOnlyView = + SoADeviceOnlyViewTemplate; + +// A 1 to 1 view of the store (except for unsupported types). +GENERATE_SOA_VIEW(SoAFullDeviceViewTemplate, + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoAHostDeviceLayout, soaHD), + SOA_VIEW_LAYOUT(SoADeviceOnlyLayout, soaDO)), + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_VALUE(soaHD, x), + SOA_VIEW_VALUE(soaHD, y), + SOA_VIEW_VALUE(soaHD, z), + SOA_VIEW_VALUE(soaDO, color), + SOA_VIEW_VALUE(soaDO, value), + SOA_VIEW_VALUE(soaDO, py), + SOA_VIEW_VALUE(soaDO, count), + SOA_VIEW_VALUE(soaDO, anotherCount), + SOA_VIEW_VALUE(soaHD, description), + SOA_VIEW_VALUE(soaHD, someNumber))) -using SoA1Layout = SoA1LayoutTemplate<>; -using SoA1View = SoA1ViewTemplate<>; - -// A partial view (artificial mix of store and view) -GENERATE_SOA_VIEW(SoA1View2GTemplate, - SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoA1Layout, soa1), SOA_VIEW_LAYOUT(SoA1View, soa1v)), - SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soa1, x), - SOA_VIEW_VALUE(soa1v, y), - SOA_VIEW_VALUE(soa1, color), - SOA_VIEW_VALUE(soa1v, value), - SOA_VIEW_VALUE(soa1v, count), - SOA_VIEW_VALUE(soa1, anotherCount), - SOA_VIEW_VALUE(soa1v, description), - SOA_VIEW_VALUE(soa1, someNumber))) - -using SoA1View2G = SoA1View2GTemplate<>; - -// Same partial view, yet const. -GENERATE_SOA_CONST_VIEW(SoA1View2Gconst, - SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoA1Layout, soa1), SOA_VIEW_LAYOUT(SoA1View, soa1v)), - SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soa1, x), - SOA_VIEW_VALUE(soa1v, y), - SOA_VIEW_VALUE(soa1, a), - SOA_VIEW_VALUE(soa1, b), - SOA_VIEW_VALUE(soa1, r), - SOA_VIEW_VALUE(soa1, color), - SOA_VIEW_VALUE(soa1v, value), - SOA_VIEW_VALUE(soa1v, count), - SOA_VIEW_VALUE(soa1, anotherCount), - SOA_VIEW_VALUE(soa1v, description), - SOA_VIEW_VALUE(soa1, someNumber))) - -// Parameter reusing kernels. 
The disassembly will indicate whether the compiler uses the wanted cache hits and uses -// `restrict` hints avoid multiple reduce loads. -// The PTX can be obtained using -ptx insterad of -c when compiling. -template -__device__ void addAndMulTemplate(T soa, size_t size) { - auto idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx >= size) - return; - auto si = soa[idx]; - si.sum() = si.x() + si.y(); - si.prod() = si.x() * si.y(); +using SoAFullDeviceView = + SoAFullDeviceViewTemplate; + +// Eigen cross product kernel (on store) +__global__ void crossProduct(SoAHostDeviceView soa, const unsigned int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i>=numElements) return; + auto si = soa[i]; + si.r() = si.a().cross(si.b()); } -__global__ void aAMDef(SoA1ViewTemplate soa, - size_t size) { - addAndMulTemplate(soa, size); +// Device-only producer kernel +__global__ void producerKernel(SoAFullDeviceView soa, const unsigned int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i>=numElements) return; + auto si = soa[i]; + si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char)); + si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z()); } -__global__ void aAMRestrict(SoA1ViewTemplate soa, - size_t size) { - addAndMulTemplate(soa, size); +// Device-only consumer with result in host-device area +__global__ void consumerKernel(SoAFullDeviceView soa, const unsigned int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i>=numElements) return; + auto si = soa[i]; + si.x() = si.color() * si.value(); } -const size_t size = 10000; - -int main() { - // Allocate buffer - std::unique_ptr buffer( - static_cast(std::aligned_alloc(SoA1Layout::defaultAlignment, SoA1Layout::computeDataSize(size))), - std::free); - SoA1Layout soa1(buffer.get(), size); - SoA1View soa1view(soa1); - SoA1View2G soa1v2g(soa1, soa1view); - SoA1View2Gconst soa1v2gconst(soa1, soa1view); - // Write to view - for (size_t i = 0; i < 
size; i++) { - auto s = soa1view[i]; - s.x = 1.0 * i; - s.y = 2.0 * i; - s.z = 3.0 * i; - s.color() = i; - s.a()(0) = 1.0 * i; - s.a()(1) = 2.0 * i; - s.a()(2) = 3.0 * i; - s.b()(0) = 3.0 * i; - s.b()(1) = 2.0 * i; - s.b()(2) = 1.0 * i; - s.r() = s.a().cross(s.b()); - } - // Check direct read back - for (size_t i = 0; i < size; i++) { - auto s = soa1view[i]; - assert(s.x() == 1.0 * i); - assert(s.y() == 2.0 * i); - assert(s.z() == 3.0 * i); - assert(s.color() == i); - assert(s.a()(0) == 1.0 * i); - assert(s.a()(1) == 2.0 * i); - assert(s.a()(2) == 3.0 * i); - assert(s.b()(0) == 3.0 * i); - assert(s.b()(1) == 2.0 * i); - assert(s.b()(2) == 1.0 * i); - assert(s.r() == s.a().cross(s.b())); +// Get a view like the default, except for range checking +using RangeCheckingHostDeviceView = SoAHostDeviceViewTemplate; + +// We expect to just run one thread. +__global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) { +#if defined(__CUDACC__) && defined(__CUDA_ARCH__) + printf("About to fail range check in CUDA thread: %d\n", threadIdx.x); +#endif + [[maybe_unused]] auto si = soa[soa.soaMetadata().size()]; + printf("We should not have reached here\n"); +} + +int main(void) { + cudaStream_t stream; + cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); + + // Non-aligned number of elements to check alignment features. + constexpr unsigned int numElements = 65537; + + // Allocate buffer and store on host + size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements); + std::byte * h_buf = nullptr; + cudaCheck(cudaMallocHost(&h_buf, hostDeviceSize)); + SoAHostDeviceLayout h_soahdLayout(h_buf, numElements); + SoAHostDeviceView h_soahd(h_soahdLayout); + + // Allocate buffer, stores and views on the device (single, shared buffer). 
+ size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements); + std::byte * d_buf = nullptr; + cudaCheck(cudaMallocHost(&d_buf, hostDeviceSize + deviceOnlySize)); + SoAHostDeviceLayout d_soahdLayout(d_buf, numElements); + SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.soaMetadata().nextByte(), numElements); + SoAHostDeviceView d_soahdView(d_soahdLayout); + SoAFullDeviceView d_soaFullView(d_soahdLayout, d_soadoLayout); + + // Assert column alignments + assert(0 == reinterpret_cast(h_soahd.soaMetadata().addressOf_x()) % decltype(h_soahd)::byteAlignment); + assert(0 == reinterpret_cast(h_soahd.soaMetadata().addressOf_y()) % decltype(h_soahd)::byteAlignment); + assert(0 == reinterpret_cast(h_soahd.soaMetadata().addressOf_z()) % decltype(h_soahd)::byteAlignment); + assert(0 == reinterpret_cast(h_soahd.soaMetadata().addressOf_a()) % decltype(h_soahd)::byteAlignment); + assert(0 == reinterpret_cast(h_soahd.soaMetadata().addressOf_b()) % decltype(h_soahd)::byteAlignment); + assert(0 == reinterpret_cast(h_soahd.soaMetadata().addressOf_r()) % decltype(h_soahd)::byteAlignment); + assert(0 == + reinterpret_cast(h_soahd.soaMetadata().addressOf_description()) % decltype(h_soahd)::byteAlignment); + assert(0 == + reinterpret_cast(h_soahd.soaMetadata().addressOf_someNumber()) % decltype(h_soahd)::byteAlignment); + + assert(0 == reinterpret_cast(d_soahdLayout.soaMetadata().addressOf_x()) % + decltype(d_soahdLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soahdLayout.soaMetadata().addressOf_y()) % + decltype(d_soahdLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soahdLayout.soaMetadata().addressOf_z()) % + decltype(d_soahdLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soahdLayout.soaMetadata().addressOf_a()) % + decltype(d_soahdLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soahdLayout.soaMetadata().addressOf_b()) % + decltype(d_soahdLayout)::byteAlignment); + assert(0 == 
reinterpret_cast(d_soahdLayout.soaMetadata().addressOf_r()) % + decltype(d_soahdLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soahdLayout.soaMetadata().addressOf_description()) % + decltype(d_soahdLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soahdLayout.soaMetadata().addressOf_someNumber()) % + decltype(d_soahdLayout)::byteAlignment); + + assert(0 == reinterpret_cast(d_soadoLayout.soaMetadata().addressOf_color()) % + decltype(d_soadoLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soadoLayout.soaMetadata().addressOf_value()) % + decltype(d_soadoLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soadoLayout.soaMetadata().addressOf_py()) % + decltype(d_soadoLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soadoLayout.soaMetadata().addressOf_count()) % + decltype(d_soadoLayout)::byteAlignment); + assert(0 == reinterpret_cast(d_soadoLayout.soaMetadata().addressOf_anotherCount()) % + decltype(d_soadoLayout)::byteAlignment); + + // Views should get the same alignment as the stores they refer to + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_x()) % + decltype(d_soaFullView)::byteAlignment); + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_y()) % + decltype(d_soaFullView)::byteAlignment); + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_z()) % + decltype(d_soaFullView)::byteAlignment); + // Limitation of views: we have to get scalar member addresses via metadata. 
+ assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_description()) % + decltype(d_soaFullView)::byteAlignment); + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_someNumber()) % + decltype(d_soaFullView)::byteAlignment); + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_color()) % + decltype(d_soaFullView)::byteAlignment); + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_value()) % + decltype(d_soaFullView)::byteAlignment); + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_py()) % + decltype(d_soaFullView)::byteAlignment); + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_count()) % + decltype(d_soaFullView)::byteAlignment); + assert(0 == reinterpret_cast(d_soaFullView.soaMetadata().addressOf_anotherCount()) % + decltype(d_soaFullView)::byteAlignment); + + // Initialize and fill the host buffer + std::memset(h_soahdLayout.soaMetadata().data(), 0, hostDeviceSize); + for (size_t i = 0; i < numElements; ++i) { + auto si = h_soahd[i]; + si.x() = si.a()(0) = si.b()(2) = 1.0 * i + 1.0; + si.y() = si.a()(1) = si.b()(1) = 2.0 * i; + si.z() = si.a()(2) = si.b()(0) = 3.0 * i - 1.0; } - // Check readback through other views - for (size_t i = 0; i < size; i++) { - auto sv = soa1view[i]; - auto sv2g = soa1v2g[i]; - auto sv2gc = soa1v2gconst[i]; - assert(sv.x() == 1.0 * i); - assert(sv.y() == 2.0 * i); - assert(sv.z() == 3.0 * i); - assert(sv.color() == i); - assert(sv2g.x() == 1.0 * i); - assert(sv2g.y() == 2.0 * i); - assert(sv2g.color() == i); - assert(sv2gc.x() == 1.0 * i); - assert(sv2gc.y() == 2.0 * i); - assert(sv2gc.color() == i); + auto& sn = h_soahd.someNumber(); + sn = numElements + 2; + + // Push to device + cudaCheck(cudaMemcpyAsync(d_buf, h_buf, hostDeviceSize, cudaMemcpyDefault, stream)); + + // Process on device + crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements); + + // Paint the device only with 0xFF initially + 
cudaCheck(cudaMemsetAsync(d_soadoLayout.soaMetadata().data(), 0xFF, d_soadoLayout.soaMetadata().byteSize(), stream)); + + // Produce to the device only area + producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements); + + // Consume the device only area and generate a result on the host-device area + consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements); + + // Get result back + cudaCheck(cudaMemcpyAsync(h_buf, d_buf, hostDeviceSize, cudaMemcpyDefault, stream)); + + // Wait and validate. + cudaCheck(cudaStreamSynchronize(stream)); + for (size_t i = 0; i < numElements; ++i) { + auto si = h_soahd[i]; + assert(si.r() == si.a().cross(si.b())); + double initialX = 1.0 * i + 1.0; + double initialY = 2.0 * i; + double initialZ = 3.0 * i - 1.0; + uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char)); + double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ); + if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits::epsilon()) { + std::cout << "X failed: for i=" << i << std::endl + << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl + << "expectedX=" << expectedX << std::endl + << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl + << "relativeDiff=" << abs(si.x() - expectedX) / expectedX + << " epsilon=" << std::numeric_limits::epsilon() << std::endl; + assert(false); + } } // Validation of range checking try { // Get a view like the default, except for range checking - SoA1ViewTemplate - soa1viewRangeChecking(soa1); + SoAHostDeviceViewTemplate + soa1viewRangeChecking(h_soahdLayout); // This should throw an exception [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.soaMetadata().size()]; assert(false); - } catch (const std::out_of_range &) { + } catch (const std::out_of_range&) { + } + + // Validation of range checking in a kernel 
+ // Get a view like the default, except for range checking + RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout); + // This should throw an exception in the kernel + try { + rangeCheckKernel<<<1,1,0,stream>>>(soa1viewRangeChecking); + } catch (const std::out_of_range&) { + std::cout << "Exception received in enqueue." << std::endl; } - // Print out the layout - std::cout << soa1 << std::endl; -} \ No newline at end of file + // Wait and validate (that we failed). + try { + cudaCheck(cudaStreamSynchronize(stream)); + } catch (const std::runtime_error&) { + std::cout << "Exception received in wait." << std::endl; + } + + std::cout << "OK" << std::endl; +}