forked from makortel/pixel-standalone
-
Notifications
You must be signed in to change notification settings - Fork 0
/
GPUSimpleVector.h
246 lines (215 loc) · 7.65 KB
/
GPUSimpleVector.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
#ifndef GPUSimpleVector_h_
#define GPUSimpleVector_h_
// author: Felice Pantaleo, CERN, 2018
#include <type_traits>
#include <utility>
#if defined DIGI_CUDA
#include <cuda.h>
#elif defined DIGI_ALPAKA
#if defined ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_BT_OMP4_ENABLED || defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED || defined ALPAKA_ACC_GPU_CUDA_ENABLED
#include <alpaka/alpaka.hpp>
#endif // ALPAKA_ACC_*_ENABLED
#elif defined DIGI_CUPLA
/* Do NOT include other headers that use CUDA runtime functions or variables
* before this include, because cupla renames CUDA host functions and device
* built-in variables using macros and macro functions.
* Do NOT include other specific includes such as `<cuda.h>`, etc.
*/
#if defined ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_BT_OMP4_ENABLED || defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED || defined ALPAKA_ACC_GPU_CUDA_ENABLED
#include <cuda_to_cupla.hpp>
#endif // ALPAKA_ACC_*_ENABLED
#elif defined DIGI_KOKKOS
#include <Kokkos_Core.hpp>
#elif defined DIGI_ONEAPI
#include <CL/sycl.hpp>
#include <dpct/dpct.hpp>
#endif
namespace GPU {
// Fixed-capacity vector view over caller-owned storage.
//
// The vector never allocates or frees: the backing buffer is handed in via
// construct() (or the make_SimpleVector() helpers below) and its ownership
// stays with the caller.  The struct holds only plain members (size,
// capacity, data pointer) so it can be copied bitwise between host and
// device memory.
//
// Concurrency: the *_unsafe member functions are NOT thread-safe.  The
// push_back()/emplace_back() overloads guarded by the DIGI_* backend macros
// below update m_size with the backend's atomic primitives and are intended
// for concurrent use from device code.
template <class T>
struct SimpleVector {
  constexpr SimpleVector() = default;

  // Bind this vector to an external buffer of at least `capacity` elements.
  // Ownership of m_data stays within the caller; the buffer must outlive
  // this object.
  constexpr void construct(int capacity, T *data) {
    m_size = 0;
    m_capacity = capacity;
    m_data = data;
  }

  // Copy-append `element` (single-threaded use only).
  // Returns the index of the stored element, or -1 if the vector is full.
  inline constexpr int push_back_unsafe(const T &element) {
    auto previousSize = m_size;
    m_size++;
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      --m_size;  // roll back the speculative size bump
      return -1;
    }
  }

  // Construct an element in place from `args` (single-threaded use only).
  // Returns the index of the new element, or -1 if the vector is full.
  template <class... Ts>
  constexpr int emplace_back_unsafe(Ts &&... args) {
    auto previousSize = m_size;
    m_size++;
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      --m_size;  // roll back the speculative size bump
      return -1;
    }
  }

  // Reference to the last element.
  // Precondition: !empty() — calling back() on an empty vector is undefined
  // behaviour.  The former empty-case fallback `return T();` could never
  // work: a T& cannot bind to a temporary, so that branch was ill-formed
  // whenever the function was instantiated.  It has been removed and the
  // precondition documented instead.
  inline constexpr T &back() const {
    return m_data[m_size - 1];
  }

#if defined DIGI_CUDA && defined __CUDACC__
  // thread-safe version of the vector, when used in a CUDA kernel
  __device__ int push_back(const T &element) {
    auto previousSize = atomicAdd(&m_size, 1);
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      atomicSub(&m_size, 1);  // undo the reservation: slot was out of range
      return -1;
    }
  }

  template <class... Ts>
  __device__ int emplace_back(Ts &&... args) {
    auto previousSize = atomicAdd(&m_size, 1);
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      atomicSub(&m_size, 1);  // undo the reservation: slot was out of range
      return -1;
    }
  }

#elif defined DIGI_ALPAKA
#if defined ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_BT_OMP4_ENABLED || defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED || defined ALPAKA_ACC_GPU_CUDA_ENABLED
  // thread-safe version of the vector, using alpaka's atomic operations
  template <typename T_Acc>
  ALPAKA_FN_ACC int push_back(T_Acc const &acc, const T &element) {
    auto previousSize = alpaka::atomic::atomicOp<alpaka::atomic::op::Add>(acc, &m_size, 1);
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      alpaka::atomic::atomicOp<alpaka::atomic::op::Sub>(acc, &m_size, 1);
      return -1;
    }
  }

  template <typename T_Acc, class... Ts>
  ALPAKA_FN_ACC int emplace_back(T_Acc const &acc, Ts &&... args) {
    auto previousSize = alpaka::atomic::atomicOp<alpaka::atomic::op::Add>(acc, &m_size, 1);
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      alpaka::atomic::atomicOp<alpaka::atomic::op::Sub>(acc, &m_size, 1);
      return -1;
    }
  }
#endif  // ALPAKA_ACC_*_ENABLED

#elif defined DIGI_CUPLA
#if defined ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_BT_OMP4_ENABLED || defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED || defined ALPAKA_ACC_GPU_CUDA_ENABLED
  // thread-safe version of the vector, using the cupla-renamed CUDA atomics
  template <typename T_Acc>
  ALPAKA_FN_ACC int push_back(T_Acc const &acc, const T &element) {
    auto previousSize = atomicAdd(&m_size, 1);
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      atomicSub(&m_size, 1);
      return -1;
    }
  }

  template <typename T_Acc, class... Ts>
  ALPAKA_FN_ACC int emplace_back(T_Acc const &acc, Ts &&... args) {
    auto previousSize = atomicAdd(&m_size, 1);
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      atomicSub(&m_size, 1);
      return -1;
    }
  }
#endif  // ALPAKA_ACC_*_ENABLED

#elif defined DIGI_KOKKOS
  // thread-safe version of the vector, using Kokkos atomic operations
  KOKKOS_INLINE_FUNCTION
  int push_back(const T &element) {
    auto previousSize = Kokkos::atomic_fetch_add(&m_size, 1);
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      Kokkos::atomic_sub(&m_size, 1);
      return -1;
    }
  }

  template <class... Ts>
  KOKKOS_INLINE_FUNCTION int emplace_back(Ts &&... args) {
    auto previousSize = Kokkos::atomic_fetch_add(&m_size, 1);
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      Kokkos::atomic_sub(&m_size, 1);
      return -1;
    }
  }

#elif defined DIGI_ONEAPI
  // thread-safe version of the vector, using dpct atomic operations
  int push_back(const T &element) {
    auto previousSize = dpct::atomic_fetch_add(&m_size, 1);
    if (previousSize < m_capacity) {
      m_data[previousSize] = element;
      return previousSize;
    } else {
      dpct::atomic_fetch_sub(&m_size, 1);
      return -1;
    }
  }

  template <class... Ts>
  int emplace_back(Ts &&... args) {
    auto previousSize = dpct::atomic_fetch_add(&m_size, 1);
    if (previousSize < m_capacity) {
      (new (&m_data[previousSize]) T(std::forward<Ts>(args)...));
      return previousSize;
    } else {
      dpct::atomic_fetch_sub(&m_size, 1);
      return -1;
    }
  }
#endif

  inline constexpr bool empty() const { return m_size == 0; }
  inline constexpr bool full() const { return m_size == m_capacity; }
  inline constexpr T &operator[](int i) { return m_data[i]; }
  inline constexpr const T &operator[](int i) const { return m_data[i]; }
  inline constexpr void reset() { m_size = 0; }
  inline constexpr int size() const { return m_size; }
  inline constexpr int capacity() const { return m_capacity; }
  inline constexpr T const *data() const { return m_data; }
  // resize()/set_data() only adjust the bookkeeping; no element storage is
  // constructed, destroyed, or moved.
  inline constexpr void resize(int size) { m_size = size; }
  inline constexpr void set_data(T *data) { m_data = data; }

private:
  int m_size;
  int m_capacity;
  T *m_data;
};
// Build a SimpleVector wrapping an external buffer of `capacity` elements.
// Ownership of the buffer stays with the caller.
template <class T>
SimpleVector<T> make_SimpleVector(int capacity, T *data) {
  SimpleVector<T> vec;
  vec.construct(capacity, data);
  return vec;
}
// Placement-construct a SimpleVector into pre-allocated memory `mem`
// (e.g. pinned host or device-mapped storage) and wrap the external
// buffer `data`.  Ownership of both allocations stays with the caller.
template <class T>
SimpleVector<T> *make_SimpleVector(SimpleVector<T> *mem, int capacity, T *data) {
  SimpleVector<T> *vec = new (mem) SimpleVector<T>();
  vec->construct(capacity, data);
  return vec;
}
} // namespace GPU
#endif // GPUSimpleVector_h_