Skip to content

Commit

Permalink
Replace use of CUDA API wrapper unique_ptrs with CUDAUtilities unique…
Browse files Browse the repository at this point in the history
…_ptrs (#396)

Replace cuda::memory::device::make_unique() calls with cudautils::make_device_unique()
Replace cuda::memory::host::make_unique() with cudautils::make_host_unique()
  • Loading branch information
waredjeb authored and fwyzard committed Oct 31, 2019
1 parent 8177676 commit cce3f33
Show file tree
Hide file tree
Showing 13 changed files with 80 additions and 71 deletions.
18 changes: 9 additions & 9 deletions DataFormats/GeometrySurface/test/gpuFrameTransformTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <numeric>

#include <cuda/api_wrappers.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "DataFormats/GeometrySurface/interface/GloballyPositioned.h"
#include "DataFormats/GeometrySurface/interface/SOARotation.h"
Expand Down Expand Up @@ -51,15 +51,15 @@ int main(void) {
float ge[6 * size];

auto current_device = cuda::device::current::get();
auto d_xl = cuda::memory::device::make_unique<float[]>(current_device, size);
auto d_yl = cuda::memory::device::make_unique<float[]>(current_device, size);
auto d_xl = cudautils::make_device_unique<float[]>(size, nullptr);
auto d_yl = cudautils::make_device_unique<float[]>(size, nullptr);

auto d_x = cuda::memory::device::make_unique<float[]>(current_device, size);
auto d_y = cuda::memory::device::make_unique<float[]>(current_device, size);
auto d_z = cuda::memory::device::make_unique<float[]>(current_device, size);
auto d_x = cudautils::make_device_unique<float[]>(size, nullptr);
auto d_y = cudautils::make_device_unique<float[]>(size, nullptr);
auto d_z = cudautils::make_device_unique<float[]>(size, nullptr);

auto d_le = cuda::memory::device::make_unique<float[]>(current_device, 3 * size);
auto d_ge = cuda::memory::device::make_unique<float[]>(current_device, 6 * size);
auto d_le = cudautils::make_device_unique<float[]>(3 * size, nullptr);
auto d_ge = cudautils::make_device_unique<float[]>(6 * size, nullptr);

double a = 0.01;
double ca = std::cos(a);
Expand All @@ -73,7 +73,7 @@ int main(void) {
SFrame sf1(f1.position().x(), f1.position().y(), f1.position().z(), f1.rotation());

// auto d_sf = cuda::memory::device::make_unique<SFrame[]>(current_device, 1);
auto d_sf = cuda::memory::device::make_unique<char[]>(current_device, sizeof(SFrame));
auto d_sf = cudautils::make_device_unique<char[]>(sizeof(SFrame), nullptr);
cudaCheck(cudaMemcpy(d_sf.get(), &sf1, sizeof(SFrame), cudaMemcpyHostToDevice));

for (auto i = 0U; i < size; ++i) {
Expand Down
3 changes: 2 additions & 1 deletion DataFormats/Math/test/CholeskyInvert_t.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <cuda/api_wrappers.h>

#include "DataFormats/Math/interface/choleskyInversion.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
Expand Down Expand Up @@ -132,7 +133,7 @@ void go(bool soa) {

std::cout << mm[SIZE / 2](1, 1) << std::endl;

auto m_d = cuda::memory::device::make_unique<double[]>(current_device, DIM * DIM * stride());
auto m_d = cudautils::make_device_unique<double[]>(DIM * DIM * stride(), nullptr);
cudaCheck(cudaMemcpy(m_d.get(), (double const *)(mm), stride() * sizeof(MX), cudaMemcpyHostToDevice));

constexpr int NKK =
Expand Down
3 changes: 2 additions & 1 deletion DataFormats/Math/test/cudaAtan2Test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ end
#include "cuda/api_wrappers.h"

#include "DataFormats/Math/interface/approx_atan2.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
Expand Down Expand Up @@ -70,7 +71,7 @@ void go() {
// atan2
delta -= (std::chrono::high_resolution_clock::now() - start);

auto diff_d = cuda::memory::device::make_unique<int[]>(current_device, 3);
auto diff_d = cudautils::make_device_unique<int[]>(3, nullptr);

int diffs[3];
cudaCheck(cudaMemset(diff_d.get(), 0, 3 * 4));
Expand Down
7 changes: 4 additions & 3 deletions DataFormats/Math/test/cudaMathTest.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ end
#include "DataFormats/Math/interface/approx_log.h"
#include "DataFormats/Math/interface/approx_exp.h"
#include "DataFormats/Math/interface/approx_atan2.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
Expand Down Expand Up @@ -103,9 +104,9 @@ void go() {
std::generate(h_B.get(), h_B.get() + numElements, [&]() { return rgen(eng); });

delta -= (std::chrono::high_resolution_clock::now() - start);
auto d_A = cuda::memory::device::make_unique<float[]>(current_device, numElements);
auto d_B = cuda::memory::device::make_unique<float[]>(current_device, numElements);
auto d_C = cuda::memory::device::make_unique<float[]>(current_device, numElements);
auto d_A = cudautils::make_device_unique<float[]>(numElements, nullptr);
auto d_B = cudautils::make_device_unique<float[]>(numElements, nullptr);
auto d_C = cudautils::make_device_unique<float[]>(numElements, nullptr);

cudaCheck(cudaMemcpy(d_A.get(), h_A.get(), size, cudaMemcpyHostToDevice));
cudaCheck(cudaMemcpy(d_B.get(), h_B.get(), size, cudaMemcpyHostToDevice));
Expand Down
7 changes: 4 additions & 3 deletions HeterogeneousCore/CUDACore/test/test_CUDAScopedContext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "FWCore/Concurrency/interface/WaitingTask.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/eventIsOccurred.h"
Expand Down Expand Up @@ -90,12 +91,12 @@ TEST_CASE("Use of CUDAScopedContext", "[CUDACore]") {

// Mimick a producer on the first CUDA stream
int h_a1 = 1;
auto d_a1 = cuda::memory::device::make_unique<int>(current_device);
auto d_a1 = cudautils::make_device_unique<int>(nullptr);
auto wprod1 = produce(defaultDevice, d_a1.get(), &h_a1);

// Mimick a producer on the second CUDA stream
int h_a2 = 2;
auto d_a2 = cuda::memory::device::make_unique<int>(current_device);
auto d_a2 = cudautils::make_device_unique<int>(nullptr);
auto wprod2 = produce(defaultDevice, d_a2.get(), &h_a2);

REQUIRE(wprod1->stream() != wprod2->stream());
Expand All @@ -106,7 +107,7 @@ TEST_CASE("Use of CUDAScopedContext", "[CUDACore]") {
auto prod1 = ctx2.get(*wprod1);
auto prod2 = ctx2.get(*wprod2);

auto d_a3 = cuda::memory::device::make_unique<int>(current_device);
auto d_a3 = cudautils::make_device_unique<int>(nullptr);
testCUDAScopedContextKernels_join(prod1, prod2, d_a3.get(), ctx2.stream());
cudaCheck(cudaStreamSynchronize(ctx2.stream()));
REQUIRE(wprod2->isAvailable());
Expand Down
10 changes: 5 additions & 5 deletions HeterogeneousCore/CUDAUtilities/test/HistoContainer_t.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <cuda/api_wrappers.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
Expand All @@ -25,7 +26,7 @@ void go() {

constexpr int N = 12000;
T v[N];
auto v_d = cuda::memory::device::make_unique<T[]>(current_device, N);
auto v_d = cudautils::make_device_unique<T[]>(N, nullptr);

cudaCheck(cudaMemcpy(v_d.get(), v, N * sizeof(T), cudaMemcpyHostToDevice));

Expand All @@ -39,11 +40,10 @@ void go() {
<< (std::numeric_limits<T>::max() - std::numeric_limits<T>::min()) / Hist::nbins() << std::endl;

Hist h;
auto h_d = cudautils::make_device_unique<Hist[]>(1, nullptr);
auto ws_d = cudautils::make_device_unique<uint8_t[]>(Hist::wsSize(), nullptr);

auto h_d = cuda::memory::device::make_unique<Hist[]>(current_device, 1);
auto ws_d = cuda::memory::device::make_unique<uint8_t[]>(current_device, Hist::wsSize());

auto off_d = cuda::memory::device::make_unique<uint32_t[]>(current_device, nParts + 1);
auto off_d = cudautils::make_device_unique<uint32_t[]>(nParts + 1, nullptr);

for (int it = 0; it < 5; ++it) {
offsets[0] = 0;
Expand Down
3 changes: 2 additions & 1 deletion HeterogeneousCore/CUDAUtilities/test/OneHistoContainer_t.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <cuda/api_wrappers.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
Expand Down Expand Up @@ -115,7 +116,7 @@ void go() {
constexpr int N = 12000;
T v[N];

auto v_d = cuda::memory::device::make_unique<T[]>(current_device, N);
auto v_d = cudautils::make_device_unique<T[]>(N, nullptr);
assert(v_d.get());

using Hist = HistoContainer<T, NBINS, N, S>;
Expand Down
13 changes: 7 additions & 6 deletions HeterogeneousCore/CUDAUtilities/test/OneToManyAssoc_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#ifdef __CUDACC__
#include <cuda/api_wrappers.h>
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#endif
Expand Down Expand Up @@ -165,11 +166,11 @@ int main() {
std::cout << "filled with " << n << " elements " << double(ave) / n << ' ' << imax << ' ' << nz << std::endl;

#ifdef __CUDACC__
auto v_d = cuda::memory::device::make_unique<std::array<uint16_t, 4>[]>(current_device, N);
auto v_d = cudautils::make_device_unique<std::array<uint16_t, 4>[]>(N, nullptr);
assert(v_d.get());
auto a_d = cuda::memory::device::make_unique<Assoc[]>(current_device, 1);
auto sa_d = cuda::memory::device::make_unique<SmallAssoc[]>(current_device, 1);
auto ws_d = cuda::memory::device::make_unique<uint8_t[]>(current_device, Assoc::wsSize());
auto a_d = cudautils::make_device_unique<Assoc[]>(1, nullptr);
auto sa_d = cudautils::make_device_unique<SmallAssoc[]>(1, nullptr);
auto ws_d = cudautils::make_device_unique<uint8_t[]>(Assoc::wsSize(), nullptr);

cudaCheck(cudaMemcpy(v_d.get(), tr.data(), N * sizeof(std::array<uint16_t, 4>), cudaMemcpyHostToDevice));
#else
Expand Down Expand Up @@ -272,8 +273,8 @@ int main() {

// here verify use of block local counters
#ifdef __CUDACC__
auto m1_d = cuda::memory::device::make_unique<Multiplicity[]>(current_device, 1);
auto m2_d = cuda::memory::device::make_unique<Multiplicity[]>(current_device, 1);
auto m1_d = cudautils::make_device_unique<Multiplicity[]>(1, nullptr);
auto m2_d = cudautils::make_device_unique<Multiplicity[]>(1, nullptr);
#else
auto m1_d = std::make_unique<Multiplicity>();
auto m2_d = std::make_unique<Multiplicity>();
Expand Down
9 changes: 5 additions & 4 deletions HeterogeneousCore/CUDAUtilities/test/radixSort_t.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <cuda/api_wrappers.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
Expand Down Expand Up @@ -98,10 +99,10 @@ void go(bool useShared) {

std::random_shuffle(v, v + N);

auto v_d = cuda::memory::device::make_unique<U[]>(current_device, N);
auto ind_d = cuda::memory::device::make_unique<uint16_t[]>(current_device, N);
auto ws_d = cuda::memory::device::make_unique<uint16_t[]>(current_device, N);
auto off_d = cuda::memory::device::make_unique<uint32_t[]>(current_device, blocks + 1);
auto v_d = cudautils::make_device_unique<U[]>(N, nullptr);
auto ind_d = cudautils::make_device_unique<uint16_t[]>(N, nullptr);
auto ws_d = cudautils::make_device_unique<uint16_t[]>(N, nullptr);
auto off_d = cudautils::make_device_unique<uint32_t[]>(blocks + 1, nullptr);

cudaCheck(cudaMemcpy(v_d.get(), v, N * sizeof(T), cudaMemcpyHostToDevice));
cudaCheck(cudaMemcpy(off_d.get(), offsets, 4 * (blocks + 1), cudaMemcpyHostToDevice));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <cuda/api_wrappers.h>

#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
#include "TestHeterogeneousEDProducerGPUHelpers.h"
Expand Down Expand Up @@ -64,18 +66,18 @@ int TestAcceleratorServiceProducerGPUHelpers_simple_kernel(int input) {
auto current_device = cuda::device::current::get();
auto stream = current_device.create_stream(cuda::stream::implicitly_synchronizes_with_default_stream);

auto h_a = cuda::memory::host::make_unique<int[]>(NUM_VALUES);
auto h_b = cuda::memory::host::make_unique<int[]>(NUM_VALUES);
auto h_c = cuda::memory::host::make_unique<int[]>(NUM_VALUES);
auto h_a = cudautils::make_host_unique<int[]>(NUM_VALUES, nullptr);
auto h_b = cudautils::make_host_unique<int[]>(NUM_VALUES, nullptr);
auto h_c = cudautils::make_host_unique<int[]>(NUM_VALUES, nullptr);

for (auto i = 0; i < NUM_VALUES; i++) {
h_a[i] = input + i;
h_b[i] = i * i;
}

auto d_a = cuda::memory::device::make_unique<int[]>(current_device, NUM_VALUES);
auto d_b = cuda::memory::device::make_unique<int[]>(current_device, NUM_VALUES);
auto d_c = cuda::memory::device::make_unique<int[]>(current_device, NUM_VALUES);
auto d_a = cudautils::make_device_unique<int[]>(NUM_VALUES, nullptr);
auto d_b = cudautils::make_device_unique<int[]>(NUM_VALUES, nullptr);
auto d_c = cudautils::make_device_unique<int[]>(NUM_VALUES, nullptr);

cudaCheck(cudaMemcpyAsync(d_a.get(), h_a.get(), NUM_VALUES * sizeof(int), cudaMemcpyHostToDevice, stream.id()));
cudaCheck(cudaMemcpyAsync(d_b.get(), h_b.get(), NUM_VALUES * sizeof(int), cudaMemcpyHostToDevice, stream.id()));
Expand Down Expand Up @@ -108,15 +110,14 @@ namespace {
}

TestHeterogeneousEDProducerGPUTask::TestHeterogeneousEDProducerGPUTask() {
h_a = cuda::memory::host::make_unique<float[]>(NUM_VALUES);
h_b = cuda::memory::host::make_unique<float[]>(NUM_VALUES);
h_a = cudautils::make_host_unique<float[]>(NUM_VALUES, nullptr);
h_b = cudautils::make_host_unique<float[]>(NUM_VALUES, nullptr);

auto current_device = cuda::device::current::get();
d_b = cuda::memory::device::make_unique<float[]>(current_device, NUM_VALUES);

d_ma = cuda::memory::device::make_unique<float[]>(current_device, NUM_VALUES * NUM_VALUES);
d_mb = cuda::memory::device::make_unique<float[]>(current_device, NUM_VALUES * NUM_VALUES);
d_mc = cuda::memory::device::make_unique<float[]>(current_device, NUM_VALUES * NUM_VALUES);
d_b = cudautils::make_device_unique<float[]>(NUM_VALUES, nullptr);
d_ma = cudautils::make_device_unique<float[]>(NUM_VALUES * NUM_VALUES, nullptr);
d_mb = cudautils::make_device_unique<float[]>(NUM_VALUES * NUM_VALUES, nullptr);
d_mc = cudautils::make_device_unique<float[]>(NUM_VALUES * NUM_VALUES, nullptr);
}

TestHeterogeneousEDProducerGPUTask::ResultType TestHeterogeneousEDProducerGPUTask::runAlgo(
Expand All @@ -139,10 +140,10 @@ TestHeterogeneousEDProducerGPUTask::ResultType TestHeterogeneousEDProducerGPUTas
}

auto current_device = cuda::device::current::get();
auto d_a = cuda::memory::device::make_unique<float[]>(current_device, NUM_VALUES);
auto d_c = cuda::memory::device::make_unique<float[]>(current_device, NUM_VALUES);
auto d_a = cudautils::make_device_unique<float[]>(NUM_VALUES, nullptr);
auto d_c = cudautils::make_device_unique<float[]>(NUM_VALUES, nullptr);
if (inputArrays.second != nullptr) {
d_d = cuda::memory::device::make_unique<float[]>(current_device, NUM_VALUES);
d_d = cudautils::make_device_unique<float[]>(NUM_VALUES, nullptr);
}

// Create stream
Expand Down Expand Up @@ -194,7 +195,7 @@ void TestHeterogeneousEDProducerGPUTask::release(const std::string &label, cuda:
}

int TestHeterogeneousEDProducerGPUTask::getResult(const ResultTypeRaw &d_ac, cuda::stream_t<> &stream) {
auto h_c = cuda::memory::host::make_unique<float[]>(NUM_VALUES);
auto h_c = cudautils::make_device_unique<float[]>(NUM_VALUES, nullptr);
cudaCheck(cudaMemcpyAsync(h_c.get(), d_ac.second, NUM_VALUES * sizeof(int), cudaMemcpyDeviceToHost, stream.id()));
stream.synchronize();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

#include <cuda/api_wrappers.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

#include <functional>
#include <memory>
#include <string>
Expand All @@ -15,7 +18,7 @@ class TestHeterogeneousEDProducerGPUTask {
TestHeterogeneousEDProducerGPUTask();
~TestHeterogeneousEDProducerGPUTask() = default;

using Ptr = cuda::memory::device::unique_ptr<float[]>;
using Ptr = cudautils::device::unique_ptr<float[]>;
using PtrRaw = Ptr::pointer;

using ResultType = std::pair<Ptr, Ptr>;
Expand All @@ -30,15 +33,15 @@ class TestHeterogeneousEDProducerGPUTask {
std::unique_ptr<cuda::stream_t<>> streamPtr;

// stored for the job duration
cuda::memory::host::unique_ptr<float[]> h_a;
cuda::memory::host::unique_ptr<float[]> h_b;
cuda::memory::device::unique_ptr<float[]> d_b;
cuda::memory::device::unique_ptr<float[]> d_ma;
cuda::memory::device::unique_ptr<float[]> d_mb;
cuda::memory::device::unique_ptr<float[]> d_mc;
cudautils::host::unique_ptr<float[]> h_a;
cudautils::host::unique_ptr<float[]> h_b;
cudautils::device::unique_ptr<float[]> d_b;
cudautils::device::unique_ptr<float[]> d_ma;
cudautils::device::unique_ptr<float[]> d_mb;
cudautils::device::unique_ptr<float[]> d_mc;

// temporary storage, need to be somewhere to allow async execution
cuda::memory::device::unique_ptr<float[]> d_d;
cudautils::device::unique_ptr<float[]> d_d;
};

#endif
20 changes: 9 additions & 11 deletions RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#ifdef __CUDACC__
#include <cuda/api_wrappers.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
Expand Down Expand Up @@ -44,17 +45,14 @@ int main(void) {

#ifdef __CUDACC__
auto current_device = cuda::device::current::get();
auto d_id = cuda::memory::device::make_unique<uint16_t[]>(current_device, numElements);
auto d_x = cuda::memory::device::make_unique<uint16_t[]>(current_device, numElements);
auto d_y = cuda::memory::device::make_unique<uint16_t[]>(current_device, numElements);
auto d_adc = cuda::memory::device::make_unique<uint16_t[]>(current_device, numElements);

auto d_clus = cuda::memory::device::make_unique<int[]>(current_device, numElements);

auto d_moduleStart = cuda::memory::device::make_unique<uint32_t[]>(current_device, MaxNumModules + 1);

auto d_clusInModule = cuda::memory::device::make_unique<uint32_t[]>(current_device, MaxNumModules);
auto d_moduleId = cuda::memory::device::make_unique<uint32_t[]>(current_device, MaxNumModules);
auto d_id = cudautils::make_device_unique<uint16_t[]>(numElements, nullptr);
auto d_x = cudautils::make_device_unique<uint16_t[]>(numElements, nullptr);
auto d_y = cudautils::make_device_unique<uint16_t[]>(numElements, nullptr);
auto d_adc = cudautils::make_device_unique<uint16_t[]>(numElements, nullptr);
auto d_clus = cudautils::make_device_unique<int[]>(numElements, nullptr);
auto d_moduleStart = cudautils::make_device_unique<uint32_t[]>(MaxNumModules + 1, nullptr);
auto d_clusInModule = cudautils::make_device_unique<uint32_t[]>(MaxNumModules, nullptr);
auto d_moduleId = cudautils::make_device_unique<uint32_t[]>(MaxNumModules, nullptr);
#else

auto h_moduleStart = std::make_unique<uint32_t[]>(MaxNumModules + 1);
Expand Down
Loading

0 comments on commit cce3f33

Please sign in to comment.