Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add wait or ignore for all async copies #645

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ container_d2h_copy_alg<CONTAINER_TYPES>::operator()(input_type input) const {
[](const auto& view) { return view.capacity(); });
typename CONTAINER_TYPES::buffer hostBuffer{{size, *host_mr},
{capacities, *host_mr}};
- m_hostCopy.setup(hostBuffer.headers);
- m_hostCopy.setup(hostBuffer.items);
+ m_hostCopy.setup(hostBuffer.headers)->wait();
+ m_hostCopy.setup(hostBuffer.items)->wait();

// Copy the device container into this temporary host buffer.
vecmem::copy::event_type header_event = m_deviceCopy(
Expand Down
28 changes: 16 additions & 12 deletions device/common/include/traccc/device/impl/container_h2d_copy_alg.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ container_h2d_copy_alg<CONTAINER_TYPES>::operator()(input_type input) const {
// Create the output buffer with the correct sizes.
output_type result{{static_cast<header_size_type>(sizes.size()), m_mr.main},
{sizes, m_mr.main, m_mr.host}};
- m_deviceCopy.setup(result.headers);
- m_deviceCopy.setup(result.items);
+ m_deviceCopy.setup(result.headers)->wait();
+ m_deviceCopy.setup(result.items)->wait();

// Copy data straight into it.
m_deviceCopy(input.headers, result.headers,
- vecmem::copy::type::host_to_device);
- m_deviceCopy(input.items, result.items, vecmem::copy::type::host_to_device);
+ vecmem::copy::type::host_to_device)
+ ->wait();
+ m_deviceCopy(input.items, result.items, vecmem::copy::type::host_to_device)
+ ->wait();

// Return the created buffer.
return result;
Expand All @@ -56,23 +58,25 @@ container_h2d_copy_alg<CONTAINER_TYPES>::operator()(
// Create/set the host buffer.
hostBuffer =
typename CONTAINER_TYPES::buffer{{size, *host_mr}, {sizes, *host_mr}};
- m_hostCopy.setup(hostBuffer.headers);
- m_hostCopy.setup(hostBuffer.items);
+ m_hostCopy.setup(hostBuffer.headers)->wait();
+ m_hostCopy.setup(hostBuffer.items)->wait();

// Copy the data into the host buffer.
- m_hostCopy(input.headers, hostBuffer.headers);
- m_hostCopy(input.items, hostBuffer.items);
+ m_hostCopy(input.headers, hostBuffer.headers)->wait();
+ m_hostCopy(input.items, hostBuffer.items)->wait();

// Create the output buffer with the correct sizes.
output_type result{{size, m_mr.main}, {sizes, m_mr.main, m_mr.host}};
- m_deviceCopy.setup(result.headers);
- m_deviceCopy.setup(result.items);
+ m_deviceCopy.setup(result.headers)->wait();
+ m_deviceCopy.setup(result.items)->wait();

// Copy data from the host buffer into the device/result buffer.
m_deviceCopy(hostBuffer.headers, result.headers,
- vecmem::copy::type::host_to_device);
+ vecmem::copy::type::host_to_device)
+ ->wait();
m_deviceCopy(hostBuffer.items, result.items,
- vecmem::copy::type::host_to_device);
+ vecmem::copy::type::host_to_device)
+ ->wait();

// Return the created buffer.
return result;
Expand Down
2 changes: 1 addition & 1 deletion device/common/src/make_prefix_sum_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ prefix_sum_buffer_t make_prefix_sum_buffer(
// Create buffer and view objects
vecmem::data::vector_buffer<prefix_sum_size_t> sizes_sum_buff(
sizes_sum.size(), mr.main);
- copy.setup(sizes_sum_buff);
+ copy.setup(sizes_sum_buff)->wait();
(copy)(vecmem::get_data(sizes_sum), sizes_sum_buff)->wait();
vecmem::data::vector_view<prefix_sum_size_t> sizes_sum_view(
sizes_sum_buff);
Expand Down
24 changes: 12 additions & 12 deletions device/cuda/src/finding/finding_algorithm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
cudaStream_t stream = details::get_stream(m_stream);

// Copy setup
- m_copy.setup(seeds_buffer);
- m_copy.setup(navigation_buffer);
+ m_copy.setup(seeds_buffer)->ignore();
+ m_copy.setup(navigation_buffer)->ignore();

const unsigned int n_seeds = m_copy.get_size(seeds_buffer);

Expand Down Expand Up @@ -413,7 +413,7 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
// Create the link map
link_map[step] = {n_in_params * m_cfg.max_num_branches_per_surface,
m_mr.main};
- m_copy.setup(link_map[step]);
+ m_copy.setup(link_map[step])->ignore();
nBlocks = (global_counter_host.n_measurements_sum +
nThreads * m_cfg.n_measurements_per_thread - 1) /
(nThreads * m_cfg.n_measurements_per_thread);
Expand Down Expand Up @@ -463,12 +463,12 @@ finding_algorithm<stepper_t, navigator_t>::operator()(

// Create the param to link ID map
param_to_link_map[step] = {global_counter_host.n_candidates, m_mr.main};
- m_copy.setup(param_to_link_map[step]);
+ m_copy.setup(param_to_link_map[step])->ignore();

// Create the tip map
tips_map[step] = {global_counter_host.n_candidates, m_mr.main,
vecmem::data::buffer_type::resizable};
- m_copy.setup(tips_map[step]);
+ m_copy.setup(tips_map[step])->ignore();

nThreads = m_warp_size * 2;

Expand Down Expand Up @@ -505,7 +505,7 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
// Create link buffer
vecmem::data::jagged_vector_buffer<candidate_link> links_buffer(
n_candidates_per_step, m_mr.main, m_mr.host);
- m_copy.setup(links_buffer);
+ m_copy.setup(links_buffer)->ignore();

// Copy link map to link buffer
const auto n_steps = n_candidates_per_step.size();
Expand All @@ -522,7 +522,7 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
// Create param_to_link
vecmem::data::jagged_vector_buffer<unsigned int> param_to_link_buffer(
n_parameters_per_step, m_mr.main, m_mr.host);
- m_copy.setup(param_to_link_buffer);
+ m_copy.setup(param_to_link_buffer)->ignore();

// Copy param_to_link map to param_to_link buffer
for (unsigned int it = 0; it < n_steps; it++) {
Expand All @@ -547,7 +547,7 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
std::accumulate(n_tips_per_step.begin(), n_tips_per_step.end(), 0);
vecmem::data::vector_buffer<typename candidate_link::link_index_type>
tips_buffer{n_tips_total, m_mr.main};
- m_copy.setup(tips_buffer);
+ m_copy.setup(tips_buffer)->ignore();

vecmem::device_vector<typename candidate_link::link_index_type> tips(
tips_buffer);
Expand Down Expand Up @@ -577,8 +577,8 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
m_cfg.max_track_candidates_per_track),
m_mr.main, m_mr.host, vecmem::data::buffer_type::resizable}};

- m_copy.setup(track_candidates_buffer.headers);
- m_copy.setup(track_candidates_buffer.items);
+ m_copy.setup(track_candidates_buffer.headers)->ignore();
+ m_copy.setup(track_candidates_buffer.items)->ignore();

// Create buffer for valid indices
vecmem::data::vector_buffer<unsigned int> valid_indices_buffer(n_tips_total,
Expand Down Expand Up @@ -611,8 +611,8 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
m_cfg.max_track_candidates_per_track),
m_mr.main, m_mr.host, vecmem::data::buffer_type::resizable}};

- m_copy.setup(prune_candidates_buffer.headers);
- m_copy.setup(prune_candidates_buffer.items);
+ m_copy.setup(prune_candidates_buffer.headers)->ignore();
+ m_copy.setup(prune_candidates_buffer.items)->ignore();

if (global_counter_host.n_valid_tracks > 0) {
nThreads = m_warp_size * 2;
Expand Down
6 changes: 3 additions & 3 deletions device/cuda/src/fitting/fitting_algorithm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ track_state_container_types::buffer fitting_algorithm<fitter_t>::operator()(
{candidate_sizes, m_mr.main, m_mr.host,
vecmem::data::buffer_type::resizable}};

- m_copy.setup(track_states_buffer.headers);
- m_copy.setup(track_states_buffer.items);
- m_copy.setup(navigation_buffer);
+ m_copy.setup(track_states_buffer.headers)->ignore();
+ m_copy.setup(track_states_buffer.items)->ignore();
+ m_copy.setup(navigation_buffer)->ignore();

// Calculate the number of threads and thread blocks to run the track
// fitting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ spacepoint_formation<detector_t>::operator()(

spacepoint_collection_types::buffer spacepoints_buffer(
n_measurements, m_mr.main, vecmem::data::buffer_type::resizable);
- m_copy.setup(spacepoints_buffer);
+ m_copy.setup(spacepoints_buffer)->ignore();

unsigned int nThreads = warpSize * 2;
unsigned int nBlocks = (n_measurements + nThreads - 1) / nThreads;
Expand Down
4 changes: 2 additions & 2 deletions device/cuda/src/utils/make_prefix_sum_buff.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ vecmem::data::vector_buffer<device::prefix_sum_element_t> make_prefix_sum_buff(
// Create buffer and view objects for prefix sum vector
vecmem::data::vector_buffer<device::prefix_sum_element_t> prefix_sum_buff(
totalSize, mr.main);
- copy.setup(prefix_sum_buff);
+ copy.setup(prefix_sum_buff)->ignore();

// Fill the prefix sum vector
static const unsigned int threadsPerBlock = 32;
Expand Down Expand Up @@ -76,7 +76,7 @@ vecmem::data::vector_buffer<device::prefix_sum_element_t> make_prefix_sum_buff(
// Create buffer and view objects for prefix sum vector
vecmem::data::vector_buffer<device::prefix_sum_element_t> prefix_sum_buff(
totalSize, mr.main);
- copy.setup(prefix_sum_buff);
+ copy.setup(prefix_sum_buff)->ignore();

// Fill the prefix sum vector
static const unsigned int threadsPerBlock = 32;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ clusterization_algorithm::output_type clusterization_algorithm::operator()(
// Create the result object, overestimating the number of measurements.
measurement_collection_types::buffer measurements{
num_cells, m_mr.main, vecmem::data::buffer_type::resizable};
- m_copy.get().setup(measurements);
+ m_copy.get().setup(measurements)->wait();
measurement_collection_types::view measurements_view(measurements);

// If there are no cells, return right away.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ spacepoint_formation_algorithm::operator()(
// Create the result buffer.
spacepoint_collection_types::buffer spacepoints(num_measurements,
m_mr.main);
- m_copy.get().setup(spacepoints);
+ m_copy.get().setup(spacepoints)->wait();

// If there are no measurements, we can conclude here.
if (num_measurements == 0) {
Expand Down
6 changes: 3 additions & 3 deletions device/sycl/src/fitting/fitting_algorithm.sycl
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ track_state_container_types::buffer fitting_algorithm<fitter_t>::operator()(
{candidate_sizes, m_mr.main, m_mr.host,
vecmem::data::buffer_type::resizable}};

- m_copy->setup(track_states_buffer.headers);
- m_copy->setup(track_states_buffer.items);
- m_copy->setup(navigation_buffer);
+ m_copy->setup(track_states_buffer.headers)->wait();
+ m_copy->setup(track_states_buffer.items)->wait();
+ m_copy->setup(navigation_buffer)->wait();

track_state_container_types::view track_states_view(track_states_buffer);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ spacepoint_formation<detector_t>::operator()(

spacepoint_collection_types::buffer spacepoints_buffer(
n_measurements, m_mr.main, vecmem::data::buffer_type::resizable);
- m_copy.setup(spacepoints_buffer);
+ m_copy.setup(spacepoints_buffer)->wait();
spacepoint_collection_types::view spacepoints_view = spacepoints_buffer;

// Calculate the range to run the doublet counting for.
Expand Down
9 changes: 6 additions & 3 deletions examples/run/cuda/seeding_example_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,16 +262,19 @@ int seq_run(const traccc::opts::track_seeding& seeding_opts,
traccc::spacepoint_collection_types::buffer spacepoints_cuda_buffer(
spacepoints_per_event.size(), mr.main);
async_copy(vecmem::get_data(spacepoints_per_event),
- spacepoints_cuda_buffer);
+ spacepoints_cuda_buffer)
+ ->ignore();
traccc::cell_module_collection_types::buffer modules_buffer(
modules_per_event.size(), mr.main);
- async_copy(vecmem::get_data(modules_per_event), modules_buffer);
+ async_copy(vecmem::get_data(modules_per_event), modules_buffer)
+ ->ignore();

traccc::measurement_collection_types::buffer
measurements_cuda_buffer(measurements_per_event.size(),
mr.main);
async_copy(vecmem::get_data(measurements_per_event),
- measurements_cuda_buffer);
+ measurements_cuda_buffer)
+ ->ignore();

{
traccc::performance::timer t("Seeding (cuda)", elapsedTimes);
Expand Down
4 changes: 2 additions & 2 deletions examples/run/cuda/seq_example_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,10 @@ int seq_run(const traccc::opts::detector& detector_opts,
// Create device copy of input collections
traccc::cell_collection_types::buffer cells_buffer(
cells_per_event.size(), mr.main);
- copy(vecmem::get_data(cells_per_event), cells_buffer);
+ copy(vecmem::get_data(cells_per_event), cells_buffer)->ignore();
traccc::cell_module_collection_types::buffer modules_buffer(
modules_per_event.size(), mr.main);
- copy(vecmem::get_data(modules_per_event), modules_buffer);
+ copy(vecmem::get_data(modules_per_event), modules_buffer)->ignore();

{
traccc::performance::timer t("Clusterization (cuda)",
Expand Down
12 changes: 7 additions & 5 deletions examples/run/cuda/truth_finding_example_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,10 @@ int seq_run(const traccc::opts::track_finding& finding_opts,

traccc::bound_track_parameters_collection_types::buffer seeds_buffer{
static_cast<unsigned int>(seeds.size()), mr.main};
- async_copy.setup(seeds_buffer);
+ async_copy.setup(seeds_buffer)->ignore();
async_copy(vecmem::get_data(seeds), seeds_buffer,
- vecmem::copy::type::host_to_device);
+ vecmem::copy::type::host_to_device)
+ ->ignore();

// Read measurements
traccc::io::measurement_reader_output meas_reader_output(mr.host);
Expand All @@ -215,14 +216,15 @@ int seq_run(const traccc::opts::track_finding& finding_opts,
traccc::measurement_collection_types::buffer measurements_cuda_buffer(
measurements_per_event.size(), mr.main);
async_copy(vecmem::get_data(measurements_per_event),
- measurements_cuda_buffer);
+ measurements_cuda_buffer)
+ ->ignore();

// Instantiate output cuda containers/collections
traccc::track_candidate_container_types::buffer
track_candidates_cuda_buffer{{{}, *(mr.host)},
{{}, *(mr.host), mr.host}};
- async_copy.setup(track_candidates_cuda_buffer.headers);
- async_copy.setup(track_candidates_cuda_buffer.items);
+ async_copy.setup(track_candidates_cuda_buffer.headers)->ignore();
+ async_copy.setup(track_candidates_cuda_buffer.items)->ignore();

// Navigation buffer
auto navigation_buffer = detray::create_candidates_buffer(
Expand Down
2 changes: 1 addition & 1 deletion examples/run/sycl/full_chain_algorithm.sycl
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ full_chain_algorithm::output_type full_chain_algorithm::operator()(

// Get the final data back to the host.
bound_track_parameters_collection_types::host result(&m_host_mr);
- (m_copy)(track_params, result);
+ (m_copy)(track_params, result)->ignore();
m_data->m_queue.wait_and_throw();

// Return the host container.
Expand Down
5 changes: 3 additions & 2 deletions examples/run/sycl/seeding_example_sycl.sycl
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,11 @@ int seq_run(const traccc::opts::track_seeding& seeding_opts,
traccc::spacepoint_collection_types::buffer spacepoints_sycl_buffer(
spacepoints_per_event.size(), mr.main);
copy(vecmem::get_data(spacepoints_per_event),
- spacepoints_sycl_buffer);
+ spacepoints_sycl_buffer)
+ ->wait();
traccc::cell_module_collection_types::buffer modules_buffer(
modules_per_event.size(), mr.main);
- copy(vecmem::get_data(modules_per_event), modules_buffer);
+ copy(vecmem::get_data(modules_per_event), modules_buffer)->wait();

{
traccc::performance::timer t("Seeding (sycl)", elapsedTimes);
Expand Down
4 changes: 2 additions & 2 deletions examples/run/sycl/seq_example_sycl.sycl
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ int seq_run(const traccc::opts::detector& detector_opts,
// Create device copy of input collections
traccc::cell_collection_types::buffer cells_buffer(
cells_per_event.size(), mr.main);
- copy(vecmem::get_data(cells_per_event), cells_buffer);
+ copy(vecmem::get_data(cells_per_event), cells_buffer)->wait();
traccc::cell_module_collection_types::buffer modules_buffer(
modules_per_event.size(), mr.main);
- copy(vecmem::get_data(modules_per_event), modules_buffer);
+ copy(vecmem::get_data(modules_per_event), modules_buffer)->wait();

{
traccc::performance::timer t("Clusterization (sycl)",
Expand Down
16 changes: 9 additions & 7 deletions tests/cuda/test_ckf_combinatorics_telescope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,10 @@ TEST_P(CudaCkfCombinatoricsTelescopeTests, Run) {

traccc::bound_track_parameters_collection_types::buffer seeds_buffer{
static_cast<unsigned int>(seeds.size()), mr.main};
- copy.setup(seeds_buffer);
+ copy.setup(seeds_buffer)->ignore();
copy(vecmem::get_data(seeds), seeds_buffer,
- vecmem::copy::type::host_to_device);
+ vecmem::copy::type::host_to_device)
+ ->ignore();

// Read measurements
traccc::io::measurement_reader_output readOut(&host_mr);
Expand All @@ -187,20 +188,21 @@ TEST_P(CudaCkfCombinatoricsTelescopeTests, Run) {

traccc::measurement_collection_types::buffer measurements_buffer(
measurements_per_event.size(), mr.main);
- copy(vecmem::get_data(measurements_per_event), measurements_buffer);
+ copy(vecmem::get_data(measurements_per_event), measurements_buffer)
+ ->ignore();

// Instantiate output cuda containers/collections
traccc::track_candidate_container_types::buffer
track_candidates_cuda_buffer{{{}, *(mr.host)},
{{}, *(mr.host), mr.host}};
- copy.setup(track_candidates_cuda_buffer.headers);
- copy.setup(track_candidates_cuda_buffer.items);
+ copy.setup(track_candidates_cuda_buffer.headers)->ignore();
+ copy.setup(track_candidates_cuda_buffer.items)->ignore();

traccc::track_candidate_container_types::buffer
track_candidates_limit_cuda_buffer{{{}, *(mr.host)},
{{}, *(mr.host), mr.host}};
- copy.setup(track_candidates_limit_cuda_buffer.headers);
- copy.setup(track_candidates_limit_cuda_buffer.items);
+ copy.setup(track_candidates_limit_cuda_buffer.headers)->ignore();
+ copy.setup(track_candidates_limit_cuda_buffer.items)->ignore();

// Navigation buffer
auto navigation_buffer = detray::create_candidates_buffer(
Expand Down
Loading
Loading