Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes cudaErrorInvalidValue when running on nvbench-created cuda stream #113

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
8 changes: 7 additions & 1 deletion nvbench/detail/measure_cold.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,13 @@ namespace nvbench::detail

measure_cold_base::measure_cold_base(state &exec_state)
: m_state{exec_state}
, m_launch{m_state.get_cuda_stream()}
, m_launch{nvbench::launch([this]() -> decltype(auto) {
if (!m_state.get_cuda_stream().has_value())
{
m_state.set_cuda_stream(nvbench::cuda_stream{m_state.get_device()});
}
return m_state.get_cuda_stream().value();
}())}
, m_run_once{exec_state.get_run_once()}
, m_no_block{exec_state.get_disable_blocking_kernel()}
, m_min_samples{exec_state.get_min_samples()}
Expand Down
8 changes: 7 additions & 1 deletion nvbench/detail/measure_cupti.cu
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,13 @@ measure_cupti_base::measure_cupti_base(state &exec_state)
// (formatter doesn't handle `try :` very well...)
try
: m_state{exec_state}
, m_launch{m_state.get_cuda_stream()}
, m_launch{[this]() -> decltype(auto) {
if (!m_state.get_cuda_stream().has_value())
{
m_state.set_cuda_stream(nvbench::cuda_stream{m_state.get_device()});
}
return m_state.get_cuda_stream().value();
}()}
, m_cupti{*m_state.get_device(), add_metrics(m_state)}
{}
// clang-format on
Expand Down
8 changes: 7 additions & 1 deletion nvbench/detail/measure_hot.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ namespace nvbench::detail

measure_hot_base::measure_hot_base(state &exec_state)
: m_state{exec_state}
, m_launch{m_state.get_cuda_stream()}
, m_launch{nvbench::launch([this]() -> decltype(auto) {
if (!m_state.get_cuda_stream().has_value())
{
m_state.set_cuda_stream(nvbench::cuda_stream{m_state.get_device()});
}
return m_state.get_cuda_stream().value();
Comment on lines +41 to +45
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels weird to have the initialization of the optional external to state.

How about putting this logic inside state::get_cuda_stream instead and don't expose the optional externally.

Copy link
Author

@elstehle elstehle Feb 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about putting this logic inside state::get_cuda_stream instead and don't expose the optional externally.

@allisonvacanti and I have discussed that option too but agreed to prefer explicitly setting the stream over implicitly initializing it as a byproduct, if it didn't exist. Considering the user interfacing with the API, I feel that, for multi-GPU systems, it's safer to make it explicit when resources are created and what device they are associated with. Especially, when the current device may influence what device a resource is associated with.

That said, I'm fine to have it any way we decide makes more sense. @allisonvacanti what do you think?

}())}
, m_min_samples{exec_state.get_min_samples()}
, m_min_time{exec_state.get_min_time()}
, m_skip_time{exec_state.get_skip_time()}
Expand Down
7 changes: 5 additions & 2 deletions nvbench/state.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ struct state
state &operator=(const state &) = delete;
state &operator=(state &&) = default;

[[nodiscard]] const nvbench::cuda_stream &get_cuda_stream() const { return m_cuda_stream; }
[[nodiscard]] const std::optional<nvbench::cuda_stream> &get_cuda_stream() const
{
return m_cuda_stream;
}
void set_cuda_stream(nvbench::cuda_stream &&stream) { m_cuda_stream = std::move(stream); }

/// The CUDA device associated with this benchmark state. May be
Expand Down Expand Up @@ -276,7 +279,7 @@ private:
nvbench::float64_t m_skip_time;
nvbench::float64_t m_timeout;

nvbench::cuda_stream m_cuda_stream;
std::optional<nvbench::cuda_stream> m_cuda_stream;

// Deadlock protection. See blocking_kernel's class doc for details.
nvbench::float64_t m_blocking_kernel_timeout{30.0};
Expand Down
2 changes: 1 addition & 1 deletion nvbench/state.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ state::state(const benchmark_base &bench,
, m_max_noise{bench.get_max_noise()}
, m_skip_time{bench.get_skip_time()}
, m_timeout{bench.get_timeout()}
, m_cuda_stream{m_device}
, m_cuda_stream{std::nullopt}
{}

nvbench::int64_t state::get_int64(const std::string &axis_name) const
Expand Down
6 changes: 4 additions & 2 deletions testing/state.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ struct state_tester : public nvbench::state
void set_param(std::string name, T &&value)
{
this->state::m_axis_values.set_value(std::move(name),
nvbench::named_values::value_type{
std::forward<T>(value)});
nvbench::named_values::value_type{std::forward<T>(value)});
}
};
} // namespace nvbench::detail
Expand All @@ -57,6 +56,9 @@ void test_streams()

state_tester state{bench};

// Confirm that the stream hasn't been initialized yet
ASSERT(!state.get_cuda_stream().has_value());

// Test non-owning stream
cudaStream_t default_stream = 0;
state.set_cuda_stream(nvbench::cuda_stream{default_stream, false});
Expand Down