Skip to content

Commit

Permalink
benchdnn: fix incorrect profiling reset
Browse files: browse the repository at this point in the history
  • Loading branch information
echeresh committed Oct 23, 2024
1 parent 71e41c2 commit 5b20f33
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 13 deletions.
39 changes: 29 additions & 10 deletions tests/benchdnn/dnnl_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,8 @@ void reset_gpu_profiling(dnnl_stream_t stream) {
#endif
}

void get_gpu_profiling_info(dnnl_stream_t stream, std::vector<uint64_t> &nsecs,
std::vector<uint64_t> &cycles) {
int get_gpu_profiling_info(dnnl_stream_t stream, std::vector<uint64_t> &nsecs,
std::vector<uint64_t> &cycles, int expected_num_entries) {
#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL \
|| DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL
dnnl_profiling_data_kind_t undef_kind {};
Expand All @@ -420,15 +420,29 @@ void get_gpu_profiling_info(dnnl_stream_t stream, std::vector<uint64_t> &nsecs,
#endif

int num_entries = 0;
DNN_SAFE_V(dnnl_query_profiling_data(
stream, undef_kind, &num_entries, nullptr));
DNN_SAFE(dnnl_query_profiling_data(
stream, undef_kind, &num_entries, nullptr),
CRIT);
if (expected_num_entries != -1 && num_entries != expected_num_entries) {
BENCHDNN_PRINT(0,
"ERROR: profiling entries mismatch, expected: %d entries but "
"got %d entries\n",
expected_num_entries, num_entries);
return FAIL;
}
DNN_SAFE(dnnl_query_profiling_data(
stream, time_kind, &num_entries, nsecs.data()),
CRIT);
nsecs.resize(num_entries);
cycles.resize(num_entries);
DNN_SAFE_V(dnnl_query_profiling_data(
stream, time_kind, &num_entries, nsecs.data()));
DNN_SAFE_V(dnnl_query_profiling_data(
stream, cycles_kind, &num_entries, cycles.data()));
DNN_SAFE(dnnl_query_profiling_data(
stream, time_kind, &num_entries, nsecs.data()),
CRIT);
DNN_SAFE(dnnl_query_profiling_data(
stream, cycles_kind, &num_entries, cycles.data()),
CRIT);
#endif
return OK;
}

void notify_gpu_profiling_complete(dnnl_stream_t stream) {
Expand Down Expand Up @@ -476,8 +490,8 @@ inline int measure_perf_aggregate(timer::timer_t &t,
// kernel has not been built and skews the results.
DNN_SAFE(perf_func(v_stream[j], dnnl_args[j]), WARN);
DNN_SAFE(dnnl_stream_wait(v_stream[j]), CRIT);
if (use_profiling) reset_gpu_profiling(v_stream[j]);
cold_cache[j] = cold_cache_t(dnnl_args[j], v_stream[j]);
if (use_profiling) reset_gpu_profiling(v_stream[j]);
}

bool is_first_loop = true;
Expand All @@ -486,11 +500,14 @@ inline int measure_perf_aggregate(timer::timer_t &t,

t.reset();
while (true) {
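// Count the actual executions in a separate variable: the `break` inside
// the loop can skip submissions, so the count cannot be derived from the
// loop bounds.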
int execute_count = 0;
// Keep inner loop over streams for better submission overlapping.
for_(int i = 0; i < cur_batch_times; i++)
for (size_t j = 0; j < v_stream.size(); j++) {
if (!cold_cache[j].update_dnnl_args(dnnl_args[j])) break;
DNN_SAFE(perf_func(v_stream[j], dnnl_args[j]), WARN);
execute_count++;
}

for (size_t j = 0; j < v_stream.size(); j++) {
Expand All @@ -502,7 +519,9 @@ inline int measure_perf_aggregate(timer::timer_t &t,
std::vector<std::vector<uint64_t>> v_cycles(num_streams);
bool nsecs_is_empty = false;
for (size_t j = 0; j < v_stream.size(); j++) {
get_gpu_profiling_info(v_stream[j], v_nsecs[j], v_cycles[j]);
SAFE(get_gpu_profiling_info(v_stream[j], v_nsecs[j],
v_cycles[j], execute_count),
CRIT);
reset_gpu_profiling(v_stream[j]);

// Profiling should have information to report, otherwise, stop.
Expand Down
4 changes: 2 additions & 2 deletions tests/benchdnn/dnnl_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,8 +632,8 @@ void reset_gpu_profiling(dnnl_stream_t stream);

void finalize();

void get_gpu_profiling_info(dnnl_stream_t stream, std::vector<uint64_t> &nsecs,
std::vector<uint64_t> &cycles);
int get_gpu_profiling_info(dnnl_stream_t stream, std::vector<uint64_t> &nsecs,
std::vector<uint64_t> &cycles, int expected_num_entries);
int measure_perf(const thr_ctx_t &ctx, res_t *res, perf_function_t &perf_func,
args_t &args);
int measure_perf(
Expand Down
6 changes: 5 additions & 1 deletion tests/benchdnn/graph/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,11 @@ inline int measure_perf_aggregate(timer::timer_t &t,
if (use_profiling) {
std::vector<uint64_t> nsecs;
std::vector<uint64_t> cycles;
get_gpu_profiling_info(((dnnl::stream)stream).get(), nsecs, cycles);
// The number of expected profiling entries cannot be determined
// beforehand, so pass -1 to skip the entry-count check.
SAFE(get_gpu_profiling_info(((dnnl::stream)stream).get(), nsecs,
cycles, /*expected_num_entries=*/-1),
CRIT);
reset_gpu_profiling(((dnnl::stream)stream).get());

// Profiling should have information to report, otherwise, stop.
Expand Down

0 comments on commit 5b20f33

Please sign in to comment.