Skip to content

Commit

Permalink
Error out of counter splitting if group max is zero
Browse files Browse the repository at this point in the history
A counter group having a counter max of zero is invalid and
will ultimately result in a hang when we try to split counters
into multiple passes. This is one of various scenarios that
result in a hang during counter splitting; see

 GPUOpen-Tools#69

This fixes only that specific scenario. We now check that the
group max isn't zero, and if it is, we give up trying to split
a public counter's HW counters into multiple passes. We log
an error, too.

Again, this isn't a comprehensive fix for issue 69. There could
be other cases of bad data that result in a hang. Issue 69 should
be fixed with a pass cap limit to cover all cases. But this
commit still adds value in that it flags the specific invalid GPU
counter metadata in addition to avoiding the hang.

Change-Id: I56d7d2043ba92c1b6088f0fdd68f5ec844e7b823
  • Loading branch information
jcortell68 committed Jan 6, 2023
1 parent 57f4eba commit 798d7ab
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 4 deletions.
1 change: 1 addition & 0 deletions include/gpu_performance_api/gpu_perf_api_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ typedef enum
kGpaStatusErrorLibAlreadyLoaded = -41,
kGpaStatusErrorOtherSessionActive = -42,
kGpaStatusErrorException = -43,
gGpaInvalidCounterGroupData = -44,
kGpaStatusMin = kGpaStatusErrorException,
kGpaStatusInternal = 256, ///< Status codes used internally within GPUPerfAPI.
} GpaStatus;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,19 +308,37 @@ GpaStatus GpaCounterSchedulerBase::GetNumRequiredPasses(GpaUInt32* num_required_
// Add the HW groups max's.
for (unsigned int i = 0; i < hw_counters->internal_counter_groups_.size(); ++i)
{
max_counters_per_group.push_back(hw_counters->internal_counter_groups_[i].max_active_discrete_counters);
auto count = hw_counters->internal_counter_groups_[i].max_active_discrete_counters;
if (count == 0)
{
GPA_LOG_DEBUG_ERROR("ERROR: hardware counter group '%s' has zero for max-counters-per-group", hw_counters->internal_counter_groups_[i].name);
return gGpaInvalidCounterGroupData;
}
max_counters_per_group.push_back(count);
}

// Add the Additional groups max's.
for (unsigned int i = 0; i < hw_counters->additional_group_count_; ++i)
{
max_counters_per_group.push_back(hw_counters->additional_groups_[i].max_active_discrete_counters);
auto count = hw_counters->additional_groups_[i].max_active_discrete_counters;
if (count == 0)
{
GPA_LOG_DEBUG_ERROR("ERROR: hardware counter additional group '%s' has zero for max-counters-per-group", hw_counters->additional_groups_[i].name);
return gGpaInvalidCounterGroupData;
}
max_counters_per_group.push_back(count);
}

// TODO: properly handle software groups -- right now, this works because there is only ever a single group defined.
if (sw_counters->group_count_ == 1)
{
max_counters_per_group.push_back(DoGetNumSoftwareCounters());
auto count = DoGetNumSoftwareCounters();
if (count == 0)
{
GPA_LOG_DEBUG_ERROR("ERROR: software counter group has zero for max-counters-per-group");
return gGpaInvalidCounterGroupData;
}
max_counters_per_group.push_back(count);
}

GpaCounterGroupAccessor accessor(hw_counters->internal_counter_groups_,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@

#ifdef DEBUG_PUBLIC_COUNTER_SPLITTER
#include <sstream>
#include "gpu_perf_api_common/logging.h"
#endif

#include "gpu_perf_api_counter_generator/gpa_derived_counter.h"
#include "gpu_perf_api_common/logging.h"

/// @brief Enum to represent the different SQ shader stages.
enum GpaSqShaderStage
Expand Down Expand Up @@ -390,6 +390,11 @@ class IGpaSplitCounters
}

unsigned int group_limit = max_counters_per_group[group_index];
if (group_limit == 0)
{
GPA_LOG_DEBUG_ERROR("ERROR: group(%d) count limit is zero", group_index);
return false;
}

return new_group_used_count <= group_limit;
}
Expand Down

0 comments on commit 798d7ab

Please sign in to comment.