Skip to content

Commit

Permalink
making benchmark work for both cuda and hip
Browse files Browse the repository at this point in the history
  • Loading branch information
kab163 committed Nov 6, 2024
1 parent d6798e4 commit 8ae1cbe
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 18 deletions.
17 changes: 11 additions & 6 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,13 @@ blt_add_executable(
SOURCES allocator_stress_test.cpp
DEPENDS_ON ${stress_test_depends})

if (UMPIRE_ENABLE_HIP)
if (UMPIRE_ENABLE_CUDA)
blt_add_executable(
NAME resource_aware_pool_stress_test
SOURCES resource_aware_pool_stress_test.cpp
DEPENDS_ON umpire blt::hip blt::hip_runtime)
endif()
DEPENDS_ON umpire cuda)

if (UMPIRE_ENABLE_DEVICE_ALLOCATOR)
if (UMPIRE_ENABLE_CUDA)
if (UMPIRE_ENABLE_DEVICE_ALLOCATOR)
blt_add_executable(
NAME device_allocator_stress_test
SOURCES device_allocator_stress_test.cpp
Expand All @@ -38,7 +36,14 @@ if (UMPIRE_ENABLE_DEVICE_ALLOCATOR)
set_target_properties(
device_allocator_stress_test
PROPERTIES CUDA_SEPARABLE_COMPILATION On)
elseif (UMPIRE_ENABLE_HIP)
endif()
elseif (UMPIRE_ENABLE_HIP)
blt_add_executable(
NAME resource_aware_pool_stress_test
SOURCES resource_aware_pool_stress_test.cpp
DEPENDS_ON umpire blt::hip blt::hip_runtime)

if (UMPIRE_ENABLE_DEVICE_ALLOCATOR)
blt_add_executable(
NAME device_allocator_stress_test
SOURCES device_allocator_stress_test.cpp
Expand Down
29 changes: 17 additions & 12 deletions benchmarks/resource_aware_pool_stress_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@

using namespace camp::resources;

#if defined(UMPIRE_ENABLE_CUDA)
using resource_type = Cuda;
#elif defined(UMPIRE_ENABLE_HIP)
using resource_type = Hip;
#endif

constexpr int ITER = 5;
constexpr int NUM = 2048;
const int NUM_PER_BLOCK = 256;
Expand Down Expand Up @@ -72,19 +78,19 @@ void QuickPool_check(umpire::Allocator quick_pool)
bool error{false};

// Create device streams (CUDA or HIP, depending on resource_type)
hipStream_t s1, s2;
hipStreamCreate(&s1); hipStreamCreate(&s2);
auto s1 = resource_type().get_stream();
auto s2 = resource_type().get_stream();

double* a = static_cast<double*>(quick_pool.allocate(NUM * sizeof(double)));

hipLaunchKernelGGL(touch_data, dim3(NUM_BLOCKS), dim3(NUM_PER_BLOCK), 0, s1, a);
hipLaunchKernelGGL(do_sleep, dim3(NUM_BLOCKS), dim3(NUM_PER_BLOCK), 0, s1);
hipLaunchKernelGGL(check_data, dim3(NUM_BLOCKS), dim3(NUM_PER_BLOCK), 0, s1, a);
touch_data<<<NUM_BLOCKS, NUM_PER_BLOCK, 0, s1>>>(a);
do_sleep<<<NUM_BLOCKS, NUM_PER_BLOCK, 0, s1>>>();
check_data<<<NUM_BLOCKS, NUM_PER_BLOCK, 0, s1>>>(a);

quick_pool.deallocate(a);
a = static_cast<double*>(quick_pool.allocate(NUM * sizeof(double)));

hipLaunchKernelGGL(touch_data_again, dim3(NUM_BLOCKS), dim3(NUM_PER_BLOCK), 0, s2, a);
touch_data_again<<<NUM_BLOCKS, NUM_PER_BLOCK, 0, s2>>>(a);

double* b = static_cast<double*>(quick_pool.allocate(NUM * sizeof(double)));
rm.copy(b, a);
Expand All @@ -105,13 +111,12 @@ void QuickPool_check(umpire::Allocator quick_pool)

quick_pool.deallocate(a);
rm.deallocate(b);
hipStreamDestroy(s1); hipStreamDestroy(s2);
}

void ResourceAwarePool_check(umpire::Allocator rap_pool)
{
// Create device resources (CUDA or HIP, depending on resource_type)
Hip d1, d2;
resource_type d1, d2;
Resource r1{d1}, r2{d2};

// ResourceAwarePool checks
Expand All @@ -121,14 +126,14 @@ void ResourceAwarePool_check(umpire::Allocator rap_pool)
for(int i = 0; i < ITER; i++) {
double* a = static_cast<double*>(rap_pool.allocate(r1, NUM * sizeof(double)));

hipLaunchKernelGGL(touch_data, dim3(NUM_BLOCKS), dim3(NUM_PER_BLOCK), 0, d1.get_stream(), a);
hipLaunchKernelGGL(do_sleep, dim3(NUM_BLOCKS), dim3(NUM_PER_BLOCK), 0, d1.get_stream());
hipLaunchKernelGGL(check_data, dim3(NUM_BLOCKS), dim3(NUM_PER_BLOCK), 0, d1.get_stream(), a);
touch_data<<<NUM_BLOCKS, NUM_PER_BLOCK, 0, d1.get_stream()>>>(a);
do_sleep<<<NUM_BLOCKS, NUM_PER_BLOCK, 0, d1.get_stream()>>>();
check_data<<<NUM_BLOCKS, NUM_PER_BLOCK, 0, d1.get_stream()>>>(a);

rap_pool.deallocate(r1, a);
a = static_cast<double*>(rap_pool.allocate(r2, NUM * sizeof(double)));

hipLaunchKernelGGL(touch_data_again, dim3(NUM_BLOCKS), dim3(NUM_PER_BLOCK), 0, d2.get_stream(), a);
touch_data_again<<<NUM_BLOCKS, NUM_PER_BLOCK, 0, d2.get_stream()>>>(a);

double* b = static_cast<double*>(rap_pool.allocate(r2, NUM * sizeof(double)));
rm.copy(b, a);
Expand Down

0 comments on commit 8ae1cbe

Please sign in to comment.