Skip to content

Commit

Permalink
Restore invalid changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jiaxingla committed Jul 17, 2024
1 parent 3496593 commit 69d5c2a
Showing 1 changed file with 2 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ __global__ void
size_t idx = ThreadIdxX() + BlockDimX() * BlockIdxX();

for (; idx < capacity; idx += GridDimX() * BlockDimX()) {

Element a = cutlass::ReferenceFactory<Element>::get(ptr_A, idx);
Element b = cutlass::ReferenceFactory<Element>::get(ptr_B, idx);

Expand Down Expand Up @@ -238,7 +239,7 @@ bool BlockCompareRelativelyEqual(
#if defined (CUTLASS_ENABLE_SYCL)
block_size = 128;
grid_size = (capacity + block_size - 1) / block_size;
- //grid_size = (grid_size < 64 ? grid_size : 64); // limit grid size to avoid out_of_resources runtime error.
+ grid_size = (grid_size < 64 ? grid_size : 64); // limit grid size to avoid out_of_resources runtime error.
#else
// if grid_size or block_size are zero, query occupancy using the CUDA Occupancy API
cudaError_t result = cudaOccupancyMaxPotentialBlockSize(
Expand Down

0 comments on commit 69d5c2a

Please sign in to comment.