Skip to content

Commit

Permalink
Fixed synchronization for double buffering
Browse files Browse the repository at this point in the history
  • Loading branch information
muhammad-tanvir-1211 committed Jan 31, 2024
1 parent d427349 commit eb628b6
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions src/operations/blas3/gemm_local_joint_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,11 @@ class Gemm<input_t, output_t, DoubleBuffer, NbcA, NbcB, ClSize, TileType,
A += cl_elems * (trans_a ? 1 : lda);
B += cl_elems * (trans_b ? ldb : 1);

-sync_smem<double_buffer, ldsb * block_cols, ldsb * block_cols,
-ldsa * cl_elems, ldsa * cl_elems>(id, ofs, s1, s2, s3, s4);
+sync_smem<double_buffer, ldsb *(trans_b ? cl_elems : block_cols),
+ldsb *(trans_b ? cl_elems : block_cols),
+ldsa *(trans_a ? block_rows : cl_elems),
+ldsa *(trans_a ? block_rows : cl_elems)>(id, ofs, s1, s2, s3,
+s4);
k -= cl_elems;
}

Expand All @@ -460,8 +463,11 @@ class Gemm<input_t, output_t, DoubleBuffer, NbcA, NbcB, ClSize, TileType,
id.barrier(cl::sycl::access::fence_space::local_space);
compute_block_gemm(id, s2, s4, reg_res);

-sync_smem<double_buffer, ldsb * block_cols, ldsb * block_cols,
-ldsa * cl_elems, ldsa * cl_elems>(id, ofs, s1, s2, s3, s4);
+sync_smem<double_buffer, ldsb *(trans_b ? cl_elems : block_cols),
+ldsb *(trans_b ? cl_elems : block_cols),
+ldsa *(trans_a ? block_rows : cl_elems),
+ldsa *(trans_a ? block_rows : cl_elems)>(id, ofs, s1, s2, s3,
+s4);
}

// store the output
Expand Down

0 comments on commit eb628b6

Please sign in to comment.