From 774286dc2b07341634e31b941fc4a215198b0402 Mon Sep 17 00:00:00 2001 From: Muhammad Tanvir Date: Fri, 5 Jan 2024 00:12:27 +0000 Subject: [PATCH] Unrolled the Global Memory write loops --- src/operations/blas3/gemm_local_joint_matrix.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/operations/blas3/gemm_local_joint_matrix.hpp b/src/operations/blas3/gemm_local_joint_matrix.hpp index 0b2569790..a4e7dc17d 100644 --- a/src/operations/blas3/gemm_local_joint_matrix.hpp +++ b/src/operations/blas3/gemm_local_joint_matrix.hpp @@ -535,6 +535,7 @@ class Gemm 1 ? tile_type::joint_matrix_N : block_cols; +#pragma unroll for (index_t frag = 0; frag < frags_per_sg; frag++, C += output_global_outer_offset, nc -= tile_type::joint_matrix_N) { @@ -559,6 +560,7 @@ class Gemm= block_rows && nc >= nc_conditional) { const index_t loop_limit = nc_conditional / rows_per_iter; +#pragma unroll for (int i = 0; i < loop_limit; i++, new_C += output_global_inner_offset, new_scratch += output_local_inner_offset) { @@ -574,6 +576,7 @@ class Gemm