From 63e574dfd2fdb8d368d47c52056c0a02cf086b71 Mon Sep 17 00:00:00 2001 From: Pavle Josipovic Date: Fri, 17 Jan 2025 12:41:45 +0000 Subject: [PATCH] #15824 Work around LLK issue in max_pool pack_untilize_dst causes issues on GS and WH when a reduce op is called after max_pool. The workaround is to use the default value for block_ct_dim in pack_untilize_dst_init_short in the max_pool kernel. This doesn't affect the correctness of the max_pool op and resolves the issue in the subsequent reduce op. Perf looks good on model device perf tests. --- .../generic/device/kernels/compute/max_pool_multi_core.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/pool/generic/device/kernels/compute/max_pool_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/generic/device/kernels/compute/max_pool_multi_core.cpp index 8495d680f2d..982bfe37d44 100644 --- a/ttnn/cpp/ttnn/operations/pool/generic/device/kernels/compute/max_pool_multi_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/generic/device/kernels/compute/max_pool_multi_core.cpp @@ -107,8 +107,7 @@ void MAIN { constexpr uint32_t num_output_tiles = in_ntiles_c / in_nblocks_c; tilizeA_B_reduce_init( in_cb_id, in_scalar_cb_id, num_output_tiles, out_cb_id, num_faces_in_tile, window_size_hw); - pack_untilize_dst_init_short( - out_cb_id, num_out_rows, num_faces_in_tile); /* pack 1 row (1x16 or 1x32) */ + pack_untilize_dst_init_short(out_cb_id, num_out_rows, num_faces_in_tile); /* pack 1 row (1x16 or 1x32) */ cb_wait_front(in_scalar_cb_id, 1); for (uint32_t i = 0; i < nsticks_per_core; ++i) {