diff --git a/megatron/fused_kernels/type_shim.h b/megatron/fused_kernels/type_shim.h index 6b1c8927d..c8dbd1f10 100644 --- a/megatron/fused_kernels/type_shim.h +++ b/megatron/fused_kernels/type_shim.h @@ -277,7 +277,7 @@ reduce_block_into_lanes(T* x, final = x[tid] + x[tid + 32]; else final = val; - // __SYNCWARP(); + // __SYNCWARP(); #pragma unroll for (int i = 16; i >= lanes; i >>= 1) @@ -321,7 +321,7 @@ reduce_block_into_lanes_max_op(T* x, final = fmaxf(fabsf(x[tid]), fabsf(x[tid + 32])); else final = val; - // __SYNCWARP(); + // __SYNCWARP(); #pragma unroll for (int i = 16; i >= lanes; i >>= 1)