diff --git a/examples/sycl/pvc/pvc_collective_builder.cpp b/examples/sycl/pvc/pvc_collective_builder.cpp index 420fa4a8b..fc49e182f 100644 --- a/examples/sycl/pvc/pvc_collective_builder.cpp +++ b/examples/sycl/pvc/pvc_collective_builder.cpp @@ -321,7 +321,7 @@ int main(int argc, const char** argv) using LayoutD = cutlass::layout::RowMajor; // Workgroup-level tile - using TileShape = Shape<_256, _256, _32>; + using TileShape = Shape<_256, _128, _16>; using CollectiveMainloop = cutlass::gemm::collective::CollectiveBuilder< cutlass::arch::IntelPVC, cutlass::arch::OpClassTensorOp, diff --git a/include/cutlass/gemm/collective/builders/xe_mma_builder.inl b/include/cutlass/gemm/collective/builders/xe_mma_builder.inl index fd94df25b..58128c72e 100644 --- a/include/cutlass/gemm/collective/builders/xe_mma_builder.inl +++ b/include/cutlass/gemm/collective/builders/xe_mma_builder.inl @@ -86,14 +86,14 @@ struct CollectiveBuilder< //Prepare Template arguments required of CollectiveMainLoop using TiledMma = TiledMMA, - Layout>, - Tile<_32,_64,_32>>; // Subgroup level-tile + Layout>, + Tile<_64,_32,_16>>; // Subgroup level-tile static constexpr int PipelineStages = 3; using DispatchPolicy = cutlass::gemm::MainloopIntelPVC; - using GmemTiledCopyA = XE_2D_U16x8x16x4x2_LD_N; - using GmemTiledCopyB = XE_2D_U16x16x16x2x2_V; + using GmemTiledCopyA = XE_2D_U16x8x16_LD_N; + using GmemTiledCopyB = XE_2D_U16x16x16_LD_V; //PVC pipeline does not use shared memory using SmemLayoutAtomA = void;