Skip to content

Commit

Permalink
Fix PVC benchmark (#99)
Browse files: browse the repository at this point in the history
  • Loading branch information
aacostadiaz authored Jul 16, 2024
1 parent a06fe09 commit 820792e
Showing 1 changed file with 21 additions and 14 deletions.
35 changes: 21 additions & 14 deletions benchmarks/pvc/bench_pvc_gemm_bf16_bf16_fp32_dpas_fp32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@

#include "../common/benchmark_runner.hpp"

#include "cutlass/epilogue/collective/default_epilogue.hpp"
#include "cutlass/epilogue/collective/intel_pvc_epilogue.hpp"
#include "cutlass/epilogue/fusion/intel_pvc_callbacks.hpp"

using namespace cute;

///////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -91,27 +95,30 @@ int main(int argc, const char** argv)
using GmemTiledCopyA = XE_2D_U16x8x16x4x2_LD_N;
using GmemTiledCopyB = XE_2D_U16x16x16x2x1_LD_N;

using DispatchPolicy = cutlass::gemm::MainloopIntelPVCUnpredicated;
using GEMMDispatchPolicy = cutlass::gemm::MainloopIntelPVCUnpredicated;
using EpilogueDispatchPolicy = cutlass::epilogue::IntelPVCEpilogue;

// This code section describes the epilogue part of the kernel
using EpilogueOp = cutlass::epilogue::thread::LinearCombination<
ElementOutput, // <- data type of output matrix
128 / cutlass::sizeof_bits<ElementOutput>::value, // <- the number of elements per vectorized
// memory access. For a byte, it's 16
// elements. This becomes the vector width of
// math instructions in the epilogue too
ElementAccumulator, // <- data type of accumulator
ElementComputeEpilogue>; // <- data type for alpha/beta in linear combination function
using EpilogueOp = cutlass::epilogue::fusion::LinearCombination<ElementOutput, ElementComputeEpilogue,
ElementAccumulator, ElementAccumulator, cutlass::FloatRoundStyle::round_to_nearest>;

using CollectiveEpilogue = cutlass::epilogue::collective::DefaultEpilogue<
using FusionCallBacks = cutlass::epilogue::fusion::FusionCallbacks<EpilogueDispatchPolicy, EpilogueOp, TileShape,
decltype(tile_shape(TiledMma()))>;
using CollectiveEpilogue = cutlass::epilogue::collective::CollectiveEpilogue<
EpilogueDispatchPolicy,
TileShape,
ElementAccumulator,
cutlass::gemm::TagToStrideC_t<LayoutC>,
ElementOutput,
cutlass::gemm::TagToStrideC_t<LayoutD>,
EpilogueOp,
cutlass::gemm::EpilogueDefault>;
FusionCallBacks,
XE_2D_U32x8x16x1x1_LD_N,
void, void,
XE_2D_U32x8x16x1x1_ST_N,
void, void>;

// Mainloop
using CollectiveMainloop = cutlass::gemm::collective::CollectiveMma<
DispatchPolicy,
GEMMDispatchPolicy,
TileShape,
ElementInputA,
cutlass::gemm::TagToStrideA_t<LayoutA>,
Expand Down

0 comments on commit 820792e

Please sign in to comment.