From fff995f3c901d1100d56309340740edbba7e874f Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 11 Oct 2023 14:00:56 +0100 Subject: [PATCH] [compiler] Move IVs into the CreateLoopOpts struct This deprioritises the IVs parameter of `createLoop`, in the case that users don't wish to set it and are happy with the default of 'none'. It also allows us to more cohesively provide a second options field which allows users to set the IV names, rather than a) adding an additional parameter to `createLoop` or b) having related values split across `createLoop` and `CreateLoopOpts`. --- CHANGELOG.md | 3 + .../refsi_g1_wi/source/refsi_wg_loop_pass.cpp | 7 +- .../targets/host/source/AddEntryHook.cpp | 6 +- .../include/compiler/utils/pass_functions.h | 36 ++++--- .../utils/source/mux_builtin_info.cpp | 20 ++-- .../compiler/utils/source/pass_functions.cpp | 9 +- .../utils/source/work_item_loops_pass.cpp | 101 ++++++++---------- 7 files changed, 97 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8ff565d6..e05c4570d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,9 @@ Upgrade guidance: `compiler::utils::LowerToMuxBuiltinsPass`. * The `compiler::utils::HandleBarriersPass` has been renamed to the `compiler::utils::WorkItemLoopsPass`. +* The `compiler::utils::createLoop` API has moved its list of `IVs` parameter + into its `compiler::utils::CreateLoopOpts` parameter. It can now also set the + IV names via a second `CreateLoopOpts` field. ## Version 3.0.0 diff --git a/examples/refsi/refsi_g1_wi/compiler/refsi_g1_wi/source/refsi_wg_loop_pass.cpp b/examples/refsi/refsi_g1_wi/compiler/refsi_g1_wi/source/refsi_wg_loop_pass.cpp index 4e658a3e7..1d7eeaaf0 100644 --- a/examples/refsi/refsi_g1_wi/compiler/refsi_g1_wi/source/refsi_wg_loop_pass.cpp +++ b/examples/refsi/refsi_g1_wi/compiler/refsi_g1_wi/source/refsi_wg_loop_pass.cpp @@ -126,7 +126,7 @@ PreservedAnalyses RefSiWGLoopPass::run(Module &M, ModuleAnalysisManager &AM) { // looping through num groups in the outermost dimension auto *const exitBlock = compiler::utils::createLoop( loopPreheaderIR.GetInsertBlock(), nullptr, zero, numGroups[outer_dim], - {}, create_loop_opts, + create_loop_opts, [&](BasicBlock *blockz, Value *z, ArrayRef, MutableArrayRef) -> BasicBlock * { IRBuilder<> ir(blockz); @@ -136,8 +136,7 @@ PreservedAnalyses RefSiWGLoopPass::run(Module &M, ModuleAnalysisManager &AM) { {i32_0, ir.getInt32(outer_dim)})); // looping through num groups in the middle dimension return compiler::utils::createLoop( - blockz, nullptr, zero, numGroups[middle_dim], {}, - create_loop_opts, + blockz, nullptr, zero, numGroups[middle_dim], create_loop_opts, [&](BasicBlock *blocky, Value *y, ArrayRef, MutableArrayRef) -> BasicBlock * { IRBuilder<> ir(blocky); @@ -147,7 +146,7 @@ PreservedAnalyses RefSiWGLoopPass::run(Module &M, ModuleAnalysisManager &AM) { // looping through num groups in the x dimension return compiler::utils::createLoop( - blocky, nullptr, zero, numGroups[inner_dim], {}, + blocky, nullptr, zero, numGroups[inner_dim], create_loop_opts, [&](BasicBlock *blockx, Value *x, ArrayRef, MutableArrayRef) -> BasicBlock * { diff --git a/modules/compiler/targets/host/source/AddEntryHook.cpp b/modules/compiler/targets/host/source/AddEntryHook.cpp index 95e9b5f76..641c3b059 100644 --- a/modules/compiler/targets/host/source/AddEntryHook.cpp +++ b/modules/compiler/targets/host/source/AddEntryHook.cpp @@ -177,7 +177,7 @@ PreservedAnalyses AddEntryHookPass::run(Module &M, ModuleAnalysisManager &AM) { // looping through num groups in the outermost dimension auto exitBlock = compiler::utils::createLoop( - loopIR.GetInsertBlock(), nullptr, zero, numGroups[outer_dim], {}, opts, + loopIR.GetInsertBlock(), nullptr, zero, numGroups[outer_dim], opts, [&](BasicBlock *blockz, Value *z, ArrayRef, MutableArrayRef) -> BasicBlock * { IRBuilder<> ir(blockz); @@ -187,7 +187,7 @@ PreservedAnalyses AddEntryHookPass::run(Module &M, ModuleAnalysisManager &AM) { {i32_0, ir.getInt32(outer_dim)})); // looping through num groups in the middle dimension return compiler::utils::createLoop( - blockz, nullptr, zero, numGroups[middle_dim], {}, opts, + blockz, nullptr, zero, numGroups[middle_dim], opts, [&](BasicBlock *blocky, Value *y, ArrayRef, MutableArrayRef) -> BasicBlock * { IRBuilder<> ir(blocky); @@ -197,7 +197,7 @@ PreservedAnalyses AddEntryHookPass::run(Module &M, ModuleAnalysisManager &AM) { // looping through num groups in the x dimension return compiler::utils::createLoop( - blocky, nullptr, sliceStart, clampedSliceEnd, {}, opts, + blocky, nullptr, sliceStart, clampedSliceEnd, opts, [&](BasicBlock *blockx, Value *x, ArrayRef, MutableArrayRef) -> BasicBlock * { IRBuilder<> ir(blockx); diff --git a/modules/compiler/utils/include/compiler/utils/pass_functions.h b/modules/compiler/utils/include/compiler/utils/pass_functions.h index 770fbea31..048cf1a0e 100644 --- a/modules/compiler/utils/include/compiler/utils/pass_functions.h +++ b/modules/compiler/utils/include/compiler/utils/pass_functions.h @@ -182,6 +182,19 @@ struct CreateLoopOpts { /// @brief headerName Optional name for the loop header block. Defaults to: /// "loopIR". llvm::StringRef headerName = "loopIR"; + /// @brief An optional list of incoming IV values. + /// + /// Each of these is used as the incoming value to a PHI created by + /// createLoop. These PHIs are provided to the 'body' function of createLoop, + /// which should in turn set the 'next' version of the IV. + std::vector IVs; + /// @brief An optional list of IV names, to be set on the PHIs provided by + /// 'IVs' field/parameter. + /// + /// If set, the names are assumed to correlate 1:1 with those IVs. The list + /// may be shorter than the list of IVs, in which case the trailing IVs are + /// not named. + std::vector loopIVNames; }; /// @brief Create a loop around a body, creating an implicit induction variable @@ -200,23 +213,22 @@ struct CreateLoopOpts { /// @param exit Loop exit block. The new loop will jump to this once it exits. /// @param indexStart The start index /// @param indexEnd The end index (we compare for <) -/// @param ivs A list of extra induction variables to create. /// @param opts Set of options configuring the generation of this loop. -/// @param body Body of code to insert into loop. The parameters of this -/// function are as follows: the loop body BasicBlock; the Value corresponding -/// to the IV beginning at `indexStart` and incremented each iteration by -/// `indexInc` while less than `indexEnd`; the list of IVs for this iteration -/// of the loop (may or may not be PHIs, depending on the loop bounds); the -/// list of IVs for the next iteration of the loop (the function is required to -/// fill these in). Both these sets of IVs will be arrays of equal length to -/// the original list of IVs, in the same order. The function returns the loop -/// latch/exiting block: this block will be given the branch that decides -/// between continuing the loop and exiting from it. +/// @param body Body of code to insert into loop. +/// +/// The parameters of this function are as follows: the loop body BasicBlock; +/// the Value corresponding to the IV beginning at `indexStart` and incremented +/// each iteration by `indexInc` while less than `indexEnd`; the list of IVs +/// for this iteration of the loop (may or may not be PHIs, depending on the +/// loop bounds); the list of IVs for the next iteration of the loop (the +/// function is required to fill these in). Both these sets of IVs will be +/// arrays of equal length to the original list of IVs, in the same order. The +/// function returns the loop latch/exiting block: this block will be given the +/// branch that decides between continuing the loop and exiting from it. /// /// @return llvm::BasicBlock* The exit block llvm::BasicBlock *createLoop(llvm::BasicBlock *entry, llvm::BasicBlock *exit, llvm::Value *indexStart, llvm::Value *indexEnd, - llvm::ArrayRef ivs, const CreateLoopOpts &opts, CreateLoopBodyFn body); /// @brief Get the last argument of a function. diff --git a/modules/compiler/utils/source/mux_builtin_info.cpp b/modules/compiler/utils/source/mux_builtin_info.cpp index 9f3caedf6..421883385 100644 --- a/modules/compiler/utils/source/mux_builtin_info.cpp +++ b/modules/compiler/utils/source/mux_builtin_info.cpp @@ -591,12 +591,13 @@ static BasicBlock *copy1D(Module &M, BasicBlock &ParentBB, Value *DstPtr, assert(DstPtr->getType()->isPointerTy() && "Mux DMA builtins are always byte-accessed"); - Value *DmaIVs[] = {SrcPtr, DstPtr}; + compiler::utils::CreateLoopOpts opts; + opts.IVs = {SrcPtr, DstPtr}; + opts.loopIVNames = {"dma.src", "dma.dst"}; // This is a simple loop copy a byte at a time from SrcPtr to DstPtr. BasicBlock *ExitBB = compiler::utils::createLoop( - &ParentBB, nullptr, ConstantInt::get(getSizeType(M), 0), NumBytes, DmaIVs, - compiler::utils::CreateLoopOpts{}, + &ParentBB, nullptr, ConstantInt::get(getSizeType(M), 0), NumBytes, opts, [&](BasicBlock *BB, Value *X, ArrayRef IVsCurr, MutableArrayRef IVsNext) { IRBuilder<> B(BB); @@ -625,12 +626,13 @@ static BasicBlock *copy2D(Module &M, BasicBlock &ParentBB, Value *DstPtr, assert(DstPtr->getType()->isPointerTy() && "Mux DMA builtins are always byte-accessed"); - Value *DmaIVs[] = {SrcPtr, DstPtr}; + compiler::utils::CreateLoopOpts opts; + opts.IVs = {SrcPtr, DstPtr}; + opts.loopIVNames = {"dma.src", "dma.dst"}; // This is a loop over the range of lines, calling a 1D copy on each line BasicBlock *ExitBB = compiler::utils::createLoop( - &ParentBB, nullptr, ConstantInt::get(getSizeType(M), 0), NumLines, DmaIVs, - compiler::utils::CreateLoopOpts{}, + &ParentBB, nullptr, ConstantInt::get(getSizeType(M), 0), NumLines, opts, [&](BasicBlock *block, Value *, ArrayRef IVsCurr, MutableArrayRef IVsNext) { IRBuilder<> loopIr(block); @@ -735,12 +737,14 @@ Function *BIMuxInfoConcept::defineDMA3D(Function &F) { assert(ArgDstPtr->getType()->isPointerTy() && "Mux DMA builtins are always byte-accessed"); - Value *DmaIVs[] = {ArgSrcPtr, ArgDstPtr}; + compiler::utils::CreateLoopOpts opts; + opts.IVs = {ArgSrcPtr, ArgDstPtr}; + opts.loopIVNames = {"dma.src", "dma.dst"}; // Create a loop around 1D DMA memcpy, adding stride, local width each time. BasicBlock *LoopExitBB = compiler::utils::createLoop( LoopEntryBB, nullptr, ConstantInt::get(getSizeType(M), 0), ArgNumPlanes, - DmaIVs, compiler::utils::CreateLoopOpts{}, + opts, [&](BasicBlock *BB, Value *, ArrayRef IVsCurr, MutableArrayRef IVsNext) { IRBuilder<> loopIr(BB); diff --git a/modules/compiler/utils/source/pass_functions.cpp b/modules/compiler/utils/source/pass_functions.cpp index e235e6bee..0d93ce43a 100644 --- a/modules/compiler/utils/source/pass_functions.cpp +++ b/modules/compiler/utils/source/pass_functions.cpp @@ -473,7 +473,6 @@ bool addParamToAllFunctions(llvm::Module &module, llvm::BasicBlock *createLoop(llvm::BasicBlock *entry, llvm::BasicBlock *exit, llvm::Value *indexStart, llvm::Value *indexEnd, - llvm::ArrayRef ivs, const CreateLoopOpts &opts, CreateLoopBodyFn body) { // If the index increment is null, we default to 1 as our index. @@ -483,8 +482,8 @@ llvm::BasicBlock *createLoop(llvm::BasicBlock *entry, llvm::BasicBlock *exit, llvm::LLVMContext &ctx = entry->getContext(); - llvm::SmallVector currIVs(ivs.begin(), ivs.end()); - llvm::SmallVector nextIVs(ivs.size()); + llvm::SmallVector currIVs(opts.IVs.begin(), opts.IVs.end()); + llvm::SmallVector nextIVs(opts.IVs.size()); // Check if indexStart, indexEnd, and indexInc are constants. if (llvm::isa(indexStart) && @@ -564,6 +563,10 @@ llvm::BasicBlock *createLoop(llvm::BasicBlock *entry, llvm::BasicBlock *exit, auto *const phi = loopIR.CreatePHI(currIVs[i]->getType(), 2); llvm::cast(phi)->addIncoming(currIVs[i], entryIR.GetInsertBlock()); + // Set IV names if they've been given to us. + if (i < opts.loopIVNames.size()) { + phi->setName(opts.loopIVNames[i]); + } currIVs[i] = phi; } diff --git a/modules/compiler/utils/source/work_item_loops_pass.cpp b/modules/compiler/utils/source/work_item_loops_pass.cpp index daaaa0a92..af8e5fcdf 100644 --- a/modules/compiler/utils/source/work_item_loops_pass.cpp +++ b/modules/compiler/utils/source/work_item_loops_pass.cpp @@ -386,10 +386,10 @@ struct ScheduleGenerator { Function *const func = block->getParent(); // Induction variables - Value *IVs[] = {accumulator}; auto *const totalSize = barrier.getTotalSize(); compiler::utils::CreateLoopOpts inner_opts; + inner_opts.IVs = {accumulator}; inner_opts.attemptUnroll = true; inner_opts.disableVectorize = true; @@ -427,7 +427,7 @@ struct ScheduleGenerator { // linearly looping through the work items exitBlock = compiler::utils::createLoop( - preheader, exitBlock, zero, totalSize, IVs, inner_opts, + preheader, exitBlock, zero, totalSize, inner_opts, [&](BasicBlock *block, Value *index, ArrayRef ivs, MutableArrayRef ivsNext) -> BasicBlock * { IRBuilder<> ir(block); @@ -711,13 +711,13 @@ struct ScheduleGenerator { if (mainPreheaderBB) { wrapperHasMain = true; // Subgroup induction variables - Value *subgroupIVs2[] = {i32Zero}; compiler::utils::CreateLoopOpts outer_opts; + outer_opts.IVs = {i32Zero}; // looping through num groups in the third (outermost) dimension mainExitBB = compiler::utils::createLoop( mainPreheaderBB, mainExitBB, zero, localSizeDim[workItemDim2], - subgroupIVs2, outer_opts, + outer_opts, [&](BasicBlock *block, Value *dim_2, ArrayRef ivs2, MutableArrayRef ivsNext2) -> BasicBlock * { // if we need to set the local id, do so here. @@ -726,10 +726,12 @@ struct ScheduleGenerator { {ConstantInt::get(i32Ty, workItemDim2), dim_2}) ->setCallingConv(set_local_id->getCallingConv()); + compiler::utils::CreateLoopOpts middle_opts; + middle_opts.IVs = ivs2.vec(); + // looping through num groups in the second dimension BasicBlock *exit1 = compiler::utils::createLoop( - block, nullptr, zero, localSizeDim[workItemDim1], ivs2, - outer_opts, + block, nullptr, zero, localSizeDim[workItemDim1], middle_opts, [&](BasicBlock *block, Value *dim_1, ArrayRef ivs1, MutableArrayRef ivsNext1) -> BasicBlock * { IRBuilder<> ir(block); @@ -742,11 +744,14 @@ struct ScheduleGenerator { IRBuilder<> irph(mainPreheaderBB, mainPreheaderBB->getFirstInsertionPt()); auto *VF = materializeVF(irph, barrierMain.getVFInfo().vf); + compiler::utils::CreateLoopOpts inner_opts; inner_opts.indexInc = VF; + inner_opts.IVs = ivs1.vec(); inner_opts.attemptUnroll = true; + BasicBlock *exit0 = compiler::utils::createLoop( - block, nullptr, zero, mainLoopLimit, ivs1, inner_opts, + block, nullptr, zero, mainLoopLimit, inner_opts, [&](BasicBlock *block, Value *dim_0, ArrayRef ivs0, MutableArrayRef ivsNext0) -> BasicBlock * { @@ -824,15 +829,13 @@ struct ScheduleGenerator { assert(barrierTail); wrapperHasTail = true; // Subgroup induction variables - Value *subgroupIVs2[] = {subgroupMergePhi ? subgroupMergePhi - : nextSubgroupIV}; - compiler::utils::CreateLoopOpts outer_opts; + outer_opts.IVs = {subgroupMergePhi ? subgroupMergePhi : nextSubgroupIV}; // looping through num groups in the third (outermost) dimension tailExitBB = compiler::utils::createLoop( tailPreheaderBB, tailExitBB, zero, localSizeDim[workItemDim2], - subgroupIVs2, outer_opts, + outer_opts, [&](BasicBlock *block, Value *dim_2, ArrayRef ivs2, MutableArrayRef ivsNext2) -> BasicBlock * { // set the local id @@ -841,10 +844,12 @@ struct ScheduleGenerator { {ConstantInt::get(i32Ty, workItemDim2), dim_2}) ->setCallingConv(set_local_id->getCallingConv()); + compiler::utils::CreateLoopOpts middle_opts; + middle_opts.IVs = ivs2.vec(); + // looping through num groups in the second dimension BasicBlock *exit1 = compiler::utils::createLoop( - block, nullptr, zero, localSizeDim[workItemDim1], ivs2, - outer_opts, + block, nullptr, zero, localSizeDim[workItemDim1], middle_opts, [&](BasicBlock *block, Value *dim_1, ArrayRef ivs1, MutableArrayRef ivsNext1) -> BasicBlock * { IRBuilder<> ir(block); @@ -853,11 +858,12 @@ struct ScheduleGenerator { ->setCallingConv(set_local_id->getCallingConv()); compiler::utils::CreateLoopOpts inner_opts; + inner_opts.IVs = ivs1.vec(); inner_opts.attemptUnroll = true; inner_opts.disableVectorize = true; BasicBlock *exit0 = compiler::utils::createLoop( - block, nullptr, zero, peel, ivs1, inner_opts, + block, nullptr, zero, peel, inner_opts, [&](BasicBlock *block, Value *dim_0, ArrayRef ivs0, MutableArrayRef ivsNext0) -> BasicBlock * { @@ -936,16 +942,17 @@ struct ScheduleGenerator { // Same with the scan IV PHINode *scanMergePhi = nullptr; - SmallVector subgroupIVs2 = {i32Zero}; + compiler::utils::CreateLoopOpts outer_opts; + outer_opts.IVs.push_back(i32Zero); + outer_opts.loopIVNames.push_back("sg.z"); if (isScan) { - subgroupIVs2.push_back(nextScanIV); + outer_opts.IVs.push_back(nextScanIV); + outer_opts.loopIVNames.push_back("scan.z"); } - compiler::utils::CreateLoopOpts outer_opts; // looping through num groups in the third (outermost) dimension return compiler::utils::createLoop( - block, nullptr, zero, localSizeDim[workItemDim2], subgroupIVs2, - outer_opts, + block, nullptr, zero, localSizeDim[workItemDim2], outer_opts, [&](BasicBlock *block, Value *dim_2, ArrayRef ivs2, MutableArrayRef ivsNext2) -> BasicBlock * { // set the local id @@ -954,19 +961,16 @@ struct ScheduleGenerator { {ConstantInt::get(i32Ty, workItemDim2), dim_2}) ->setCallingConv(set_local_id->getCallingConv()); - if (auto *i = dyn_cast(ivs2[0])) { - i->setName("sg.z"); - } + compiler::utils::CreateLoopOpts middle_opts; + middle_opts.IVs = ivs2.vec(); + middle_opts.loopIVNames.push_back("sg.y"); if (isScan) { - if (auto *i = dyn_cast(ivs2[1])) { - i->setName("scan.z"); - } + middle_opts.loopIVNames.push_back("scan.y"); } // looping through num groups in the second dimension BasicBlock *exit1 = compiler::utils::createLoop( - block, nullptr, zero, localSizeDim[workItemDim1], ivs2, - outer_opts, + block, nullptr, zero, localSizeDim[workItemDim1], middle_opts, [&](BasicBlock *block, Value *dim_1, ArrayRef ivs1, MutableArrayRef ivsNext1) -> BasicBlock * { IRBuilder<> ir(block); @@ -974,15 +978,6 @@ struct ScheduleGenerator { {ConstantInt::get(i32Ty, workItemDim1), dim_1}) ->setCallingConv(set_local_id->getCallingConv()); - if (auto *i = dyn_cast(ivs1[0])) { - i->setName("sg.y"); - } - if (isScan) { - if (auto *i = dyn_cast(ivs1[1])) { - i->setName("scan.y"); - } - } - // looping through num groups in the first (innermost) // dimension BasicBlock *mainPreheaderBB = block; @@ -1038,24 +1033,23 @@ struct ScheduleGenerator { IRBuilder<> irph(mainPreheaderBB, mainPreheaderBB->getFirstInsertionPt()); auto *VF = materializeVF(irph, barrierMain.getVFInfo().vf); + compiler::utils::CreateLoopOpts inner_vf_opts; inner_vf_opts.indexInc = VF; inner_vf_opts.attemptUnroll = true; + inner_vf_opts.IVs = ivs1.vec(); + inner_vf_opts.loopIVNames.push_back("sg.x.main"); + if (isScan) { + inner_vf_opts.loopIVNames.push_back("scan.y.main"); + } + mainExitBB = compiler::utils::createLoop( - mainPreheaderBB, mainExitBB, zero, mainLoopLimit, ivs1, + mainPreheaderBB, mainExitBB, zero, mainLoopLimit, inner_vf_opts, [&](BasicBlock *block, Value *dim_0, ArrayRef ivs0, MutableArrayRef ivsNext0) -> BasicBlock * { IRBuilder<> ir(block); - if (auto *i = dyn_cast(ivs0[0])) { - i->setName("sg.x.main"); - } - if (isScan) { - if (auto *i = dyn_cast(ivs0[1])) { - i->setName("scan.x.main"); - } - } if (set_subgroup_id) { // set our subgroup id @@ -1196,24 +1190,21 @@ struct ScheduleGenerator { compiler::utils::CreateLoopOpts inner_scalar_opts; inner_scalar_opts.attemptUnroll = true; inner_scalar_opts.disableVectorize = true; + inner_scalar_opts.IVs.assign(subgroupIVs0.begin(), + subgroupIVs0.end()); + inner_scalar_opts.loopIVNames.push_back("sg.x.tail"); + if (isScan) { + inner_scalar_opts.loopIVNames.push_back("scan.x.tail"); + } tailExitBB = compiler::utils::createLoop( - tailPreheaderBB, tailExitBB, zero, peel, subgroupIVs0, + tailPreheaderBB, tailExitBB, zero, peel, inner_scalar_opts, [&](BasicBlock *block, Value *dim_0, ArrayRef ivs0, MutableArrayRef ivsNext0) -> BasicBlock * { IRBuilder<> ir(block); - if (auto *i = dyn_cast(ivs0[0])) { - i->setName("sg.x.tail"); - } - if (isScan) { - if (auto *i = dyn_cast(ivs0[1])) { - i->setName("scan.x.tail"); - } - } - if (set_subgroup_id) { // set our subgroup id ir.CreateCall(set_subgroup_id, {ivs0[0]})