diff --git a/clang/test/Driver/riscv-profiles.c b/clang/test/Driver/riscv-profiles.c index 67e09d0e69ebc3..c87ec5a27822c5 100644 --- a/clang/test/Driver/riscv-profiles.c +++ b/clang/test/Driver/riscv-profiles.c @@ -57,6 +57,7 @@ // RVA22U64: "-target-feature" "+f" // RVA22U64: "-target-feature" "+d" // RVA22U64: "-target-feature" "+c" +// RVA22U64: "-target-feature" "+b" // RVA22U64: "-target-feature" "+zic64b" // RVA22U64: "-target-feature" "+zicbom" // RVA22U64: "-target-feature" "+zicbop" @@ -83,6 +84,7 @@ // RVA22S64: "-target-feature" "+f" // RVA22S64: "-target-feature" "+d" // RVA22S64: "-target-feature" "+c" +// RVA22S64: "-target-feature" "+b" // RVA22S64: "-target-feature" "+zic64b" // RVA22S64: "-target-feature" "+zicbom" // RVA22S64: "-target-feature" "+zicbop" @@ -118,6 +120,7 @@ // RVA23U64: "-target-feature" "+f" // RVA23U64: "-target-feature" "+d" // RVA23U64: "-target-feature" "+c" +// RVA23U64: "-target-feature" "+b" // RVA23U64: "-target-feature" "+v" // RVA23U64: "-target-feature" "+zic64b" // RVA23U64: "-target-feature" "+zicbom" @@ -156,6 +159,7 @@ // RVA23S64: "-target-feature" "+f" // RVA23S64: "-target-feature" "+d" // RVA23S64: "-target-feature" "+c" +// RVA23S64: "-target-feature" "+b" // RVA23S64: "-target-feature" "+v" // RVA23S64: "-target-feature" "+h" // RVA23S64: "-target-feature" "+zic64b" @@ -217,6 +221,7 @@ // RVB23U64: "-target-feature" "+f" // RVB23U64: "-target-feature" "+d" // RVB23U64: "-target-feature" "+c" +// RVB23U64: "-target-feature" "+b" // RVB23U64: "-target-feature" "+zic64b" // RVB23U64: "-target-feature" "+zicbom" // RVB23U64: "-target-feature" "+zicbop" @@ -249,6 +254,7 @@ // RVB23S64: "-target-feature" "+f" // RVB23S64: "-target-feature" "+d" // RVB23S64: "-target-feature" "+c" +// RVB23S64: "-target-feature" "+b" // RVB23S64: "-target-feature" "+zic64b" // RVB23S64: "-target-feature" "+zicbom" // RVB23S64: "-target-feature" "+zicbop" @@ -290,6 +296,7 @@ // RUN: %clang --target=riscv32 -### -c %s 2>&1 -march=rvm23u32 -menable-experimental-extensions \ // RUN: | FileCheck -check-prefix=RVM23U32 %s // RVM23U32: "-target-feature" "+m" +// RVM23U32: "-target-feature" "+b" // RVM23U32: "-target-feature" "+zicbop" // RVM23U32: "-target-feature" "+zicond" // RVM23U32: "-target-feature" "+zicsr" @@ -309,6 +316,7 @@ // PROFILE-WITH-ADDITIONAL: "-target-feature" "+f" // PROFILE-WITH-ADDITIONAL: "-target-feature" "+d" // PROFILE-WITH-ADDITIONAL: "-target-feature" "+c" +// PROFILE-WITH-ADDITIONAL: "-target-feature" "+b" // PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicbom" // PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicbop" // PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicboz" diff --git a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h index 3b0cbcdd49c254..36a996632b71e3 100644 --- a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h +++ b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h @@ -8,9 +8,9 @@ //============================================================================== // // NOTE! -// llvm/lib/ProfileData/CtxInstrContextNode.h and +// llvm/include/llvm/ProfileData/CtxInstrContextNode.h and // compiler-rt/lib/ctx_profile/CtxInstrContextNode.h -// must be exact copies of each other +// must be exact copies of each other. // // compiler-rt creates these objects as part of the instrumentation runtime for // contextual profiling. LLVM only consumes them to convert a contextual tree @@ -114,4 +114,4 @@ class ContextNode final { }; } // namespace ctx_profile } // namespace llvm -#endif \ No newline at end of file +#endif diff --git a/lld/test/ELF/aarch64-feature-pac.s b/lld/test/ELF/aarch64-feature-pac.s index beafe58887db3f..b85a33216cb5bd 100644 --- a/lld/test/ELF/aarch64-feature-pac.s +++ b/lld/test/ELF/aarch64-feature-pac.s @@ -76,12 +76,14 @@ # PACDYN-NOT: 0x0000000070000001 (AARCH64_BTI_PLT) # PACDYN-NOT: 0x0000000070000003 (AARCH64_PAC_PLT) -## Turn on PAC entries with the -z pac-plt command line option. There are no -## warnings in this case as the choice to use PAC in PLT entries is orthogonal -## to the choice of using PAC in relocatable objects. The presence of the PAC -## .note.gnu.property is an indication of preference by the relocatable object. +## Turn on PAC entries with the -z pac-plt command line option. For files w/o +## GNU_PROPERTY_AARCH64_FEATURE_1_PAC set in GNU_PROPERTY_AARCH64_FEATURE_1_AND +## property, emit a warning. + +# RUN: ld.lld %t.o %t2.o -z pac-plt %t.so -o %tpacplt.exe 2>&1 | FileCheck -DFILE=%t2.o --check-prefix WARN %s + +# WARN: warning: [[FILE]]: -z pac-plt: file does not have GNU_PROPERTY_AARCH64_FEATURE_1_PAC property -# RUN: ld.lld %t.o %t2.o -z pac-plt %t.so -o %tpacplt.exe # RUN: llvm-readelf -n %tpacplt.exe | FileCheck --check-prefix=PACPROP %s # RUN: llvm-readelf --dynamic-table %tpacplt.exe | FileCheck --check-prefix PACDYN2 %s # RUN: llvm-objdump --no-print-imm-hex -d --mattr=+v8.3a --no-show-raw-insn %tpacplt.exe | FileCheck --check-prefix PACPLT %s diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index dca8fd9a0bca0b..2152de9709dc6e 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -599,6 +599,70 @@ described in the ``s2g.tile`` mode intrinsics above. For more information, refer PTX ISA ``_. +'``llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.[1-5]d``' +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +.. code-block:: llvm + + declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.1d(ptr %tensor_map, i32 %d0, i64 %ch, i1 %flag_ch) + declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.2d(..., i32 %d0, i32 %d1, ...) + declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.3d(..., i32 %d0, i32 %d1, i32 %d2, ...) + declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, ...) + declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.5d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, ...) + +Overview: +""""""""" + +The '``@llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.[1-5]d``' intrinsics +correspond to the ``cp.async.bulk.prefetch.tensor.[1-5]d.L2.global*`` set +of PTX instructions. These instructions initiate an asynchronous prefetch +of tensor data from global memory to the L2 cache. In tile mode, the +multi-dimensional layout of the source tensor is preserved at the destination. +The dimension of the tensor data ranges from 1d to 5d with the coordinates +specified by the ``i32 %d0 ... i32 %d4`` arguments. + +* The last argument to these intrinsics is a boolean flag + indicating support for cache_hint. This flag argument must + be a compile-time constant. When set, it indicates a valid + cache_hint (``i64 %ch``) and generates the ``.L2::cache_hint`` + variant of the PTX instruction. + +For more information, refer PTX ISA +``_. + +'``llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.[1-5]d``' +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +.. code-block:: llvm + + declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.3d(ptr %tensor_map, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i64 %ch, i1 %flag_ch) + declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i16 %im2col0, i16 %im2col1, ...) + declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.5d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i16 %im2col0, i16 %im2col1, i16 %im2col2, ...) + +Overview: +""""""""" + +The '``@llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.[1-5]d``' intrinsics +correspond to the ``cp.async.bulk.prefetch.tensor.[1-5]d.L2.global*`` set +of PTX instructions. These instructions initiate an asynchronous prefetch +of tensor data from global memory to the L2 cache. In im2col mode, some +dimensions of the source tensor are unrolled into a single dimensional +column at the destination. In this mode, the tensor has to be at least +three-dimensional. Along with the tensor coordinates, im2col offsets are +also specified (denoted by ``i16 im2col0...i16 %im2col2``). The number +of im2col offsets is two less than the number of dimensions of the tensor +operation. The last argument to these intrinsics is a boolean flag, with +the same functionality as described in the ``tile`` mode intrinsics above. + +For more information, refer PTX ISA +``_. + Other Intrinsics ---------------- diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 343a0172ff39ed..b737917b8442da 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -1102,6 +1102,13 @@ class LegalizeRuleSet { return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy); } + LegalizeRuleSet &clampScalar(bool Pred, unsigned TypeIdx, const LLT MinTy, + const LLT MaxTy) { + if (!Pred) + return *this; + return clampScalar(TypeIdx, MinTy, MaxTy); + } + /// Limit the range of scalar sizes to MinTy and MaxTy. LegalizeRuleSet &clampScalarOrElt(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy) { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index f578455905f210..119d3d878206fb 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -19,7 +19,9 @@ #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/CoreContainers.h" #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/MaterializationUnit.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" @@ -39,7 +41,6 @@ namespace orc { // Forward declare some classes. class AsynchronousSymbolQuery; class ExecutionSession; -class MaterializationUnit; class MaterializationResponsibility; class JITDylib; class ResourceTracker; @@ -109,23 +110,6 @@ class ResourceManager { ResourceKey SrcK) = 0; }; -/// A set of symbol names (represented by SymbolStringPtrs for -// efficiency). -using SymbolNameSet = DenseSet; - -/// A vector of symbol names. -using SymbolNameVector = std::vector; - -/// A map from symbol names (as SymbolStringPtrs) to JITSymbols -/// (address/flags pairs). -using SymbolMap = DenseMap; - -/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags. -using SymbolFlagsMap = DenseMap; - -/// A map from JITDylibs to sets of symbols. -using SymbolDependenceMap = DenseMap; - /// Lookup flags that apply to each dylib in the search order for a lookup. /// /// If MatchHiddenSymbolsOnly is used (the default) for a given dylib, then @@ -682,83 +666,6 @@ class MaterializationResponsibility { SymbolStringPtr InitSymbol; }; -/// A MaterializationUnit represents a set of symbol definitions that can -/// be materialized as a group, or individually discarded (when -/// overriding definitions are encountered). -/// -/// MaterializationUnits are used when providing lazy definitions of symbols to -/// JITDylibs. The JITDylib will call materialize when the address of a symbol -/// is requested via the lookup method. The JITDylib will call discard if a -/// stronger definition is added or already present. -class MaterializationUnit { - friend class ExecutionSession; - friend class JITDylib; - -public: - static char ID; - - struct Interface { - Interface() = default; - Interface(SymbolFlagsMap InitalSymbolFlags, SymbolStringPtr InitSymbol) - : SymbolFlags(std::move(InitalSymbolFlags)), - InitSymbol(std::move(InitSymbol)) { - assert((!this->InitSymbol || this->SymbolFlags.count(this->InitSymbol)) && - "If set, InitSymbol should appear in InitialSymbolFlags map"); - } - - SymbolFlagsMap SymbolFlags; - SymbolStringPtr InitSymbol; - }; - - MaterializationUnit(Interface I) - : SymbolFlags(std::move(I.SymbolFlags)), - InitSymbol(std::move(I.InitSymbol)) {} - virtual ~MaterializationUnit() = default; - - /// Return the name of this materialization unit. Useful for debugging - /// output. - virtual StringRef getName() const = 0; - - /// Return the set of symbols that this source provides. - const SymbolFlagsMap &getSymbols() const { return SymbolFlags; } - - /// Returns the initialization symbol for this MaterializationUnit (if any). - const SymbolStringPtr &getInitializerSymbol() const { return InitSymbol; } - - /// Implementations of this method should materialize all symbols - /// in the materialzation unit, except for those that have been - /// previously discarded. - virtual void - materialize(std::unique_ptr R) = 0; - - /// Called by JITDylibs to notify MaterializationUnits that the given symbol - /// has been overridden. - void doDiscard(const JITDylib &JD, const SymbolStringPtr &Name) { - SymbolFlags.erase(Name); - if (InitSymbol == Name) { - DEBUG_WITH_TYPE("orc", { - dbgs() << "In " << getName() << ": discarding init symbol \"" - << *Name << "\"\n"; - }); - InitSymbol = nullptr; - } - discard(JD, std::move(Name)); - } - -protected: - SymbolFlagsMap SymbolFlags; - SymbolStringPtr InitSymbol; - -private: - virtual void anchor(); - - /// Implementations of this method should discard the given symbol - /// from the source (e.g. if the source is an LLVM IR Module and the - /// symbol is a function, delete the function body or mark it available - /// externally). - virtual void discard(const JITDylib &JD, const SymbolStringPtr &Name) = 0; -}; - /// A MaterializationUnit implementation for pre-existing absolute symbols. /// /// All symbols will be resolved and marked ready as soon as the unit is diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CoreContainers.h b/llvm/include/llvm/ExecutionEngine/Orc/CoreContainers.h new file mode 100644 index 00000000000000..06d5aef0119f9b --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/CoreContainers.h @@ -0,0 +1,47 @@ +//===---- CoreContainers.h - Symbol Containers for Core APIs ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Symbol container types for core ORC APIs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H +#define LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" + +#include + +namespace llvm::orc { + +class JITDylib; + +/// A set of symbol names (represented by SymbolStringPtrs for +// efficiency). +using SymbolNameSet = DenseSet; + +/// A vector of symbol names. +using SymbolNameVector = std::vector; + +/// A map from symbol names (as SymbolStringPtrs) to JITSymbols +/// (address/flags pairs). +using SymbolMap = DenseMap; + +/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags. +using SymbolFlagsMap = DenseMap; + +/// A map from JITDylibs to sets of symbols. +using SymbolDependenceMap = DenseMap; + +} // End namespace llvm::orc + +#endif // LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MaterializationUnit.h b/llvm/include/llvm/ExecutionEngine/Orc/MaterializationUnit.h new file mode 100644 index 00000000000000..4ac8f6b6ba05ad --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/MaterializationUnit.h @@ -0,0 +1,103 @@ +//===---- MaterializationUnit.h -- Materialization Black Box ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MaterializationUnit class and related types and operations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_MATERIALIZATIONUNIT_H +#define LLVM_EXECUTIONENGINE_ORC_MATERIALIZATIONUNIT_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/Orc/CoreContainers.h" +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" + +namespace llvm::orc { + +class MaterializationResponsibility; + +/// A MaterializationUnit represents a set of symbol definitions that can +/// be materialized as a group, or individually discarded (when +/// overriding definitions are encountered). +/// +/// MaterializationUnits are used when providing lazy definitions of symbols to +/// JITDylibs. The JITDylib will call materialize when the address of a symbol +/// is requested via the lookup method. The JITDylib will call discard if a +/// stronger definition is added or already present. +class MaterializationUnit { + friend class ExecutionSession; + friend class JITDylib; + +public: + static char ID; + + struct Interface { + Interface() = default; + Interface(SymbolFlagsMap InitalSymbolFlags, SymbolStringPtr InitSymbol) + : SymbolFlags(std::move(InitalSymbolFlags)), + InitSymbol(std::move(InitSymbol)) { + assert((!this->InitSymbol || this->SymbolFlags.count(this->InitSymbol)) && + "If set, InitSymbol should appear in InitialSymbolFlags map"); + } + + SymbolFlagsMap SymbolFlags; + SymbolStringPtr InitSymbol; + }; + + MaterializationUnit(Interface I) + : SymbolFlags(std::move(I.SymbolFlags)), + InitSymbol(std::move(I.InitSymbol)) {} + virtual ~MaterializationUnit() = default; + + /// Return the name of this materialization unit. Useful for debugging + /// output. + virtual StringRef getName() const = 0; + + /// Return the set of symbols that this source provides. + const SymbolFlagsMap &getSymbols() const { return SymbolFlags; } + + /// Returns the initialization symbol for this MaterializationUnit (if any). + const SymbolStringPtr &getInitializerSymbol() const { return InitSymbol; } + + /// Implementations of this method should materialize all symbols + /// in the materialzation unit, except for those that have been + /// previously discarded. + virtual void + materialize(std::unique_ptr R) = 0; + + /// Called by JITDylibs to notify MaterializationUnits that the given symbol + /// has been overridden. + void doDiscard(const JITDylib &JD, const SymbolStringPtr &Name) { + SymbolFlags.erase(Name); + if (InitSymbol == Name) { + DEBUG_WITH_TYPE("orc", { + dbgs() << "In " << getName() << ": discarding init symbol \"" + << *Name << "\"\n"; + }); + InitSymbol = nullptr; + } + discard(JD, std::move(Name)); + } + +protected: + SymbolFlagsMap SymbolFlags; + SymbolStringPtr InitSymbol; + +private: + virtual void anchor(); + + /// Implementations of this method should discard the given symbol + /// from the source (e.g. if the source is an LLVM IR Module and the + /// symbol is a function, delete the function body or mark it available + /// externally). + virtual void discard(const JITDylib &JD, const SymbolStringPtr &Name) = 0; +}; + +} // namespace llvm::orc + +#endif // LLVM_EXECUTIONENGINE_ORC_MATERIALIZATIONUNIT_H diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 049d843015d5ae..115fcee0b04f22 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -613,6 +613,28 @@ class CP_ASYNC_BULK_TENSOR_S2G_INTR { ImmArg>]; } +class CP_ASYNC_BULK_TENSOR_PREFETCH_INTR { + string Name = "int_nvvm_cp_async_bulk_tensor_prefetch_" # mode # "_" # dim # "d"; + + bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0); + int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0); + list Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets); + list TensorDimsTy = !listsplat(llvm_i32_ty, dim); + list ArgsTy = !listconcat( + [llvm_ptr_ty], // tensormap_ptr + TensorDimsTy, // actual tensor dims + Im2ColOffsetsTy, // im2col offsets + [llvm_i64_ty, // cache_hint + llvm_i1_ty] // Flag for cache_hint + ); + + int TempFlagsStartIdx = !add(dim, 2); + int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets); + list IntrProp = [IntrConvergent, + ReadOnly>, NoCapture>, + ImmArg>]; +} + let TargetPrefix = "nvvm" in { def int_nvvm_prmt : ClangBuiltin<"__nvvm_prmt">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], @@ -4902,6 +4924,8 @@ foreach dim = [1, 2, 3, 4, 5] in { def g2s.Name : DefaultAttrsIntrinsic<[], g2s.ArgsTy, g2s.IntrProp>; foreach s2g = [CP_ASYNC_BULK_TENSOR_S2G_INTR] in def s2g.Name : DefaultAttrsIntrinsic<[], s2g.ArgsTy, s2g.IntrProp>; + foreach prefetch = [CP_ASYNC_BULK_TENSOR_PREFETCH_INTR] in + def prefetch.Name : DefaultAttrsIntrinsic<[], prefetch.ArgsTy, prefetch.IntrProp>; } } diff --git a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h index 3b0cbcdd49c254..36a996632b71e3 100644 --- a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h +++ b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h @@ -8,9 +8,9 @@ //============================================================================== // // NOTE! -// llvm/lib/ProfileData/CtxInstrContextNode.h and +// llvm/include/llvm/ProfileData/CtxInstrContextNode.h and // compiler-rt/lib/ctx_profile/CtxInstrContextNode.h -// must be exact copies of each other +// must be exact copies of each other. // // compiler-rt creates these objects as part of the instrumentation runtime for // contextual profiling. LLVM only consumes them to convert a contextual tree @@ -114,4 +114,4 @@ class ContextNode final { }; } // namespace ctx_profile } // namespace llvm -#endif \ No newline at end of file +#endif diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 0c472c456bd5dd..2e7cf10d48cb62 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -4175,6 +4175,10 @@ bool NVPTXScopes::empty() const { return Scopes.size() == 0; } return CP_ASYNC_BULK_TENSOR_OPCODE(G2S, dim, mode, ); \ }() +#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(dim, mode) \ + (IsCacheHint ? NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode##_CH \ + : NVPTX::CP_ASYNC_BULK_TENSOR_PREFETCH_##dim##_##mode) + static unsigned GetCpAsyncBulkTensorS2GOpcode(size_t Dim, bool IsShared32, bool IsCacheHint, bool IsIm2Col) { if (IsIm2Col) { @@ -4242,6 +4246,55 @@ static unsigned GetCpAsyncBulkTensorG2SOpcode(size_t Dim, bool IsShared32, } } +static unsigned GetCpAsyncBulkTensorPrefetchOpcode(size_t Dim, bool IsCacheHint, + bool IsIm2Col) { + if (IsIm2Col) { + switch (Dim) { + case 3: + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(3D, IM2COL); + case 4: + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(4D, IM2COL); + case 5: + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(5D, IM2COL); + default: + llvm_unreachable("Invalid Dimension in im2col mode for " + "GetCpAsyncBulkTensorPrefetchOpcode."); + } + } else { + switch (Dim) { + case 1: + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(1D, TILE); + case 2: + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(2D, TILE); + case 3: + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(3D, TILE); + case 4: + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(4D, TILE); + case 5: + return GET_CP_ASYNC_BULK_TENSOR_OPCODE_PREFETCH(5D, TILE); + default: + llvm_unreachable("Invalid Dimension in tile mode for " + "GetCpAsyncBulkTensorPrefetchOpcode."); + } + } +} + +static size_t GetDimsFromIntrinsic(unsigned IID) { + switch (IID) { + case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d: + return 3; + case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d: + return 4; + case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d: + return 5; + default: + llvm_unreachable("Invalid im2col intrinsic in GetDimsFromIntrinsic."); + } +} + void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon(SDNode *N, bool IsIm2Col) { // We have {Chain, Intrinsic-ID} followed by the actual intrisic args: @@ -4250,21 +4303,8 @@ void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorG2SCommon(SDNode *N, // multicast_flag, cache_hint_flag} // NumOperands = {Chain, IID} + {Actual intrinsic args} // = {2} + {7 + dims + im2col_offsets} - auto getDimsFromIntrinsic = [](unsigned IID) { - switch (IID) { - case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d: - return 3; - case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d: - return 4; - case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d: - return 5; - default: - llvm_unreachable( - "Invalid im2col intrinsic in SelectCpAsyncBulkTensorG2SCommon."); - } - }; size_t NumOps = N->getNumOperands(); - size_t NumDims = IsIm2Col ? getDimsFromIntrinsic(N->getConstantOperandVal(1)) + size_t NumDims = IsIm2Col ? GetDimsFromIntrinsic(N->getConstantOperandVal(1)) : (NumOps - 9); // Offsets is always 'NumDims - 2' and only for im2col mode size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0; @@ -4316,6 +4356,30 @@ void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorS2GCommon(SDNode *N, ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops)); } +void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorPrefetchCommon(SDNode *N, + bool IsIm2Col) { + // We have {Chain, Intrinsic-ID} followed by the actual intrisic args: + // {src, dims{d0...dN}, im2col_offsets{dims-2} + // cache_hint, cache_hint_flag} + // NumOperands = {Chain, IID} + {Actual intrinsic args} + // = {2} + {3 + dims + im2col_offsets} + size_t NumOps = N->getNumOperands(); + size_t NumDims = IsIm2Col ? GetDimsFromIntrinsic(N->getConstantOperandVal(1)) + : (NumOps - 5); + // Offsets is always 'NumDims - 2' and only for im2col mode + size_t NumOffsets = IsIm2Col ? (NumDims - 2) : 0; + bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1; + size_t NumArgs = NumDims + NumOffsets + (IsCacheHint ? 2 : 1); + + SDLoc DL(N); + SmallVector Ops(N->ops().slice(2, NumArgs)); + Ops.push_back(N->getOperand(0)); // Chain operand + + unsigned Opcode = + GetCpAsyncBulkTensorPrefetchOpcode(NumDims, IsCacheHint, IsIm2Col); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops)); +} + bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) { unsigned IID = N->getConstantOperandVal(1); switch (IID) { @@ -4345,5 +4409,17 @@ bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) { case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d: SelectCpAsyncBulkTensorG2SCommon(N, /*IsIm2Col=*/true); return true; + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_1d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_2d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_3d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_4d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_tile_5d: + SelectCpAsyncBulkTensorPrefetchCommon(N); + return true; + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_3d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_4d: + case Intrinsic::nvvm_cp_async_bulk_tensor_prefetch_im2col_5d: + SelectCpAsyncBulkTensorPrefetchCommon(N, /*IsIm2Col=*/true); + return true; } } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 6aa4e9f615a481..d6c80a31b7463d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -94,6 +94,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { void SelectI128toV2I64(SDNode *N); void SelectCpAsyncBulkTensorG2SCommon(SDNode *N, bool IsIm2Col = false); void SelectCpAsyncBulkTensorS2GCommon(SDNode *N, bool IsIm2Col = false); + void SelectCpAsyncBulkTensorPrefetchCommon(SDNode *N, bool IsIm2Col = false); inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) { return CurDAG->getTargetConstant(Imm, DL, MVT::i32); } diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 536be22510703d..5878940812f62b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -605,6 +605,52 @@ foreach dim = [1, 2, 3, 4, 5] in { } } +// TMA Prefetch from Global memory to L2 cache +class PREFETCH_STRINGS { + string prefix = "cp.async.bulk.prefetch.tensor"; + string dir = "L2.global"; + string inst_name = prefix + # "." # dim # "d" + # "." # dir + # "." # mode + # !if(ch, ".L2::cache_hint", ""); + string intr_name = "CP_ASYNC_BULK_TENSOR_PREFETCH_" + # dim # "D" + # !if(!eq(mode, "tile"), "_TILE", "_IM2COL"); +} + +multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR { + defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); + defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); + defvar asm_str_default = " [$tmap, {{" # dims_str # "}}]"; + + defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0); + defvar im2col_dag = !if(!eq(mode, "im2col"), + !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), + (ins)); + defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", "); + defvar im2col_asm_str = ", {{" # im2col_str # "}}"; + + defvar asm_str = !if(!eq(mode, "im2col"), + !strconcat(asm_str_default, im2col_asm_str), asm_str_default); + + def "": NVPTXInst<(outs), + !con((ins Int64Regs:$tmap), dims_dag, im2col_dag), + !strconcat(PREFETCH_STRINGS.inst_name, asm_str, ";"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def _CH: NVPTXInst<(outs), + !con((ins Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int64Regs:$ch)), + !strconcat(PREFETCH_STRINGS.inst_name, asm_str, ", $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; +} + +foreach dim = [1, 2, 3, 4, 5] in { + foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { + defm PREFETCH_STRINGS.intr_name : + CP_ASYNC_BULK_TENSOR_PREFETCH_INTR; + } +} + //----------------------------------- // MBarrier Functions //----------------------------------- diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index f0981a3b1a82f3..2643a1a708dd25 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -30,18 +30,6 @@ using namespace llvm; using namespace LegalityPredicates; using namespace LegalizeMutations; -// Is this type supported by scalar FP arithmetic operations given the current -// subtarget. -static LegalityPredicate typeIsScalarFPArith(unsigned TypeIdx, - const RISCVSubtarget &ST) { - return [=, &ST](const LegalityQuery &Query) { - return Query.Types[TypeIdx].isScalar() && - ((ST.hasStdExtZfh() && Query.Types[TypeIdx].getSizeInBits() == 16) || - (ST.hasStdExtF() && Query.Types[TypeIdx].getSizeInBits() == 32) || - (ST.hasStdExtD() && Query.Types[TypeIdx].getSizeInBits() == 64)); - }; -} - static LegalityPredicate typeIsLegalIntOrFPVec(unsigned TypeIdx, std::initializer_list IntOrFPVecTys, @@ -498,7 +486,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG, G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM}) - .legalIf(typeIsScalarFPArith(0, ST)); + .legalFor(ST.hasStdExtF(), {s32}) + .legalFor(ST.hasStdExtD(), {s64}) + .legalFor(ST.hasStdExtZfh(), {s16}); getActionDefinitionsBuilder(G_FREM) .libcallFor({s32, s64}) @@ -506,51 +496,55 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .scalarize(0); getActionDefinitionsBuilder(G_FCOPYSIGN) - .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST))); + .legalFor(ST.hasStdExtF(), {{s32, s32}}) + .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}}) + .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}}) + .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}}); // FIXME: Use Zfhmin. - getActionDefinitionsBuilder(G_FPTRUNC).legalIf( - [=, &ST](const LegalityQuery &Query) -> bool { - return (ST.hasStdExtD() && typeIs(0, s32)(Query) && - typeIs(1, s64)(Query)) || - (ST.hasStdExtZfh() && typeIs(0, s16)(Query) && - typeIs(1, s32)(Query)) || - (ST.hasStdExtZfh() && ST.hasStdExtD() && typeIs(0, s16)(Query) && - typeIs(1, s64)(Query)); - }); - getActionDefinitionsBuilder(G_FPEXT).legalIf( - [=, &ST](const LegalityQuery &Query) -> bool { - return (ST.hasStdExtD() && typeIs(0, s64)(Query) && - typeIs(1, s32)(Query)) || - (ST.hasStdExtZfh() && typeIs(0, s32)(Query) && - typeIs(1, s16)(Query)) || - (ST.hasStdExtZfh() && ST.hasStdExtD() && typeIs(0, s64)(Query) && - typeIs(1, s16)(Query)); - }); + getActionDefinitionsBuilder(G_FPTRUNC) + .legalFor(ST.hasStdExtD(), {{s32, s64}}) + .legalFor(ST.hasStdExtZfh(), {{s16, s32}}) + .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}}); + getActionDefinitionsBuilder(G_FPEXT) + .legalFor(ST.hasStdExtD(), {{s64, s32}}) + .legalFor(ST.hasStdExtZfh(), {{s32, s16}}) + .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}}); getActionDefinitionsBuilder(G_FCMP) - .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST))) - .clampScalar(0, sXLen, sXLen); + .legalFor(ST.hasStdExtF(), {{sXLen, s32}}) + .legalFor(ST.hasStdExtD(), {{sXLen, s64}}) + .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}}) + .clampScalar(ST.hasStdExtF(), 0, sXLen, sXLen); // TODO: Support vector version of G_IS_FPCLASS. getActionDefinitionsBuilder(G_IS_FPCLASS) - .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST))); + .customFor(ST.hasStdExtF(), {{s1, s32}}) + .customFor(ST.hasStdExtD(), {{s1, s64}}) + .customFor(ST.hasStdExtZfh(), {{s1, s16}}); getActionDefinitionsBuilder(G_FCONSTANT) - .legalIf(typeIsScalarFPArith(0, ST)) + .legalFor(ST.hasStdExtF(), {s32}) + .legalFor(ST.hasStdExtD(), {s64}) + .legalFor(ST.hasStdExtZfh(), {s16}) .lowerFor({s32, s64}); - auto &FPToIActions = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}); - FPToIActions.legalIf(all(typeInSet(0, {sXLen}), typeIsScalarFPArith(1, ST))); - if (ST.is64Bit()) - FPToIActions.customIf(all(typeInSet(0, {s32}), typeIsScalarFPArith(1, ST))); - FPToIActions.widenScalarToNextPow2(0) + getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) + .legalFor(ST.hasStdExtF(), {{sXLen, s32}}) + .legalFor(ST.hasStdExtD(), {{sXLen, s64}}) + .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}}) + .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}}) + .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}}) + .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}}) + .widenScalarToNextPow2(0) .minScalar(0, s32) .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}}) .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) - .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {sXLen}))) + .legalFor(ST.hasStdExtF(), {{s32, sXLen}}) + .legalFor(ST.hasStdExtD(), {{s64, sXLen}}) + .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}}) .widenScalarToNextPow2(1) .minScalar(1, sXLen) .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}}) diff --git a/llvm/lib/Target/RISCV/RISCVProfiles.td b/llvm/lib/Target/RISCV/RISCVProfiles.td index cbf2a2eddf38ed..bcb776e682aea7 100644 --- a/llvm/lib/Target/RISCV/RISCVProfiles.td +++ b/llvm/lib/Target/RISCV/RISCVProfiles.td @@ -45,9 +45,7 @@ defvar RVA22U64Features = !listconcat(RVA20U64BaseFeatures, [FeatureStdExtZa64rs, FeatureStdExtZihpm, FeatureStdExtZihintpause, - FeatureStdExtZba, - FeatureStdExtZbb, - FeatureStdExtZbs, + FeatureStdExtB, FeatureStdExtZic64b, FeatureStdExtZicbom, FeatureStdExtZicbop, @@ -92,9 +90,7 @@ defvar RVB23U64Features = !listconcat(RVA20U64BaseFeatures, [FeatureStdExtZihpm, FeatureStdExtZa64rs, FeatureStdExtZihintpause, - FeatureStdExtZba, - FeatureStdExtZbb, - FeatureStdExtZbs, + FeatureStdExtB, FeatureStdExtZic64b, FeatureStdExtZicbom, FeatureStdExtZicbop, @@ -128,9 +124,7 @@ defvar RVB23S64Features = !listconcat(RVB23U64Features, defvar RVM23U32Features = [Feature32Bit, FeatureStdExtI, FeatureStdExtM, - FeatureStdExtZba, - FeatureStdExtZbb, - FeatureStdExtZbs, + FeatureStdExtB, FeatureStdExtZicond, FeatureStdExtZihintpause, FeatureStdExtZihintntl, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 253b768f34a07c..3888d207206ec8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27328,8 +27328,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, case Intrinsic::x86_t2rpntlvwz0t1_internal: case Intrinsic::x86_t2rpntlvwz1_internal: case Intrinsic::x86_t2rpntlvwz1t1_internal: { - if (!Subtarget.hasAMXTILE()) - break; auto *X86MFI = DAG.getMachineFunction().getInfo(); X86MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA); unsigned IntNo = Op.getConstantOperandVal(1); diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 3af51a1d4d30be..1b95450596314b 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4737,6 +4737,7 @@ static bool isAMXOpcode(unsigned Opc) { case X86::TILELOADD_EVEX: case X86::TILESTORED_EVEX: case X86::PTILEPAIRLOAD: + case X86::PTILEPAIRSTORE: return true; } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 454fe5a91d375a..ede89b099e8deb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2904,7 +2904,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (auto *SI = dyn_cast(LHS)) { // We cannot do this fold for elementwise select since ShuffleVector is // not elementwise. - if (SI->getCondition()->getType()->isIntegerTy()) { + if (SI->getCondition()->getType()->isIntegerTy() && + (isa(RHS) || + isGuaranteedNotToBePoison(SI->getCondition()))) { if (Instruction *I = FoldOpIntoSelect(SVI, SI)) return I; } diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 4a39a484e41c29..746fdaa340fc7a 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -577,8 +577,8 @@ static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache, return I; } - if (auto *RMWI = dyn_cast(I)) { - assert(RMWI->getOperation() == AtomicRMWInst::Xchg && + if (isa(I)) { + assert(cast(I)->getOperation() == AtomicRMWInst::Xchg && "Only Xchg is allowed for pointer values"); // A RMW Xchg is a combined atomic load and store, so we can treat the // loaded value as a base pointer. diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4a73b9c2c4b34a..da8e0d8cc09a8b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10986,7 +10986,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, // If the selects are the only uses of the compares, they will be // dead and we can adjust the cost by removing their cost. if (VI && SelectOnly) { - assert(!Ty->isVectorTy() && "Expected only for scalar type."); + assert((!Ty->isVectorTy() || SLPReVec) && + "Expected only for scalar type."); auto *CI = cast(VI->getOperand(0)); IntrinsicCost -= TTI->getCmpSelInstrCost( CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(), diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 08db0d51ef3abb..8b1a4aeb88f81f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -58,7 +58,7 @@ static cl::opt PrintVPlansInDotFormat( "vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans")); -#define DEBUG_TYPE "vplan" +#define DEBUG_TYPE "loop-vectorize" #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-prefetch.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-prefetch.ll new file mode 100644 index 00000000000000..cb3b0c03f75d09 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-prefetch.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX %s +; RUN: %if ptxas-12.3 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %} + +target triple = "nvptx64-nvidia-cuda" + +declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.1d(ptr %tm, i32 %d0, i64 %ch, i1 %flag); +declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.2d(ptr %tm, i32 %d0, i32 %d1, i64 %ch, i1 %flag); +declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.3d(ptr %tm, i32 %d0, i32 %d1, i32 %d2, i64 %ch, i1 %flag); +declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.4d(ptr %tm, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i64 %ch, i1 %flag); +declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.5d(ptr %tm, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i64 %ch, i1 %flag); + +declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.3d(ptr %tm, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i64 %ch, i1 %f1); +declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.4d(ptr %tm, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i16 %im2col0, i16 %im2col1, i64 %ch, i1 %f1); +declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.5d(ptr %tm, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i16 %im2col0, i16 %im2col1, i16 %im2col2, i64 %ch, i1 %f1); + +; CHECK-LABEL: cp_async_bulk_tensor_prefetch_tile_1d +define void @cp_async_bulk_tensor_prefetch_tile_1d(ptr %tmap, i32 %d0, i64 %ch) { +; CHECK-PTX-LABEL: cp_async_bulk_tensor_prefetch_tile_1d( +; CHECK-PTX: { +; CHECK-PTX-NEXT: .reg .b32 %r<2>; +; CHECK-PTX-NEXT: .reg .b64 %rd<3>; +; CHECK-PTX-EMPTY: +; CHECK-PTX-NEXT: // %bb.0: +; CHECK-PTX-NEXT: ld.param.u64 %rd1, [cp_async_bulk_tensor_prefetch_tile_1d_param_0]; +; CHECK-PTX-NEXT: ld.param.u32 %r1, [cp_async_bulk_tensor_prefetch_tile_1d_param_1]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.1d.L2.global.tile [%rd1, {%r1}]; +; CHECK-PTX-NEXT: ld.param.u64 %rd2, [cp_async_bulk_tensor_prefetch_tile_1d_param_2]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.1d.L2.global.tile.L2::cache_hint [%rd1, {%r1}], %rd2; +; CHECK-PTX-NEXT: ret; + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.1d(ptr %tmap, i32 %d0, i64 undef, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.1d(ptr %tmap, i32 %d0, i64 %ch, i1 1) + ret void +} + +; CHECK-LABEL: cp_async_bulk_tensor_prefetch_tile_2d +define void @cp_async_bulk_tensor_prefetch_tile_2d(i32 %flag, ptr %tmap, i32 %d0, i32 %d1, i64 %ch) { +; CHECK-PTX-LABEL: cp_async_bulk_tensor_prefetch_tile_2d( +; CHECK-PTX: { +; CHECK-PTX-NEXT: .reg .b32 %r<3>; +; CHECK-PTX-NEXT: .reg .b64 %rd<3>; +; CHECK-PTX-EMPTY: +; CHECK-PTX-NEXT: // %bb.0: +; CHECK-PTX-NEXT: ld.param.u64 %rd1, [cp_async_bulk_tensor_prefetch_tile_2d_param_1]; +; CHECK-PTX-NEXT: ld.param.u32 %r1, [cp_async_bulk_tensor_prefetch_tile_2d_param_2]; +; CHECK-PTX-NEXT: ld.param.u32 %r2, [cp_async_bulk_tensor_prefetch_tile_2d_param_3]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.2d.L2.global.tile [%rd1, {%r1, %r2}]; +; CHECK-PTX-NEXT: ld.param.u64 %rd2, [cp_async_bulk_tensor_prefetch_tile_2d_param_4]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.2d.L2.global.tile.L2::cache_hint [%rd1, {%r1, %r2}], %rd2; +; CHECK-PTX-NEXT: ret; + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.2d(ptr %tmap, i32 %d0, i32 %d1, i64 undef, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.2d(ptr %tmap, i32 %d0, i32 %d1, i64 %ch, i1 1) + ret void +} + +; CHECK-LABEL: cp_async_bulk_tensor_prefetch_3d +define void @cp_async_bulk_tensor_prefetch_3d(i32 %flag, ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i64 %ch) { +; CHECK-PTX-LABEL: cp_async_bulk_tensor_prefetch_3d( +; CHECK-PTX: { +; CHECK-PTX-NEXT: .reg .b16 %rs<2>; +; CHECK-PTX-NEXT: .reg .b32 %r<4>; +; CHECK-PTX-NEXT: .reg .b64 %rd<3>; +; CHECK-PTX-EMPTY: +; CHECK-PTX-NEXT: // %bb.0: +; CHECK-PTX-NEXT: ld.param.u64 %rd1, [cp_async_bulk_tensor_prefetch_3d_param_1]; +; CHECK-PTX-NEXT: ld.param.u32 %r1, [cp_async_bulk_tensor_prefetch_3d_param_2]; +; CHECK-PTX-NEXT: ld.param.u32 %r2, [cp_async_bulk_tensor_prefetch_3d_param_3]; +; CHECK-PTX-NEXT: ld.param.u32 %r3, [cp_async_bulk_tensor_prefetch_3d_param_4]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.3d.L2.global.tile [%rd1, {%r1, %r2, %r3}]; +; CHECK-PTX-NEXT: ld.param.u64 %rd2, [cp_async_bulk_tensor_prefetch_3d_param_6]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.3d.L2.global.tile.L2::cache_hint [%rd1, {%r1, %r2, %r3}], %rd2; +; CHECK-PTX-NEXT: ld.param.u16 %rs1, [cp_async_bulk_tensor_prefetch_3d_param_5]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.3d.L2.global.im2col [%rd1, {%r1, %r2, %r3}], {%rs1}; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.3d.L2.global.im2col.L2::cache_hint [%rd1, {%r1, %r2, %r3}], {%rs1}, %rd2; +; CHECK-PTX-NEXT: ret; + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.3d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i64 undef, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.3d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i64 %ch, i1 1) + + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.3d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i64 undef, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.3d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i64 %ch, i1 1) + ret void +} + +; CHECK-LABEL: cp_async_bulk_tensor_prefetch_4d +define void @cp_async_bulk_tensor_prefetch_4d(i32 %flag, ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i16 %im2col0, i16 %im2col1, i64 %ch) { +; CHECK-PTX-LABEL: cp_async_bulk_tensor_prefetch_4d( +; CHECK-PTX: { +; CHECK-PTX-NEXT: .reg .b16 %rs<3>; +; CHECK-PTX-NEXT: .reg .b32 %r<5>; +; CHECK-PTX-NEXT: .reg .b64 %rd<3>; +; CHECK-PTX-EMPTY: +; CHECK-PTX-NEXT: // %bb.0: +; CHECK-PTX-NEXT: ld.param.u64 %rd1, [cp_async_bulk_tensor_prefetch_4d_param_1]; +; CHECK-PTX-NEXT: ld.param.u32 %r1, [cp_async_bulk_tensor_prefetch_4d_param_2]; +; CHECK-PTX-NEXT: ld.param.u32 %r2, [cp_async_bulk_tensor_prefetch_4d_param_3]; +; CHECK-PTX-NEXT: ld.param.u32 %r3, [cp_async_bulk_tensor_prefetch_4d_param_4]; +; CHECK-PTX-NEXT: ld.param.u32 %r4, [cp_async_bulk_tensor_prefetch_4d_param_5]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.4d.L2.global.tile [%rd1, {%r1, %r2, %r3, %r4}]; +; CHECK-PTX-NEXT: ld.param.u64 %rd2, [cp_async_bulk_tensor_prefetch_4d_param_8]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.4d.L2.global.tile.L2::cache_hint [%rd1, {%r1, %r2, %r3, %r4}], %rd2; +; CHECK-PTX-NEXT: ld.param.u16 %rs1, [cp_async_bulk_tensor_prefetch_4d_param_6]; +; CHECK-PTX-NEXT: ld.param.u16 %rs2, [cp_async_bulk_tensor_prefetch_4d_param_7]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.4d.L2.global.im2col [%rd1, {%r1, %r2, %r3, %r4}], {%rs1, %rs2}; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.4d.L2.global.im2col.L2::cache_hint [%rd1, {%r1, %r2, %r3, %r4}], {%rs1, %rs2}, %rd2; +; CHECK-PTX-NEXT: ret; + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.4d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i64 undef, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.4d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i64 %ch, i1 1) + + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.4d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i16 %im2col0, i16 %im2col1, i64 undef, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.4d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i16 %im2col0, i16 %im2col1, i64 %ch, i1 1) + ret void +} + +; CHECK-LABEL: cp_async_bulk_tensor_prefetch_5d +define void @cp_async_bulk_tensor_prefetch_5d(i32 %flag, ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i16 %im2col0, i16 %im2col1, i16 %im2col2, i64 %ch) { +; CHECK-PTX-LABEL: cp_async_bulk_tensor_prefetch_5d( +; CHECK-PTX: { +; CHECK-PTX-NEXT: .reg .b16 %rs<4>; +; CHECK-PTX-NEXT: .reg .b32 %r<6>; +; CHECK-PTX-NEXT: .reg .b64 %rd<3>; +; CHECK-PTX-EMPTY: +; CHECK-PTX-NEXT: // %bb.0: +; CHECK-PTX-NEXT: ld.param.u64 %rd1, [cp_async_bulk_tensor_prefetch_5d_param_1]; +; CHECK-PTX-NEXT: ld.param.u32 %r1, [cp_async_bulk_tensor_prefetch_5d_param_2]; +; CHECK-PTX-NEXT: ld.param.u32 %r2, [cp_async_bulk_tensor_prefetch_5d_param_3]; +; CHECK-PTX-NEXT: ld.param.u32 %r3, [cp_async_bulk_tensor_prefetch_5d_param_4]; +; CHECK-PTX-NEXT: ld.param.u32 %r4, [cp_async_bulk_tensor_prefetch_5d_param_5]; +; CHECK-PTX-NEXT: ld.param.u32 %r5, [cp_async_bulk_tensor_prefetch_5d_param_6]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.5d.L2.global.tile [%rd1, {%r1, %r2, %r3, %r4, %r5}]; +; CHECK-PTX-NEXT: ld.param.u64 %rd2, [cp_async_bulk_tensor_prefetch_5d_param_10]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.5d.L2.global.tile.L2::cache_hint [%rd1, {%r1, %r2, %r3, %r4, %r5}], %rd2; +; CHECK-PTX-NEXT: ld.param.u16 %rs1, [cp_async_bulk_tensor_prefetch_5d_param_7]; +; CHECK-PTX-NEXT: ld.param.u16 %rs2, [cp_async_bulk_tensor_prefetch_5d_param_8]; +; CHECK-PTX-NEXT: ld.param.u16 %rs3, [cp_async_bulk_tensor_prefetch_5d_param_9]; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.5d.L2.global.im2col [%rd1, {%r1, %r2, %r3, %r4, %r5}], {%rs1, %rs2, %rs3}; +; CHECK-PTX-NEXT: cp.async.bulk.prefetch.tensor.5d.L2.global.im2col.L2::cache_hint [%rd1, {%r1, %r2, %r3, %r4, %r5}], {%rs1, %rs2, %rs3}, %rd2; +; CHECK-PTX-NEXT: ret; + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.5d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i64 undef, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.5d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i64 %ch, i1 1) + + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.5d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i16 %im2col0, i16 %im2col1, i16 %im2col2, i64 undef, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.5d(ptr %tmap, i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i16 %im2col0, i16 %im2col1, i16 %im2col2, i64 %ch, i1 1) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index 8d767059024045..b611442eb9ba4e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -307,8 +307,8 @@ # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FCONSTANT (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_VASTART (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK @@ -354,8 +354,8 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FCMP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_SCMP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined @@ -453,27 +453,27 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FADD (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FSUB (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FMUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FMA (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FMAD (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FDIV (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FREM (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK @@ -509,28 +509,28 @@ # DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FPTOSI (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FPTOUI (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_SITOFP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_UITOFP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FPTOSI_SAT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined @@ -539,25 +539,25 @@ # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FCANONICALIZE (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FMINNUM (opcode {{[0-9]+}}): 1 type index # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FMAXNUM (opcode {{[0-9]+}}): 1 type index # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FMINNUM_IEEE (opcode {{[0-9]+}}): 1 type index # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined @@ -710,8 +710,8 @@ # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FSQRT (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 1, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FFLOOR (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index a89ae1742bb3af..7624071f4f93ec 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -592,13 +592,13 @@ ; RVI20U64: .attribute 5, "rv64i2p1" ; RVA20U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zmmul1p0_za128rs1p0" ; RVA20S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zmmul1p0_za128rs1p0_ssccptr1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0" -; RVA22U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" -; RVA22S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscounterenw1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0_svinval1p0_svpbmt1p0" -; RVA23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_supm1p0" -; RVA23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_sha1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" -; RVB23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" -; RVB23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" -; RVM23U32: .attribute 5, "rv32i2p1_m2p0_zicbop1p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zimop1p0_zmmul1p0_zca1p0_zcb1p0_zce1p0_zcmop1p0_zcmp1p0_zcmt1p0_zba1p0_zbb1p0_zbs1p0" +; RVA22U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" +; RVA22S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscounterenw1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0_svinval1p0_svpbmt1p0" +; RVA23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_supm1p0" +; RVA23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_sha1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" +; RVB23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" +; RVB23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" +; RVM23U32: .attribute 5, "rv32i2p1_m2p0_b1p0_zicbop1p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zimop1p0_zmmul1p0_zca1p0_zcb1p0_zce1p0_zcmop1p0_zcmp1p0_zcmt1p0_zba1p0_zbb1p0_zbs1p0" define i32 @addi(i32 %a) { %1 = add i32 %a, 1 diff --git a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll index b06a9369b9762d..2025ee94a97405 100644 --- a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll +++ b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll @@ -80,18 +80,18 @@ define void @test_amx_spill(i8* %pointer, i8* %base, i64 %stride) #0 { ; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm0 ; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 ; CHECK-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6 -; CHECK-NEXT: tilestored %tmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 1024-byte Folded Spill -; CHECK-NEXT: tilestored %tmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 1024-byte Folded Spill +; CHECK-NEXT: movabsq $64, %rcx +; CHECK-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill +; CHECK-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill ; CHECK-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6 -; CHECK-NEXT: tilestored %tmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 1024-byte Folded Spill -; CHECK-NEXT: tilestored %tmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 1024-byte Folded Spill +; CHECK-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill +; CHECK-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill ; CHECK-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6 -; CHECK-NEXT: tilestored %tmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 1024-byte Folded Spill -; CHECK-NEXT: tilestored %tmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 1024-byte Folded Spill +; CHECK-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill +; CHECK-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill ; CHECK-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6 ; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx) ; CHECK-NEXT: tilestored %tmm5, (%rsi,%rdx) -; CHECK-NEXT: movabsq $64, %rcx ; CHECK-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload ; CHECK-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload ; CHECK-NEXT: tilestored %tmm4, (%rsi,%rdx) diff --git a/llvm/test/ExecutionEngine/JITLink/Generic/Inputs/sectcreate-data.txt b/llvm/test/ExecutionEngine/JITLink/Generic/Inputs/sect@create/sectcreate-data.txt similarity index 100% rename from llvm/test/ExecutionEngine/JITLink/Generic/Inputs/sectcreate-data.txt rename to llvm/test/ExecutionEngine/JITLink/Generic/Inputs/sect@create/sectcreate-data.txt diff --git a/llvm/test/ExecutionEngine/JITLink/Generic/sectcreate.test b/llvm/test/ExecutionEngine/JITLink/Generic/sectcreate.test index c09513a7d3707c..08b6372dcf2c73 100644 --- a/llvm/test/ExecutionEngine/JITLink/Generic/sectcreate.test +++ b/llvm/test/ExecutionEngine/JITLink/Generic/sectcreate.test @@ -1,6 +1,6 @@ # RUN: llc -filetype=obj -o %t.o %S/Inputs/main-ret-0.ll # RUN: llvm-jitlink -noexec \ -# RUN: -sectcreate __data,%S/Inputs/sectcreate-data.txt@foo=0 \ +# RUN: -sectcreate __data,%S/Inputs/sect@create/sectcreate-data.txt@foo=0 \ # RUN: %t.o # # Use -sectcreate to create a section from a data file. diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 163d9c9557b239..9fb68b5399c845 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -2411,6 +2411,18 @@ define <4 x i32> @shuf_same_length_vec_select(<4 x i1> %cond) { ret <4 x i32> %shuf } +; Make sure we do not fold in this case. +define <4 x i8> @shuf_cmp_may_be_poison(<4 x i8> %x, <4 x i8> %y, i1 %cmp) { +; CHECK-LABEL: @shuf_cmp_may_be_poison( +; CHECK-NEXT: [[Y:%.*]] = select i1 [[CMP:%.*]], <4 x i8> [[Y1:%.*]], <4 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> , <4 x i32> +; CHECK-NEXT: ret <4 x i8> [[TMP1]] +; + %sel = select i1 %cmp, <4 x i8> %y, <4 x i8> + %shuf = shufflevector <4 x i8> %sel, <4 x i8> , <4 x i32> + ret <4 x i8> %shuf +} + declare i1 @cond() declare <4 x i32> @value() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index a38835f5613fd8..d68556fca4774f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; This is the loop in c++ being vectorize in this file with ;vector.reverse ; #pragma clang loop vectorize_width(4, scalable) @@ -195,12 +194,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: LV: Loop does not require scalar epilogue -; CHECK-NEXT: LV: Loop does not require scalar epilogue -; CHECK-NEXT: LV: Interleaving disabled by the pass manager -; CHECK-NEXT: LV: Loop does not require scalar epilogue -; CHECK-NEXT: LV: Vectorizing: innermost loop. -; CHECK-EMPTY: +; CHECK: LV: Loop does not require scalar epilogue ; entry: %cmp7 = icmp sgt i32 %n, 0 @@ -414,11 +408,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: LV: Loop does not require scalar epilogue -; CHECK-NEXT: LV: Loop does not require scalar epilogue -; CHECK-NEXT: LV: Interleaving disabled by the pass manager -; CHECK-NEXT: LV: Loop does not require scalar epilogue -; CHECK-NEXT: LV: Vectorizing: innermost loop. +; CHECK: LV: Loop does not require scalar epilogue ; entry: %cmp7 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll index 3d00ddf89aaa3b..b312688b7932dc 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -94,3 +94,43 @@ entry: %23 = fcmp ogt <8 x float> zeroinitializer, %19 ret void } + +define void @test3(float %0) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2) +; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; +entry: + br label %for.body.lr.ph + +for.body.lr.ph: + br i1 false, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %for.body.lr.ph + %1 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %5, %for.body ] + %2 = phi <2 x float> [ zeroinitializer, %for.body.lr.ph ], [ %6, %for.body ] + ret void + +for.body: + %3 = load <2 x float>, ptr null, align 4 + %4 = fcmp olt <2 x float> zeroinitializer, %3 + %5 = select <2 x i1> , <2 x float> %3, <2 x float> zeroinitializer + %6 = select <2 x i1> %4, <2 x float> %3, <2 x float> zeroinitializer + br label %for.cond.cleanup +} diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index cc144cda18e1a3..261daae5e67f94 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -1740,7 +1740,7 @@ static Error addSectCreates(Session &S, StringRef SCArg(*SCItr); - auto [SectAndFileName, ExtraSymbolsString] = SCArg.split('@'); + auto [SectAndFileName, ExtraSymbolsString] = SCArg.rsplit('@'); auto [SectName, FileName] = SectAndFileName.rsplit(','); if (SectName.empty()) return make_error("In -sectcreate=" + SCArg + diff --git a/mlir/include/mlir/IR/Dominance.h b/mlir/include/mlir/IR/Dominance.h index 2536ce585b3fdd..95c99bd59f7b2f 100644 --- a/mlir/include/mlir/IR/Dominance.h +++ b/mlir/include/mlir/IR/Dominance.h @@ -141,8 +141,8 @@ class DominanceInfo : public detail::DominanceInfoBase { /// are in the same block and A properly dominates B within the block, or if /// the block that contains A properly dominates the block that contains B. In /// an SSACFG region, Operation A dominates Operation B in the same block if A - /// preceeds B. In a Graph region, all operations in a block dominate all - /// other operations in the same block. + /// preceeds B. In a Graph region, all operations in a block properly dominate + /// all operations in the same block. /// /// The `enclosingOpOk` flag says whether we should return true if the B op /// is enclosed by a region on A. @@ -176,9 +176,14 @@ class DominanceInfo : public detail::DominanceInfoBase { /// Return true if the specified block A properly dominates block B, i.e.: if /// block A contains block B, or if the region which contains block A also /// contains block B or some parent of block B and block A dominates that - /// block in that kind of region. In an SSACFG region, block A dominates - /// block B if all control flow paths from the entry block to block B flow - /// through block A. In a Graph region, all blocks dominate all other blocks. + /// block in that kind of region. + /// + /// In an SSACFG region, block A dominates block B if all control flow paths + /// from the entry block to block B flow through block A. + /// + /// Graph regions have only a single block. To be consistent with "proper + /// dominance" of ops, the single block is considered to properly dominate + /// itself in a graph region. bool properlyDominates(Block *a, Block *b) const { return super::properlyDominates(a, b); } diff --git a/mlir/lib/IR/Dominance.cpp b/mlir/lib/IR/Dominance.cpp index 2b138ae223546e..31f7e7dbc925ce 100644 --- a/mlir/lib/IR/Dominance.cpp +++ b/mlir/lib/IR/Dominance.cpp @@ -34,7 +34,8 @@ DominanceInfoBase::~DominanceInfoBase() { delete entry.second.getPointer(); } -template void DominanceInfoBase::invalidate() { +template +void DominanceInfoBase::invalidate() { for (auto entry : dominanceInfos) delete entry.second.getPointer(); dominanceInfos.clear(); @@ -217,9 +218,10 @@ template bool DominanceInfoBase::properlyDominates(Block *a, Block *b) const { assert(a && b && "null blocks not allowed"); - // A block dominates itself but does not properly dominate itself. + // A block dominates, but does not properly dominate, itself unless this + // is a graph region. if (a == b) - return false; + return !hasSSADominance(a); // If both blocks are not in the same region, `a` properly dominates `b` if // `b` is defined in an operation region that (recursively) ends up being @@ -269,7 +271,7 @@ bool DominanceInfo::properlyDominatesImpl(Operation *a, Operation *b, Block *aBlock = a->getBlock(), *bBlock = b->getBlock(); assert(aBlock && bBlock && "operations must be in a block"); - // An instruction dominates, but does not properlyDominate, itself unless this + // An operation dominates, but does not properly dominate, itself unless this // is a graph region. if (a == b) return !hasSSADominance(aBlock); diff --git a/mlir/test/Analysis/test-dominance.mlir b/mlir/test/Analysis/test-dominance.mlir index 3c53193db7f72f..a926a8271200a3 100644 --- a/mlir/test/Analysis/test-dominance.mlir +++ b/mlir/test/Analysis/test-dominance.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-print-dominance))" -split-input-file 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-print-dominance))" -split-input-file | FileCheck %s // CHECK-LABEL: Testing : func_condBranch func.func @func_condBranch(%cond : i1) { @@ -10,40 +10,117 @@ func.func @func_condBranch(%cond : i1) { ^exit: return } -// CHECK-LABEL: --- DominanceInfo --- -// CHECK-NEXT: Nearest(0, 0) = 0 -// CHECK-NEXT: Nearest(0, 1) = 0 -// CHECK-NEXT: Nearest(0, 2) = 0 -// CHECK-NEXT: Nearest(0, 3) = 0 + +// CHECK: --- DominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 0 +// CHECK: Nearest(0, 2) = 0 +// CHECK: Nearest(0, 3) = 0 +// CHECK: Nearest(0, 4) = 4 // CHECK: Nearest(1, 0) = 0 -// CHECK-NEXT: Nearest(1, 1) = 1 -// CHECK-NEXT: Nearest(1, 2) = 0 -// CHECK-NEXT: Nearest(1, 3) = 0 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 0 +// CHECK: Nearest(1, 3) = 0 +// CHECK: Nearest(1, 4) = 4 // CHECK: Nearest(2, 0) = 0 -// CHECK-NEXT: Nearest(2, 1) = 0 -// CHECK-NEXT: Nearest(2, 2) = 2 -// CHECK-NEXT: Nearest(2, 3) = 0 +// CHECK: Nearest(2, 1) = 0 +// CHECK: Nearest(2, 2) = 2 +// CHECK: Nearest(2, 3) = 0 +// CHECK: Nearest(2, 4) = 4 // CHECK: Nearest(3, 0) = 0 -// CHECK-NEXT: Nearest(3, 1) = 0 -// CHECK-NEXT: Nearest(3, 2) = 0 -// CHECK-NEXT: Nearest(3, 3) = 3 -// CHECK-LABEL: --- PostDominanceInfo --- -// CHECK-NEXT: Nearest(0, 0) = 0 -// CHECK-NEXT: Nearest(0, 1) = 3 -// CHECK-NEXT: Nearest(0, 2) = 3 -// CHECK-NEXT: Nearest(0, 3) = 3 +// CHECK: Nearest(3, 1) = 0 +// CHECK: Nearest(3, 2) = 0 +// CHECK: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 4) = 4 +// CHECK: Nearest(4, 0) = 4 +// CHECK: Nearest(4, 1) = 4 +// CHECK: Nearest(4, 2) = 4 +// CHECK: Nearest(4, 3) = 4 +// CHECK: Nearest(4, 4) = 4 + +// CHECK: --- PostDominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 3 +// CHECK: Nearest(0, 2) = 3 +// CHECK: Nearest(0, 3) = 3 +// CHECK: Nearest(0, 4) = 4 // CHECK: Nearest(1, 0) = 3 -// CHECK-NEXT: Nearest(1, 1) = 1 -// CHECK-NEXT: Nearest(1, 2) = 3 -// CHECK-NEXT: Nearest(1, 3) = 3 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 3 +// CHECK: Nearest(1, 3) = 3 +// CHECK: Nearest(1, 4) = 4 // CHECK: Nearest(2, 0) = 3 -// CHECK-NEXT: Nearest(2, 1) = 3 -// CHECK-NEXT: Nearest(2, 2) = 2 -// CHECK-NEXT: Nearest(2, 3) = 3 +// CHECK: Nearest(2, 1) = 3 +// CHECK: Nearest(2, 2) = 2 +// CHECK: Nearest(2, 3) = 3 +// CHECK: Nearest(2, 4) = 4 // CHECK: Nearest(3, 0) = 3 -// CHECK-NEXT: Nearest(3, 1) = 3 -// CHECK-NEXT: Nearest(3, 2) = 3 -// CHECK-NEXT: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 1) = 3 +// CHECK: Nearest(3, 2) = 3 +// CHECK: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 4) = 4 +// CHECK: Nearest(4, 0) = 4 +// CHECK: Nearest(4, 1) = 4 +// CHECK: Nearest(4, 2) = 4 +// CHECK: Nearest(4, 3) = 4 +// CHECK: Nearest(4, 4) = 4 + +// CHECK: --- Block Dominance relationship --- +// CHECK: dominates(0, 0) = 1 (properly = 0) +// CHECK: dominates(0, 1) = 1 (properly = 1) +// CHECK: dominates(0, 2) = 1 (properly = 1) +// CHECK: dominates(0, 3) = 1 (properly = 1) +// CHECK: dominates(0, 4) = 0 (properly = 0) +// CHECK: dominates(1, 0) = 0 (properly = 0) +// CHECK: dominates(1, 1) = 1 (properly = 0) +// CHECK: dominates(1, 2) = 0 (properly = 0) +// CHECK: dominates(1, 3) = 0 (properly = 0) +// CHECK: dominates(1, 4) = 0 (properly = 0) +// CHECK: dominates(2, 0) = 0 (properly = 0) +// CHECK: dominates(2, 1) = 0 (properly = 0) +// CHECK: dominates(2, 2) = 1 (properly = 0) +// CHECK: dominates(2, 3) = 0 (properly = 0) +// CHECK: dominates(2, 4) = 0 (properly = 0) +// CHECK: dominates(3, 0) = 0 (properly = 0) +// CHECK: dominates(3, 1) = 0 (properly = 0) +// CHECK: dominates(3, 2) = 0 (properly = 0) +// CHECK: dominates(3, 3) = 1 (properly = 0) +// CHECK: dominates(3, 4) = 0 (properly = 0) +// CHECK: dominates(4, 0) = 1 (properly = 1) +// CHECK: dominates(4, 1) = 1 (properly = 1) +// CHECK: dominates(4, 2) = 1 (properly = 1) +// CHECK: dominates(4, 3) = 1 (properly = 1) +// CHECK: dominates(4, 4) = 1 (properly = 1) + +// CHECK: --- Block PostDominance relationship --- +// CHECK: postdominates(0, 0) = 1 (properly = 0) +// CHECK: postdominates(0, 1) = 0 (properly = 0) +// CHECK: postdominates(0, 2) = 0 (properly = 0) +// CHECK: postdominates(0, 3) = 0 (properly = 0) +// CHECK: postdominates(0, 4) = 0 (properly = 0) +// CHECK: postdominates(1, 0) = 0 (properly = 0) +// CHECK: postdominates(1, 1) = 1 (properly = 0) +// CHECK: postdominates(1, 2) = 0 (properly = 0) +// CHECK: postdominates(1, 3) = 0 (properly = 0) +// CHECK: postdominates(1, 4) = 0 (properly = 0) +// CHECK: postdominates(2, 0) = 0 (properly = 0) +// CHECK: postdominates(2, 1) = 0 (properly = 0) +// CHECK: postdominates(2, 2) = 1 (properly = 0) +// CHECK: postdominates(2, 3) = 0 (properly = 0) +// CHECK: postdominates(2, 4) = 0 (properly = 0) +// CHECK: postdominates(3, 0) = 1 (properly = 1) +// CHECK: postdominates(3, 1) = 1 (properly = 1) +// CHECK: postdominates(3, 2) = 1 (properly = 1) +// CHECK: postdominates(3, 3) = 1 (properly = 0) +// CHECK: postdominates(3, 4) = 0 (properly = 0) +// CHECK: postdominates(4, 0) = 1 (properly = 1) +// CHECK: postdominates(4, 1) = 1 (properly = 1) +// CHECK: postdominates(4, 2) = 1 (properly = 1) +// CHECK: postdominates(4, 3) = 1 (properly = 1) +// CHECK: postdominates(4, 4) = 1 (properly = 1) + +// CHECK: module attributes {test.block_ids = array} +// CHECK: func.func @func_condBranch({{.*}}) attributes {test.block_ids = array} // ----- @@ -60,32 +137,117 @@ func.func @func_loop(%arg0 : i32, %arg1 : i32) { ^exit: return } -// CHECK-LABEL: --- DominanceInfo --- + +// CHECK: --- DominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 0 +// CHECK: Nearest(0, 2) = 0 +// CHECK: Nearest(0, 3) = 0 +// CHECK: Nearest(0, 4) = 4 // CHECK: Nearest(1, 0) = 0 -// CHECK-NEXT: Nearest(1, 1) = 1 -// CHECK-NEXT: Nearest(1, 2) = 1 -// CHECK-NEXT: Nearest(1, 3) = 1 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 1 +// CHECK: Nearest(1, 3) = 1 +// CHECK: Nearest(1, 4) = 4 // CHECK: Nearest(2, 0) = 0 -// CHECK-NEXT: Nearest(2, 1) = 1 -// CHECK-NEXT: Nearest(2, 2) = 2 -// CHECK-NEXT: Nearest(2, 3) = 1 +// CHECK: Nearest(2, 1) = 1 +// CHECK: Nearest(2, 2) = 2 +// CHECK: Nearest(2, 3) = 1 +// CHECK: Nearest(2, 4) = 4 // CHECK: Nearest(3, 0) = 0 -// CHECK-NEXT: Nearest(3, 1) = 1 -// CHECK-NEXT: Nearest(3, 2) = 1 -// CHECK-NEXT: Nearest(3, 3) = 3 -// CHECK-LABEL: --- PostDominanceInfo --- +// CHECK: Nearest(3, 1) = 1 +// CHECK: Nearest(3, 2) = 1 +// CHECK: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 4) = 4 +// CHECK: Nearest(4, 0) = 4 +// CHECK: Nearest(4, 1) = 4 +// CHECK: Nearest(4, 2) = 4 +// CHECK: Nearest(4, 3) = 4 +// CHECK: Nearest(4, 4) = 4 + +// CHECK: --- PostDominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 1 +// CHECK: Nearest(0, 2) = 1 +// CHECK: Nearest(0, 3) = 3 +// CHECK: Nearest(0, 4) = 4 // CHECK: Nearest(1, 0) = 1 -// CHECK-NEXT: Nearest(1, 1) = 1 -// CHECK-NEXT: Nearest(1, 2) = 1 -// CHECK-NEXT: Nearest(1, 3) = 3 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 1 +// CHECK: Nearest(1, 3) = 3 +// CHECK: Nearest(1, 4) = 4 // CHECK: Nearest(2, 0) = 1 -// CHECK-NEXT: Nearest(2, 1) = 1 -// CHECK-NEXT: Nearest(2, 2) = 2 -// CHECK-NEXT: Nearest(2, 3) = 3 +// CHECK: Nearest(2, 1) = 1 +// CHECK: Nearest(2, 2) = 2 +// CHECK: Nearest(2, 3) = 3 +// CHECK: Nearest(2, 4) = 4 // CHECK: Nearest(3, 0) = 3 -// CHECK-NEXT: Nearest(3, 1) = 3 -// CHECK-NEXT: Nearest(3, 2) = 3 -// CHECK-NEXT: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 1) = 3 +// CHECK: Nearest(3, 2) = 3 +// CHECK: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 4) = 4 +// CHECK: Nearest(4, 0) = 4 +// CHECK: Nearest(4, 1) = 4 +// CHECK: Nearest(4, 2) = 4 +// CHECK: Nearest(4, 3) = 4 +// CHECK: Nearest(4, 4) = 4 + +// CHECK: --- Block Dominance relationship --- +// CHECK: dominates(0, 0) = 1 (properly = 0) +// CHECK: dominates(0, 1) = 1 (properly = 1) +// CHECK: dominates(0, 2) = 1 (properly = 1) +// CHECK: dominates(0, 3) = 1 (properly = 1) +// CHECK: dominates(0, 4) = 0 (properly = 0) +// CHECK: dominates(1, 0) = 0 (properly = 0) +// CHECK: dominates(1, 1) = 1 (properly = 0) +// CHECK: dominates(1, 2) = 1 (properly = 1) +// CHECK: dominates(1, 3) = 1 (properly = 1) +// CHECK: dominates(1, 4) = 0 (properly = 0) +// CHECK: dominates(2, 0) = 0 (properly = 0) +// CHECK: dominates(2, 1) = 0 (properly = 0) +// CHECK: dominates(2, 2) = 1 (properly = 0) +// CHECK: dominates(2, 3) = 0 (properly = 0) +// CHECK: dominates(2, 4) = 0 (properly = 0) +// CHECK: dominates(3, 0) = 0 (properly = 0) +// CHECK: dominates(3, 1) = 0 (properly = 0) +// CHECK: dominates(3, 2) = 0 (properly = 0) +// CHECK: dominates(3, 3) = 1 (properly = 0) +// CHECK: dominates(3, 4) = 0 (properly = 0) +// CHECK: dominates(4, 0) = 1 (properly = 1) +// CHECK: dominates(4, 1) = 1 (properly = 1) +// CHECK: dominates(4, 2) = 1 (properly = 1) +// CHECK: dominates(4, 3) = 1 (properly = 1) +// CHECK: dominates(4, 4) = 1 (properly = 1) + +// CHECK: --- Block PostDominance relationship --- +// CHECK: postdominates(0, 0) = 1 (properly = 0) +// CHECK: postdominates(0, 1) = 0 (properly = 0) +// CHECK: postdominates(0, 2) = 0 (properly = 0) +// CHECK: postdominates(0, 3) = 0 (properly = 0) +// CHECK: postdominates(0, 4) = 0 (properly = 0) +// CHECK: postdominates(1, 0) = 1 (properly = 1) +// CHECK: postdominates(1, 1) = 1 (properly = 0) +// CHECK: postdominates(1, 2) = 1 (properly = 1) +// CHECK: postdominates(1, 3) = 0 (properly = 0) +// CHECK: postdominates(1, 4) = 0 (properly = 0) +// CHECK: postdominates(2, 0) = 0 (properly = 0) +// CHECK: postdominates(2, 1) = 0 (properly = 0) +// CHECK: postdominates(2, 2) = 1 (properly = 0) +// CHECK: postdominates(2, 3) = 0 (properly = 0) +// CHECK: postdominates(2, 4) = 0 (properly = 0) +// CHECK: postdominates(3, 0) = 1 (properly = 1) +// CHECK: postdominates(3, 1) = 1 (properly = 1) +// CHECK: postdominates(3, 2) = 1 (properly = 1) +// CHECK: postdominates(3, 3) = 1 (properly = 0) +// CHECK: postdominates(3, 4) = 0 (properly = 0) +// CHECK: postdominates(4, 0) = 1 (properly = 1) +// CHECK: postdominates(4, 1) = 1 (properly = 1) +// CHECK: postdominates(4, 2) = 1 (properly = 1) +// CHECK: postdominates(4, 3) = 1 (properly = 1) +// CHECK: postdominates(4, 4) = 1 (properly = 1) + +// CHECK: module attributes {test.block_ids = array} +// CHECK: func.func @func_loop({{.*}}) attributes {test.block_ids = array} // ----- @@ -95,16 +257,57 @@ func.func @nested_region(%arg0 : index, %arg1 : index, %arg2 : index) { return } -// CHECK-LABEL: --- DominanceInfo --- -// CHECK-NEXT: Nearest(0, 0) = 0 -// CHECK-NEXT: Nearest(0, 1) = 1 +// CHECK: --- DominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 1 +// CHECK: Nearest(0, 2) = 2 // CHECK: Nearest(1, 0) = 1 -// CHECK-NEXT: Nearest(1, 1) = 1 -// CHECK-LABEL: --- PostDominanceInfo --- -// CHECK-NEXT: Nearest(0, 0) = 0 -// CHECK-NEXT: Nearest(0, 1) = 1 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 2 +// CHECK: Nearest(2, 0) = 2 +// CHECK: Nearest(2, 1) = 2 +// CHECK: Nearest(2, 2) = 2 + +// CHECK: --- PostDominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 1 +// CHECK: Nearest(0, 2) = 2 // CHECK: Nearest(1, 0) = 1 -// CHECK-NEXT: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 2 +// CHECK: Nearest(2, 0) = 2 +// CHECK: Nearest(2, 1) = 2 +// CHECK: Nearest(2, 2) = 2 + +// CHECK: --- Block Dominance relationship --- +// CHECK: dominates(0, 0) = 1 (properly = 0) +// CHECK: dominates(0, 1) = 0 (properly = 0) +// CHECK: dominates(0, 2) = 0 (properly = 0) +// CHECK: dominates(1, 0) = 1 (properly = 1) +// CHECK: dominates(1, 1) = 1 (properly = 0) +// CHECK: dominates(1, 2) = 0 (properly = 0) +// CHECK: dominates(2, 0) = 1 (properly = 1) +// CHECK: dominates(2, 1) = 1 (properly = 1) +// CHECK: dominates(2, 2) = 1 (properly = 1) + +// CHECK: --- Block PostDominance relationship --- +// CHECK: postdominates(0, 0) = 1 (properly = 0) +// CHECK: postdominates(0, 1) = 0 (properly = 0) +// CHECK: postdominates(0, 2) = 0 (properly = 0) +// CHECK: postdominates(1, 0) = 1 (properly = 1) +// CHECK: postdominates(1, 1) = 1 (properly = 0) +// CHECK: postdominates(1, 2) = 0 (properly = 0) +// CHECK: postdominates(2, 0) = 1 (properly = 1) +// CHECK: postdominates(2, 1) = 1 (properly = 1) +// CHECK: postdominates(2, 2) = 1 (properly = 1) + +// CHECK: module attributes {test.block_ids = array} { +// CHECK: func.func @nested_region({{.*}}) attributes {test.block_ids = array} { +// CHECK: scf.for {{.*}} { +// CHECK: } {test.block_ids = array} +// CHECK: return +// CHECK: } +// CHECK: } // ----- @@ -117,32 +320,126 @@ func.func @nested_region2(%arg0 : index, %arg1 : index, %arg2 : index) { } return } -// CHECK-LABEL: --- DominanceInfo --- + +// CHECK: --- DominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 1 +// CHECK: Nearest(0, 2) = 2 +// CHECK: Nearest(0, 3) = 3 +// CHECK: Nearest(0, 4) = 4 // CHECK: Nearest(1, 0) = 1 -// CHECK-NEXT: Nearest(1, 1) = 1 -// CHECK-NEXT: Nearest(1, 2) = 2 -// CHECK-NEXT: Nearest(1, 3) = 3 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 2 +// CHECK: Nearest(1, 3) = 3 +// CHECK: Nearest(1, 4) = 4 // CHECK: Nearest(2, 0) = 2 -// CHECK-NEXT: Nearest(2, 1) = 2 -// CHECK-NEXT: Nearest(2, 2) = 2 -// CHECK-NEXT: Nearest(2, 3) = 3 +// CHECK: Nearest(2, 1) = 2 +// CHECK: Nearest(2, 2) = 2 +// CHECK: Nearest(2, 3) = 3 +// CHECK: Nearest(2, 4) = 4 // CHECK: Nearest(3, 0) = 3 -// CHECK-NEXT: Nearest(3, 1) = 3 -// CHECK-NEXT: Nearest(3, 2) = 3 -// CHECK-NEXT: Nearest(3, 3) = 3 -// CHECK-LABEL: --- PostDominanceInfo --- -// CHECK-NEXT: Nearest(0, 0) = 0 -// CHECK-NEXT: Nearest(0, 1) = 1 -// CHECK-NEXT: Nearest(0, 2) = 2 -// CHECK-NEXT: Nearest(0, 3) = 3 +// CHECK: Nearest(3, 1) = 3 +// CHECK: Nearest(3, 2) = 3 +// CHECK: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 4) = 4 +// CHECK: Nearest(4, 0) = 4 +// CHECK: Nearest(4, 1) = 4 +// CHECK: Nearest(4, 2) = 4 +// CHECK: Nearest(4, 3) = 4 +// CHECK: Nearest(4, 4) = 4 + +// CHECK: --- PostDominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 1 +// CHECK: Nearest(0, 2) = 2 +// CHECK: Nearest(0, 3) = 3 +// CHECK: Nearest(0, 4) = 4 // CHECK: Nearest(1, 0) = 1 -// CHECK-NEXT: Nearest(1, 1) = 1 -// CHECK-NEXT: Nearest(1, 2) = 2 -// CHECK-NEXT: Nearest(1, 3) = 3 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 2 +// CHECK: Nearest(1, 3) = 3 +// CHECK: Nearest(1, 4) = 4 // CHECK: Nearest(2, 0) = 2 -// CHECK-NEXT: Nearest(2, 1) = 2 -// CHECK-NEXT: Nearest(2, 2) = 2 -// CHECK-NEXT: Nearest(2, 3) = 3 +// CHECK: Nearest(2, 1) = 2 +// CHECK: Nearest(2, 2) = 2 +// CHECK: Nearest(2, 3) = 3 +// CHECK: Nearest(2, 4) = 4 +// CHECK: Nearest(3, 0) = 3 +// CHECK: Nearest(3, 1) = 3 +// CHECK: Nearest(3, 2) = 3 +// CHECK: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 4) = 4 +// CHECK: Nearest(4, 0) = 4 +// CHECK: Nearest(4, 1) = 4 +// CHECK: Nearest(4, 2) = 4 +// CHECK: Nearest(4, 3) = 4 +// CHECK: Nearest(4, 4) = 4 + +// CHECK: --- Block Dominance relationship --- +// CHECK: dominates(0, 0) = 1 (properly = 0) +// CHECK: dominates(0, 1) = 0 (properly = 0) +// CHECK: dominates(0, 2) = 0 (properly = 0) +// CHECK: dominates(0, 3) = 0 (properly = 0) +// CHECK: dominates(0, 4) = 0 (properly = 0) +// CHECK: dominates(1, 0) = 1 (properly = 1) +// CHECK: dominates(1, 1) = 1 (properly = 0) +// CHECK: dominates(1, 2) = 0 (properly = 0) +// CHECK: dominates(1, 3) = 0 (properly = 0) +// CHECK: dominates(1, 4) = 0 (properly = 0) +// CHECK: dominates(2, 0) = 1 (properly = 1) +// CHECK: dominates(2, 1) = 1 (properly = 1) +// CHECK: dominates(2, 2) = 1 (properly = 0) +// CHECK: dominates(2, 3) = 0 (properly = 0) +// CHECK: dominates(2, 4) = 0 (properly = 0) +// CHECK: dominates(3, 0) = 1 (properly = 1) +// CHECK: dominates(3, 1) = 1 (properly = 1) +// CHECK: dominates(3, 2) = 1 (properly = 1) +// CHECK: dominates(3, 3) = 1 (properly = 0) +// CHECK: dominates(3, 4) = 0 (properly = 0) +// CHECK: dominates(4, 0) = 1 (properly = 1) +// CHECK: dominates(4, 1) = 1 (properly = 1) +// CHECK: dominates(4, 2) = 1 (properly = 1) +// CHECK: dominates(4, 3) = 1 (properly = 1) +// CHECK: dominates(4, 4) = 1 (properly = 1) + +// CHECK: --- Block PostDominance relationship --- +// CHECK: postdominates(0, 0) = 1 (properly = 0) +// CHECK: postdominates(0, 1) = 0 (properly = 0) +// CHECK: postdominates(0, 2) = 0 (properly = 0) +// CHECK: postdominates(0, 3) = 0 (properly = 0) +// CHECK: postdominates(0, 4) = 0 (properly = 0) +// CHECK: postdominates(1, 0) = 1 (properly = 1) +// CHECK: postdominates(1, 1) = 1 (properly = 0) +// CHECK: postdominates(1, 2) = 0 (properly = 0) +// CHECK: postdominates(1, 3) = 0 (properly = 0) +// CHECK: postdominates(1, 4) = 0 (properly = 0) +// CHECK: postdominates(2, 0) = 1 (properly = 1) +// CHECK: postdominates(2, 1) = 1 (properly = 1) +// CHECK: postdominates(2, 2) = 1 (properly = 0) +// CHECK: postdominates(2, 3) = 0 (properly = 0) +// CHECK: postdominates(2, 4) = 0 (properly = 0) +// CHECK: postdominates(3, 0) = 1 (properly = 1) +// CHECK: postdominates(3, 1) = 1 (properly = 1) +// CHECK: postdominates(3, 2) = 1 (properly = 1) +// CHECK: postdominates(3, 3) = 1 (properly = 0) +// CHECK: postdominates(3, 4) = 0 (properly = 0) +// CHECK: postdominates(4, 0) = 1 (properly = 1) +// CHECK: postdominates(4, 1) = 1 (properly = 1) +// CHECK: postdominates(4, 2) = 1 (properly = 1) +// CHECK: postdominates(4, 3) = 1 (properly = 1) +// CHECK: postdominates(4, 4) = 1 (properly = 1) + +// CHECK: module attributes {test.block_ids = array} { +// CHECK: func.func @nested_region2({{.*}}) attributes {test.block_ids = array} { +// CHECK: scf.for {{.*}} { +// CHECK: scf.for {{.*}} { +// CHECK: scf.for {{.*}} { +// CHECK: } {test.block_ids = array} +// CHECK: } {test.block_ids = array} +// CHECK: } {test.block_ids = array} +// CHECK: return +// CHECK: } +// CHECK: } // ----- @@ -167,141 +464,219 @@ func.func @func_loop_nested_region( ^exit: return } -// CHECK-LABEL: --- DominanceInfo --- + +// CHECK: --- DominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 0 +// CHECK: Nearest(0, 2) = 0 +// CHECK: Nearest(0, 3) = 0 +// CHECK: Nearest(0, 4) = 0 +// CHECK: Nearest(0, 5) = 0 +// CHECK: Nearest(0, 6) = 6 +// CHECK: Nearest(1, 0) = 0 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 1 +// CHECK: Nearest(1, 3) = 1 +// CHECK: Nearest(1, 4) = 1 +// CHECK: Nearest(1, 5) = 1 +// CHECK: Nearest(1, 6) = 6 // CHECK: Nearest(2, 0) = 0 -// CHECK-NEXT: Nearest(2, 1) = 1 -// CHECK-NEXT: Nearest(2, 2) = 2 -// CHECK-NEXT: Nearest(2, 3) = 2 -// CHECK-NEXT: Nearest(2, 4) = 2 -// CHECK-NEXT: Nearest(2, 5) = 1 +// CHECK: Nearest(2, 1) = 1 +// CHECK: Nearest(2, 2) = 2 +// CHECK: Nearest(2, 3) = 2 +// CHECK: Nearest(2, 4) = 2 +// CHECK: Nearest(2, 5) = 1 +// CHECK: Nearest(2, 6) = 6 // CHECK: Nearest(3, 0) = 0 -// CHECK-NEXT: Nearest(3, 1) = 1 -// CHECK-NEXT: Nearest(3, 2) = 2 -// CHECK-NEXT: Nearest(3, 3) = 3 -// CHECK-NEXT: Nearest(3, 4) = 4 -// CHECK-NEXT: Nearest(3, 5) = 1 +// CHECK: Nearest(3, 1) = 1 +// CHECK: Nearest(3, 2) = 2 +// CHECK: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 4) = 4 +// CHECK: Nearest(3, 5) = 1 +// CHECK: Nearest(3, 6) = 6 // CHECK: Nearest(4, 0) = 0 -// CHECK-NEXT: Nearest(4, 1) = 1 -// CHECK-NEXT: Nearest(4, 2) = 2 -// CHECK-NEXT: Nearest(4, 3) = 4 -// CHECK-NEXT: Nearest(4, 4) = 4 -// CHECK-NEXT: Nearest(4, 5) = 1 -// CHECK-LABEL: --- PostDominanceInfo --- +// CHECK: Nearest(4, 1) = 1 +// CHECK: Nearest(4, 2) = 2 +// CHECK: Nearest(4, 3) = 4 +// CHECK: Nearest(4, 4) = 4 +// CHECK: Nearest(4, 5) = 1 +// CHECK: Nearest(4, 6) = 6 +// CHECK: Nearest(5, 0) = 0 +// CHECK: Nearest(5, 1) = 1 +// CHECK: Nearest(5, 2) = 1 +// CHECK: Nearest(5, 3) = 1 +// CHECK: Nearest(5, 4) = 1 +// CHECK: Nearest(5, 5) = 5 +// CHECK: Nearest(5, 6) = 6 +// CHECK: Nearest(6, 0) = 6 +// CHECK: Nearest(6, 1) = 6 +// CHECK: Nearest(6, 2) = 6 +// CHECK: Nearest(6, 3) = 6 +// CHECK: Nearest(6, 4) = 6 +// CHECK: Nearest(6, 5) = 6 +// CHECK: Nearest(6, 6) = 6 + +// CHECK: --- PostDominanceInfo --- +// CHECK: Nearest(0, 0) = 0 +// CHECK: Nearest(0, 1) = 1 +// CHECK: Nearest(0, 2) = 1 +// CHECK: Nearest(0, 3) = 1 +// CHECK: Nearest(0, 4) = 1 +// CHECK: Nearest(0, 5) = 5 +// CHECK: Nearest(0, 6) = 6 +// CHECK: Nearest(1, 0) = 1 +// CHECK: Nearest(1, 1) = 1 +// CHECK: Nearest(1, 2) = 1 +// CHECK: Nearest(1, 3) = 1 +// CHECK: Nearest(1, 4) = 1 +// CHECK: Nearest(1, 5) = 5 +// CHECK: Nearest(1, 6) = 6 // CHECK: Nearest(2, 0) = 1 -// CHECK-NEXT: Nearest(2, 1) = 1 -// CHECK-NEXT: Nearest(2, 2) = 2 -// CHECK-NEXT: Nearest(2, 3) = 2 -// CHECK-NEXT: Nearest(2, 4) = 2 -// CHECK-NEXT: Nearest(2, 5) = 5 +// CHECK: Nearest(2, 1) = 1 +// CHECK: Nearest(2, 2) = 2 +// CHECK: Nearest(2, 3) = 2 +// CHECK: Nearest(2, 4) = 2 +// CHECK: Nearest(2, 5) = 5 +// CHECK: Nearest(2, 6) = 6 // CHECK: Nearest(3, 0) = 1 -// CHECK-NEXT: Nearest(3, 1) = 1 -// CHECK-NEXT: Nearest(3, 2) = 2 -// CHECK-NEXT: Nearest(3, 3) = 3 -// CHECK-NEXT: Nearest(3, 4) = 4 -// CHECK-NEXT: Nearest(3, 5) = 5 +// CHECK: Nearest(3, 1) = 1 +// CHECK: Nearest(3, 2) = 2 +// CHECK: Nearest(3, 3) = 3 +// CHECK: Nearest(3, 4) = 4 +// CHECK: Nearest(3, 5) = 5 +// CHECK: Nearest(3, 6) = 6 // CHECK: Nearest(4, 0) = 1 -// CHECK-NEXT: Nearest(4, 1) = 1 -// CHECK-NEXT: Nearest(4, 2) = 2 -// CHECK-NEXT: Nearest(4, 3) = 4 -// CHECK-NEXT: Nearest(4, 4) = 4 -// CHECK-NEXT: Nearest(4, 5) = 5 -// CHECK-LABEL: --- Block Dominance relationship --- -// CHECK-NEXT: dominates(0, 0) = true -// CHECK-NEXT: dominates(0, 1) = true -// CHECK-NEXT: dominates(0, 2) = true -// CHECK-NEXT: dominates(0, 3) = true -// CHECK-NEXT: dominates(0, 4) = true -// CHECK-NEXT: dominates(0, 5) = true -// CHECK-NEXT: dominates(0, 6) = false -// CHECK-NEXT: dominates(1, 0) = false -// CHECK-NEXT: dominates(1, 1) = true -// CHECK-NEXT: dominates(1, 2) = true -// CHECK-NEXT: dominates(1, 3) = true -// CHECK-NEXT: dominates(1, 4) = true -// CHECK-NEXT: dominates(1, 5) = true -// CHECK-NEXT: dominates(1, 6) = false -// CHECK-NEXT: dominates(2, 0) = false -// CHECK-NEXT: dominates(2, 1) = false -// CHECK-NEXT: dominates(2, 2) = true -// CHECK-NEXT: dominates(2, 3) = true -// CHECK-NEXT: dominates(2, 4) = true -// CHECK-NEXT: dominates(2, 5) = false -// CHECK-NEXT: dominates(2, 6) = false -// CHECK-NEXT: dominates(3, 0) = false -// CHECK-NEXT: dominates(3, 1) = false -// CHECK-NEXT: dominates(3, 2) = false -// CHECK-NEXT: dominates(3, 3) = true -// CHECK-NEXT: dominates(3, 4) = false -// CHECK-NEXT: dominates(3, 5) = false -// CHECK-NEXT: dominates(3, 6) = false -// CHECK-NEXT: dominates(4, 0) = false -// CHECK-NEXT: dominates(4, 1) = false -// CHECK-NEXT: dominates(4, 2) = false -// CHECK-NEXT: dominates(4, 3) = true -// CHECK-NEXT: dominates(4, 4) = true -// CHECK-NEXT: dominates(4, 5) = false -// CHECK-NEXT: dominates(4, 6) = false -// CHECK-NEXT: dominates(5, 0) = false -// CHECK-NEXT: dominates(5, 1) = false -// CHECK-NEXT: dominates(5, 2) = false -// CHECK-NEXT: dominates(5, 3) = false -// CHECK-NEXT: dominates(5, 4) = false -// CHECK-NEXT: dominates(5, 5) = true -// CHECK-NEXT: dominates(5, 6) = false -// CHECK-NEXT: dominates(6, 0) = true -// CHECK-NEXT: dominates(6, 1) = true -// CHECK-NEXT: dominates(6, 2) = true -// CHECK-NEXT: dominates(6, 3) = true -// CHECK-NEXT: dominates(6, 4) = true -// CHECK-NEXT: dominates(6, 5) = true -// CHECK-NEXT: dominates(6, 6) = true -// CHECK-LABEL: --- Block PostDominance relationship --- -// CHECK-NEXT: postdominates(0, 0) = true -// CHECK-NEXT: postdominates(0, 1) = false -// CHECK-NEXT: postdominates(0, 2) = false -// CHECK-NEXT: postdominates(0, 3) = false -// CHECK-NEXT: postdominates(0, 4) = false -// CHECK-NEXT: postdominates(0, 5) = false -// CHECK-NEXT: postdominates(0, 6) = false -// CHECK-NEXT: postdominates(1, 0) = true -// CHECK-NEXT: postdominates(1, 1) = true -// CHECK-NEXT: postdominates(1, 2) = true -// CHECK-NEXT: postdominates(1, 3) = true -// CHECK-NEXT: postdominates(1, 4) = true -// CHECK-NEXT: postdominates(1, 5) = false -// CHECK-NEXT: postdominates(1, 6) = false -// CHECK-NEXT: postdominates(2, 0) = false -// CHECK-NEXT: postdominates(2, 1) = false -// CHECK-NEXT: postdominates(2, 2) = true -// CHECK-NEXT: postdominates(2, 3) = true -// CHECK-NEXT: postdominates(2, 4) = true -// CHECK-NEXT: postdominates(2, 5) = false -// CHECK-NEXT: postdominates(2, 6) = false -// CHECK-NEXT: postdominates(3, 0) = false -// CHECK-NEXT: postdominates(3, 1) = false -// CHECK-NEXT: postdominates(3, 2) = false -// CHECK-NEXT: postdominates(3, 3) = true -// CHECK-NEXT: postdominates(3, 4) = false -// CHECK-NEXT: postdominates(3, 5) = false -// CHECK-NEXT: postdominates(3, 6) = false -// CHECK-NEXT: postdominates(4, 0) = false -// CHECK-NEXT: postdominates(4, 1) = false -// CHECK-NEXT: postdominates(4, 2) = false -// CHECK-NEXT: postdominates(4, 3) = true -// CHECK-NEXT: postdominates(4, 4) = true -// CHECK-NEXT: postdominates(4, 5) = false -// CHECK-NEXT: postdominates(4, 6) = false -// CHECK-NEXT: postdominates(5, 0) = true -// CHECK-NEXT: postdominates(5, 1) = true -// CHECK-NEXT: postdominates(5, 2) = true -// CHECK-NEXT: postdominates(5, 3) = true -// CHECK-NEXT: postdominates(5, 4) = true -// CHECK-NEXT: postdominates(5, 5) = true -// CHECK-NEXT: postdominates(5, 6) = false -// CHECK-NEXT: postdominates(6, 0) = true -// CHECK-NEXT: postdominates(6, 1) = true -// CHECK-NEXT: postdominates(6, 2) = true -// CHECK-NEXT: postdominates(6, 3) = true -// CHECK-NEXT: postdominates(6, 4) = true -// CHECK-NEXT: postdominates(6, 5) = true -// CHECK-NEXT: postdominates(6, 6) = true +// CHECK: Nearest(4, 1) = 1 +// CHECK: Nearest(4, 2) = 2 +// CHECK: Nearest(4, 3) = 4 +// CHECK: Nearest(4, 4) = 4 +// CHECK: Nearest(4, 5) = 5 +// CHECK: Nearest(4, 6) = 6 +// CHECK: Nearest(5, 0) = 5 +// CHECK: Nearest(5, 1) = 5 +// CHECK: Nearest(5, 2) = 5 +// CHECK: Nearest(5, 3) = 5 +// CHECK: Nearest(5, 4) = 5 +// CHECK: Nearest(5, 5) = 5 +// CHECK: Nearest(5, 6) = 6 +// CHECK: Nearest(6, 0) = 6 +// CHECK: Nearest(6, 1) = 6 +// CHECK: Nearest(6, 2) = 6 +// CHECK: Nearest(6, 3) = 6 +// CHECK: Nearest(6, 4) = 6 +// CHECK: Nearest(6, 5) = 6 +// CHECK: Nearest(6, 6) = 6 + +// CHECK: --- Block Dominance relationship --- +// CHECK: dominates(0, 0) = 1 (properly = 0) +// CHECK: dominates(0, 1) = 1 (properly = 1) +// CHECK: dominates(0, 2) = 1 (properly = 1) +// CHECK: dominates(0, 3) = 1 (properly = 1) +// CHECK: dominates(0, 4) = 1 (properly = 1) +// CHECK: dominates(0, 5) = 1 (properly = 1) +// CHECK: dominates(0, 6) = 0 (properly = 0) +// CHECK: dominates(1, 0) = 0 (properly = 0) +// CHECK: dominates(1, 1) = 1 (properly = 0) +// CHECK: dominates(1, 2) = 1 (properly = 1) +// CHECK: dominates(1, 3) = 1 (properly = 1) +// CHECK: dominates(1, 4) = 1 (properly = 1) +// CHECK: dominates(1, 5) = 1 (properly = 1) +// CHECK: dominates(1, 6) = 0 (properly = 0) +// CHECK: dominates(2, 0) = 0 (properly = 0) +// CHECK: dominates(2, 1) = 0 (properly = 0) +// CHECK: dominates(2, 2) = 1 (properly = 0) +// CHECK: dominates(2, 3) = 1 (properly = 1) +// CHECK: dominates(2, 4) = 1 (properly = 1) +// CHECK: dominates(2, 5) = 0 (properly = 0) +// CHECK: dominates(2, 6) = 0 (properly = 0) +// CHECK: dominates(3, 0) = 0 (properly = 0) +// CHECK: dominates(3, 1) = 0 (properly = 0) +// CHECK: dominates(3, 2) = 0 (properly = 0) +// CHECK: dominates(3, 3) = 1 (properly = 0) +// CHECK: dominates(3, 4) = 0 (properly = 0) +// CHECK: dominates(3, 5) = 0 (properly = 0) +// CHECK: dominates(3, 6) = 0 (properly = 0) +// CHECK: dominates(4, 0) = 0 (properly = 0) +// CHECK: dominates(4, 1) = 0 (properly = 0) +// CHECK: dominates(4, 2) = 0 (properly = 0) +// CHECK: dominates(4, 3) = 1 (properly = 1) +// CHECK: dominates(4, 4) = 1 (properly = 0) +// CHECK: dominates(4, 5) = 0 (properly = 0) +// CHECK: dominates(4, 6) = 0 (properly = 0) +// CHECK: dominates(5, 0) = 0 (properly = 0) +// CHECK: dominates(5, 1) = 0 (properly = 0) +// CHECK: dominates(5, 2) = 0 (properly = 0) +// CHECK: dominates(5, 3) = 0 (properly = 0) +// CHECK: dominates(5, 4) = 0 (properly = 0) +// CHECK: dominates(5, 5) = 1 (properly = 0) +// CHECK: dominates(5, 6) = 0 (properly = 0) +// CHECK: dominates(6, 0) = 1 (properly = 1) +// CHECK: dominates(6, 1) = 1 (properly = 1) +// CHECK: dominates(6, 2) = 1 (properly = 1) +// CHECK: dominates(6, 3) = 1 (properly = 1) +// CHECK: dominates(6, 4) = 1 (properly = 1) +// CHECK: dominates(6, 5) = 1 (properly = 1) +// CHECK: dominates(6, 6) = 1 (properly = 1) + +// CHECK: --- Block PostDominance relationship --- +// CHECK: postdominates(0, 0) = 1 (properly = 0) +// CHECK: postdominates(0, 1) = 0 (properly = 0) +// CHECK: postdominates(0, 2) = 0 (properly = 0) +// CHECK: postdominates(0, 3) = 0 (properly = 0) +// CHECK: postdominates(0, 4) = 0 (properly = 0) +// CHECK: postdominates(0, 5) = 0 (properly = 0) +// CHECK: postdominates(0, 6) = 0 (properly = 0) +// CHECK: postdominates(1, 0) = 1 (properly = 1) +// CHECK: postdominates(1, 1) = 1 (properly = 0) +// CHECK: postdominates(1, 2) = 1 (properly = 1) +// CHECK: postdominates(1, 3) = 1 (properly = 1) +// CHECK: postdominates(1, 4) = 1 (properly = 1) +// CHECK: postdominates(1, 5) = 0 (properly = 0) +// CHECK: postdominates(1, 6) = 0 (properly = 0) +// CHECK: postdominates(2, 0) = 0 (properly = 0) +// CHECK: postdominates(2, 1) = 0 (properly = 0) +// CHECK: postdominates(2, 2) = 1 (properly = 0) +// CHECK: postdominates(2, 3) = 1 (properly = 1) +// CHECK: postdominates(2, 4) = 1 (properly = 1) +// CHECK: postdominates(2, 5) = 0 (properly = 0) +// CHECK: postdominates(2, 6) = 0 (properly = 0) +// CHECK: postdominates(3, 0) = 0 (properly = 0) +// CHECK: postdominates(3, 1) = 0 (properly = 0) +// CHECK: postdominates(3, 2) = 0 (properly = 0) +// CHECK: postdominates(3, 3) = 1 (properly = 0) +// CHECK: postdominates(3, 4) = 0 (properly = 0) +// CHECK: postdominates(3, 5) = 0 (properly = 0) +// CHECK: postdominates(3, 6) = 0 (properly = 0) +// CHECK: postdominates(4, 0) = 0 (properly = 0) +// CHECK: postdominates(4, 1) = 0 (properly = 0) +// CHECK: postdominates(4, 2) = 0 (properly = 0) +// CHECK: postdominates(4, 3) = 1 (properly = 1) +// CHECK: postdominates(4, 4) = 1 (properly = 0) +// CHECK: postdominates(4, 5) = 0 (properly = 0) +// CHECK: postdominates(4, 6) = 0 (properly = 0) +// CHECK: postdominates(5, 0) = 1 (properly = 1) +// CHECK: postdominates(5, 1) = 1 (properly = 1) +// CHECK: postdominates(5, 2) = 1 (properly = 1) +// CHECK: postdominates(5, 3) = 1 (properly = 1) +// CHECK: postdominates(5, 4) = 1 (properly = 1) +// CHECK: postdominates(5, 5) = 1 (properly = 0) +// CHECK: postdominates(5, 6) = 0 (properly = 0) +// CHECK: postdominates(6, 0) = 1 (properly = 1) +// CHECK: postdominates(6, 1) = 1 (properly = 1) +// CHECK: postdominates(6, 2) = 1 (properly = 1) +// CHECK: postdominates(6, 3) = 1 (properly = 1) +// CHECK: postdominates(6, 4) = 1 (properly = 1) +// CHECK: postdominates(6, 5) = 1 (properly = 1) +// CHECK: postdominates(6, 6) = 1 (properly = 1) + +// CHECK: module attributes {test.block_ids = array} { +// CHECK: func.func @func_loop_nested_region({{.*}}) attributes {test.block_ids = array} { +// CHECK: ^{{.*}} +// CHECK: ^{{.*}} +// CHECK: scf.for {{.*}} { +// CHECK: scf.for {{.*}} { +// CHECK: } {test.block_ids = array} +// CHECK: } {test.block_ids = array} +// CHECK: ^{{.*}} +// CHECK: } +// CHECK: } diff --git a/mlir/test/lib/IR/TestDominance.cpp b/mlir/test/lib/IR/TestDominance.cpp index fab80bdacb032d..b34149b3e2cbdf 100644 --- a/mlir/test/lib/IR/TestDominance.cpp +++ b/mlir/test/lib/IR/TestDominance.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/Builders.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" @@ -24,24 +25,46 @@ static bool dominatesOrPostDominates(DominanceInfo &dominanceInfo, Block *a, Block *b) { return dominanceInfo.dominates(a, b); } - static bool dominatesOrPostDominates(PostDominanceInfo &dominanceInfo, Block *a, Block *b) { return dominanceInfo.postDominates(a, b); } +static bool properlyDominatesOrPostDominates(DominanceInfo &dominanceInfo, + Block *a, Block *b) { + return dominanceInfo.properlyDominates(a, b); +} +static bool properlyDominatesOrPostDominates(PostDominanceInfo &dominanceInfo, + Block *a, Block *b) { + return dominanceInfo.properlyPostDominates(a, b); +} namespace { /// Helper class to print dominance information. class DominanceTest { public: + static constexpr StringRef kBlockIdsAttrName = "test.block_ids"; + /// Constructs a new test instance using the given operation. DominanceTest(Operation *operation) : operation(operation) { - // Create unique ids for each block. + Builder b(operation->getContext()); + + // Helper function that annotates the IR with block IDs. + auto annotateBlockId = [&](Operation *op, int64_t blockId) { + auto idAttr = op->getAttrOfType(kBlockIdsAttrName); + SmallVector ids; + if (idAttr) + ids = llvm::to_vector(idAttr.asArrayRef()); + ids.push_back(blockId); + op->setAttr(kBlockIdsAttrName, b.getDenseI64ArrayAttr(ids)); + }; + + // Create unique IDs for each block. operation->walk([&](Operation *nested) { if (blockIds.count(nested->getBlock()) > 0) return; blockIds.insert({nested->getBlock(), blockIds.size()}); + annotateBlockId(nested->getBlock()->getParentOp(), blockIds.size() - 1); }); } @@ -61,26 +84,28 @@ class DominanceTest { if (!visited.insert(nestedBlock).second) return; if (printCommonDominatorInfo) { - llvm::errs() << "Nearest(" << blockIds[block] << ", " + llvm::outs() << "Nearest(" << blockIds[block] << ", " << blockIds[nestedBlock] << ") = "; Block *dom = dominanceInfo.findNearestCommonDominator(block, nestedBlock); if (dom) - llvm::errs() << blockIds[dom]; + llvm::outs() << blockIds[dom]; else - llvm::errs() << ""; - llvm::errs() << "\n"; + llvm::outs() << ""; + llvm::outs() << "\n"; } else { if (std::is_same::value) - llvm::errs() << "dominates("; - else - llvm::errs() << "postdominates("; - llvm::errs() << blockIds[block] << ", " << blockIds[nestedBlock] - << ") = "; - if (dominatesOrPostDominates(dominanceInfo, block, nestedBlock)) - llvm::errs() << "true\n"; + llvm::outs() << "dominates("; else - llvm::errs() << "false\n"; + llvm::outs() << "postdominates("; + llvm::outs() << blockIds[block] << ", " << blockIds[nestedBlock] + << ") = " + << std::to_string(dominatesOrPostDominates( + dominanceInfo, block, nestedBlock)) + << " (properly = " + << std::to_string(properlyDominatesOrPostDominates( + dominanceInfo, block, nestedBlock)) + << ")\n"; } }); }); @@ -101,24 +126,24 @@ struct TestDominancePass } void runOnOperation() override { - llvm::errs() << "Testing : " << getOperation().getName() << "\n"; + llvm::outs() << "Testing : " << getOperation().getName() << "\n"; DominanceTest dominanceTest(getOperation()); // Print dominance information. - llvm::errs() << "--- DominanceInfo ---\n"; + llvm::outs() << "--- DominanceInfo ---\n"; dominanceTest.printDominance(getAnalysis(), /*printCommonDominatorInfo=*/true); - llvm::errs() << "--- PostDominanceInfo ---\n"; + llvm::outs() << "--- PostDominanceInfo ---\n"; dominanceTest.printDominance(getAnalysis(), /*printCommonDominatorInfo=*/true); // Print dominance relationship between blocks. - llvm::errs() << "--- Block Dominance relationship ---\n"; + llvm::outs() << "--- Block Dominance relationship ---\n"; dominanceTest.printDominance(getAnalysis(), /*printCommonDominatorInfo=*/false); - llvm::errs() << "--- Block PostDominance relationship ---\n"; + llvm::outs() << "--- Block PostDominance relationship ---\n"; dominanceTest.printDominance(getAnalysis(), /*printCommonDominatorInfo=*/false); }