diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp index 5eb3267adb0799..1231f954298adc 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp @@ -9,7 +9,8 @@ #include "UseStartsEndsWithCheck.h" #include "../utils/ASTUtils.h" -#include "../utils/OptionsUtils.h" +#include "../utils/Matchers.h" +#include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Lex/Lexer.h" #include @@ -82,60 +83,53 @@ UseStartsEndsWithCheck::UseStartsEndsWithCheck(StringRef Name, void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) { const auto ZeroLiteral = integerLiteral(equals(0)); - const auto HasStartsWithMethodWithName = [](const std::string &Name) { - return hasMethod( - cxxMethodDecl(hasName(Name), isConst(), parameterCountIs(1)) - .bind("starts_with_fun")); + const auto ClassTypeWithMethod = [](const StringRef MethodBoundName, + const auto... 
Methods) { + return cxxRecordDecl(anyOf( + hasMethod(cxxMethodDecl(isConst(), parameterCountIs(1), + returns(booleanType()), hasAnyName(Methods)) + .bind(MethodBoundName))...)); }; - const auto HasStartsWithMethod = - anyOf(HasStartsWithMethodWithName("starts_with"), - HasStartsWithMethodWithName("startsWith"), - HasStartsWithMethodWithName("startswith")); + const auto OnClassWithStartsWithFunction = - on(hasType(hasCanonicalType(hasDeclaration(cxxRecordDecl( - anyOf(HasStartsWithMethod, - hasAnyBase(hasType(hasCanonicalType( - hasDeclaration(cxxRecordDecl(HasStartsWithMethod))))))))))); - - const auto HasEndsWithMethodWithName = [](const std::string &Name) { - return hasMethod( - cxxMethodDecl(hasName(Name), isConst(), parameterCountIs(1)) - .bind("ends_with_fun")); - }; - const auto HasEndsWithMethod = anyOf(HasEndsWithMethodWithName("ends_with"), - HasEndsWithMethodWithName("endsWith"), - HasEndsWithMethodWithName("endswith")); - const auto OnClassWithEndsWithFunction = - on(expr(hasType(hasCanonicalType(hasDeclaration(cxxRecordDecl( - anyOf(HasEndsWithMethod, - hasAnyBase(hasType(hasCanonicalType(hasDeclaration( - cxxRecordDecl(HasEndsWithMethod))))))))))) - .bind("haystack")); + ClassTypeWithMethod("starts_with_fun", "starts_with", "startsWith", + "startswith", "StartsWith"); + + const auto OnClassWithEndsWithFunction = ClassTypeWithMethod( + "ends_with_fun", "ends_with", "endsWith", "endswith", "EndsWith"); // Case 1: X.find(Y) [!=]= 0 -> starts_with. const auto FindExpr = cxxMemberCallExpr( anyOf(argumentCountIs(1), hasArgument(1, ZeroLiteral)), - callee(cxxMethodDecl(hasName("find")).bind("find_fun")), - OnClassWithStartsWithFunction, hasArgument(0, expr().bind("needle"))); + callee( + cxxMethodDecl(hasName("find"), ofClass(OnClassWithStartsWithFunction)) + .bind("find_fun")), + hasArgument(0, expr().bind("needle"))); // Case 2: X.rfind(Y, 0) [!=]= 0 -> starts_with. 
const auto RFindExpr = cxxMemberCallExpr( hasArgument(1, ZeroLiteral), - callee(cxxMethodDecl(hasName("rfind")).bind("find_fun")), - OnClassWithStartsWithFunction, hasArgument(0, expr().bind("needle"))); + callee(cxxMethodDecl(hasName("rfind"), + ofClass(OnClassWithStartsWithFunction)) + .bind("find_fun")), + hasArgument(0, expr().bind("needle"))); // Case 3: X.compare(0, LEN(Y), Y) [!=]= 0 -> starts_with. const auto CompareExpr = cxxMemberCallExpr( argumentCountIs(3), hasArgument(0, ZeroLiteral), - callee(cxxMethodDecl(hasName("compare")).bind("find_fun")), - OnClassWithStartsWithFunction, hasArgument(2, expr().bind("needle")), + callee(cxxMethodDecl(hasName("compare"), + ofClass(OnClassWithStartsWithFunction)) + .bind("find_fun")), + hasArgument(2, expr().bind("needle")), hasArgument(1, lengthExprForStringNode("needle"))); // Case 4: X.compare(LEN(X) - LEN(Y), LEN(Y), Y) [!=]= 0 -> ends_with. const auto CompareEndsWithExpr = cxxMemberCallExpr( argumentCountIs(3), - callee(cxxMethodDecl(hasName("compare")).bind("find_fun")), - OnClassWithEndsWithFunction, hasArgument(2, expr().bind("needle")), + callee(cxxMethodDecl(hasName("compare"), + ofClass(OnClassWithEndsWithFunction)) + .bind("find_fun")), + on(expr().bind("haystack")), hasArgument(2, expr().bind("needle")), hasArgument(1, lengthExprForStringNode("needle")), hasArgument(0, binaryOperator(hasOperatorName("-"), @@ -145,7 +139,7 @@ void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) { // All cases comparing to 0. Finder->addMatcher( binaryOperator( - hasAnyOperatorName("==", "!="), + matchers::isEqualityOperator(), hasOperands(cxxMemberCallExpr(anyOf(FindExpr, RFindExpr, CompareExpr, CompareEndsWithExpr)) .bind("find_expr"), @@ -156,7 +150,7 @@ void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) { // Case 5: X.rfind(Y) [!=]= LEN(X) - LEN(Y) -> ends_with. 
Finder->addMatcher( binaryOperator( - hasAnyOperatorName("==", "!="), + matchers::isEqualityOperator(), hasOperands( cxxMemberCallExpr( anyOf( @@ -166,8 +160,10 @@ void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) { 1, anyOf(declRefExpr(to(varDecl(hasName("npos")))), memberExpr(member(hasName("npos"))))))), - callee(cxxMethodDecl(hasName("rfind")).bind("find_fun")), - OnClassWithEndsWithFunction, + callee(cxxMethodDecl(hasName("rfind"), + ofClass(OnClassWithEndsWithFunction)) + .bind("find_fun")), + on(expr().bind("haystack")), hasArgument(0, expr().bind("needle"))) .bind("find_expr"), binaryOperator(hasOperatorName("-"), @@ -190,9 +186,8 @@ void UseStartsEndsWithCheck::check(const MatchFinder::MatchResult &Result) { const CXXMethodDecl *ReplacementFunction = StartsWithFunction ? StartsWithFunction : EndsWithFunction; - if (ComparisonExpr->getBeginLoc().isMacroID()) { + if (ComparisonExpr->getBeginLoc().isMacroID()) return; - } const bool Neg = ComparisonExpr->getOpcode() == BO_NE; @@ -220,9 +215,8 @@ void UseStartsEndsWithCheck::check(const MatchFinder::MatchResult &Result) { (ReplacementFunction->getName() + "(").str()); // Add possible negation '!'. 
- if (Neg) { + if (Neg) Diagnostic << FixItHint::CreateInsertion(FindExpr->getBeginLoc(), "!"); - } } } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp index 798af260a3b66c..91477241e82e54 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp @@ -32,14 +32,9 @@ struct prefer_underscore_version_flip { size_t find(const char *s, size_t pos = 0) const; }; -struct prefer_underscore_version_inherit : public string_like { - bool startsWith(const char *s) const; -}; - void test(std::string s, std::string_view sv, sub_string ss, sub_sub_string sss, string_like sl, string_like_camel slc, prefer_underscore_version puv, - prefer_underscore_version_flip puvf, - prefer_underscore_version_inherit puvi) { + prefer_underscore_version_flip puvf) { s.find("a") == 0; // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of find() == 0 // CHECK-FIXES: s.starts_with("a"); @@ -153,12 +148,6 @@ void test(std::string s, std::string_view sv, sub_string ss, sub_sub_string sss, // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with // CHECK-FIXES: puvf.starts_with("a"); - // Here, the subclass has startsWith, the superclass has starts_with. - // We prefer the version from the subclass. 
- puvi.find("a") == 0; - // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use startsWith - // CHECK-FIXES: puvi.startsWith("a"); - s.compare(0, 1, "a") == 0; // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of compare() == 0 // CHECK-FIXES: s.starts_with("a"); diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 8add0a53e5be13..f36a5472b7e17d 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -5505,6 +5505,31 @@ the configuration (without a prefix: ``Auto``). } } +.. _RemoveEmptyLinesInUnwrappedLines: + +**RemoveEmptyLinesInUnwrappedLines** (``Boolean``) :versionbadge:`clang-format 20` :ref:`¶ ` + Remove empty lines within unwrapped lines. + + .. code-block:: c++ + + false: true: + + int c vs. int c = a + b; + + = a + b; + + enum : unsigned vs. enum : unsigned { + AA = 0, + { BB + AA = 0, } myEnum; + BB + } myEnum; + + while ( vs. while (true) { + } + true) { + } + .. _RemoveParentheses: **RemoveParentheses** (``RemoveParenthesesStyle``) :versionbadge:`clang-format 17` :ref:`¶ ` diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ad60e48c4dcac5..d80e0aa98974b7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -724,8 +724,10 @@ clang-format - Adds ``BreakBinaryOperations`` option. - Adds ``TemplateNames`` option. - Adds ``AlignFunctionDeclarations`` option to ``AlignConsecutiveDeclarations``. -- Adds ``IndentOnly`` suboption to ``ReflowComments`` to fix the indentation of multi-line comments - without touching their contents, renames ``false`` to ``Never``, and ``true`` to ``Always``. +- Adds ``IndentOnly`` suboption to ``ReflowComments`` to fix the indentation of + multi-line comments without touching their contents, renames ``false`` to + ``Never``, and ``true`` to ``Always``. +- Adds ``RemoveEmptyLinesInUnwrappedLines`` option. 
libclang -------- diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 72df1e35aaec20..2b80e43b506384 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -138,6 +138,8 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, Sv SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x2_t", "svbfloat16x2_t", SveBFloat16x2, SveBFloat16x2Ty, 8, 16, 2) +SVE_VECTOR_TYPE_INT("__clang_svmfloat8x2_t", "svmfloat8x2_t", SveMFloat8x2, SveMFloat8x2Ty, 16, 8, 2, false) + // // x3 // @@ -158,6 +160,8 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, Sv SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x3_t", "svbfloat16x3_t", SveBFloat16x3, SveBFloat16x3Ty, 8, 16, 3) +SVE_VECTOR_TYPE_INT("__clang_svmfloat8x3_t", "svmfloat8x3_t", SveMFloat8x3, SveMFloat8x3Ty, 16, 8, 3, false) + // // x4 // @@ -178,6 +182,8 @@ SVE_VECTOR_TYPE_FLOAT("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, Sv SVE_VECTOR_TYPE_BFLOAT("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 8, 16, 4) +SVE_VECTOR_TYPE_INT("__clang_svmfloat8x4_t", "svmfloat8x4_t", SveMFloat8x4, SveMFloat8x4Ty, 16, 8, 4, false) + SVE_PREDICATE_TYPE_ALL("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16, 1) SVE_PREDICATE_TYPE_ALL("__clang_svboolx2_t", "svboolx2_t", SveBoolx2, SveBoolx2Ty, 16, 2) SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4Ty, 16, 4) diff --git a/clang/include/clang/Basic/StackExhaustionHandler.h b/clang/include/clang/Basic/StackExhaustionHandler.h new file mode 100644 index 00000000000000..fb02b9521cb48f --- /dev/null +++ b/clang/include/clang/Basic/StackExhaustionHandler.h @@ -0,0 +1,45 @@ +//===--- StackExhaustionHandler.h - A utility for warning once when close to out +// of stack space -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines a utility for warning once when close to out of stack space. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_STACK_EXHAUSTION_HANDLER_H +#define LLVM_CLANG_BASIC_STACK_EXHAUSTION_HANDLER_H + +#include "clang/Basic/Diagnostic.h" + +namespace clang { +class StackExhaustionHandler { +public: + StackExhaustionHandler(DiagnosticsEngine &diags) : DiagsRef(diags) {} + + /// Run some code with "sufficient" stack space. (Currently, at least 256K + /// is guaranteed). Produces a warning if we're low on stack space and + /// allocates more in that case. Use this in code that may recurse deeply to + /// avoid stack overflow. + void runWithSufficientStackSpace(SourceLocation Loc, + llvm::function_ref Fn); + + /// Check to see if we're low on stack space and produce a warning if we're + /// low on stack space (Currently, at least 256K is guaranteed). + void warnOnStackNearlyExhausted(SourceLocation Loc); + +private: + /// Warn that the stack is nearly exhausted.
+ void warnStackExhausted(SourceLocation Loc); + + DiagnosticsEngine &DiagsRef; + bool WarnedStackExhausted = false; +}; +} // end namespace clang + +#endif // LLVM_CLANG_BASIC_STACK_EXHAUSTION_HANDLER_H diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e363c8cfc27f8d..4823d08f7ab9a9 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3459,7 +3459,8 @@ def fno_strict_aliasing : Flag<["-"], "fno-strict-aliasing">, Group, def fstruct_path_tbaa : Flag<["-"], "fstruct-path-tbaa">, Group; def fno_struct_path_tbaa : Flag<["-"], "fno-struct-path-tbaa">, Group; def fno_strict_enums : Flag<["-"], "fno-strict-enums">, Group; -def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group; +def fno_strict_overflow : Flag<["-"], "fno-strict-overflow">, Group, + Visibility<[ClangOption, FlangOption]>; def fno_pointer_tbaa : Flag<["-"], "fno-pointer-tbaa">, Group; def fno_temp_file : Flag<["-"], "fno-temp-file">, Group, Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>, HelpText< @@ -3475,7 +3476,8 @@ def fno_verbose_asm : Flag<["-"], "fno-verbose-asm">, Group, Visibility<[ClangOption, CC1Option]>, MarshallingInfoNegativeFlag>; def fno_working_directory : Flag<["-"], "fno-working-directory">, Group; -def fno_wrapv : Flag<["-"], "fno-wrapv">, Group; +def fno_wrapv : Flag<["-"], "fno-wrapv">, Group, + Visibility<[ClangOption, FlangOption]>; def fobjc_arc : Flag<["-"], "fobjc-arc">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Synthesize retain and release calls for Objective-C pointers">; @@ -4054,7 +4056,8 @@ defm strict_vtable_pointers : BoolFOption<"strict-vtable-pointers", "Enable optimizations based on the strict rules for" " overwriting polymorphic C++ objects">, NegFlag>; -def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group; +def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group, + Visibility<[ClangOption, FlangOption]>; def fpointer_tbaa : 
Flag<["-"], "fpointer-tbaa">, Group; def fdriver_only : Flag<["-"], "fdriver-only">, Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption]>, @@ -4323,7 +4326,7 @@ defm virtual_function_elimination : BoolFOption<"virtual-function-elimination", NegFlag, BothFlags<[], [ClangOption, CLOption]>>; def fwrapv : Flag<["-"], "fwrapv">, Group, - Visibility<[ClangOption, CC1Option]>, + Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Treat signed integer overflow as two's complement">; def fwritable_strings : Flag<["-"], "fwritable-strings">, Group, Visibility<[ClangOption, CC1Option]>, diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index a0762b088b68ef..debba1c7822839 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -3938,6 +3938,29 @@ struct FormatStyle { /// \version 14 bool RemoveBracesLLVM; + /// Remove empty lines within unwrapped lines. + /// \code + /// false: true: + /// + /// int c vs. int c = a + b; + /// + /// = a + b; + /// + /// enum : unsigned vs. enum : unsigned { + /// AA = 0, + /// { BB + /// AA = 0, } myEnum; + /// BB + /// } myEnum; + /// + /// while ( vs. while (true) { + /// } + /// true) { + /// } + /// \endcode + /// \version 20 + bool RemoveEmptyLinesInUnwrappedLines; + /// Types of redundant parentheses to remove. enum RemoveParenthesesStyle : int8_t { /// Do not remove parentheses. 
@@ -5232,6 +5255,8 @@ struct FormatStyle { RawStringFormats == R.RawStringFormats && ReferenceAlignment == R.ReferenceAlignment && RemoveBracesLLVM == R.RemoveBracesLLVM && + RemoveEmptyLinesInUnwrappedLines == + R.RemoveEmptyLinesInUnwrappedLines && RemoveParentheses == R.RemoveParentheses && RemoveSemicolon == R.RemoveSemicolon && RequiresClausePosition == R.RequiresClausePosition && diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9fc4033dbd013a..460cec6dbb648f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -49,6 +49,7 @@ #include "clang/Basic/PragmaKinds.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/StackExhaustionHandler.h" #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/TypeTraits.h" @@ -546,9 +547,6 @@ class Sema final : public SemaBase { /// Print out statistics about the semantic analysis. void PrintStats() const; - /// Warn that the stack is nearly exhausted. - void warnStackExhausted(SourceLocation Loc); - /// Run some code with "sufficient" stack space. (Currently, at least 256K is /// guaranteed). Produces a warning if we're low on stack space and allocates /// more in that case. 
Use this in code that may recurse deeply (for example, @@ -1183,7 +1181,7 @@ class Sema final : public SemaBase { std::optional> CachedDarwinSDKInfo; bool WarnedDarwinSDKInfoMissing = false; - bool WarnedStackExhausted = false; + StackExhaustionHandler StackHandler; Sema(const Sema &) = delete; void operator=(const Sema &) = delete; diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index d735e2dcaa8c56..e397dff097652b 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1149,7 +1149,7 @@ enum PredefinedTypeIDs { /// /// Type IDs for non-predefined types will start at /// NUM_PREDEF_TYPE_IDs. -const unsigned NUM_PREDEF_TYPE_IDS = 506; +const unsigned NUM_PREDEF_TYPE_IDS = 509; // Ensure we do not overrun the predefined types we reserved // in the enum PredefinedTypeIDs above. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index ee4e897b248882..b476a40ebd2c8c 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -19,6 +19,7 @@ #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/OpenCLOptions.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/StackExhaustionHandler.h" #include "clang/Basic/Version.h" #include "clang/Lex/ExternalPreprocessorSource.h" #include "clang/Lex/HeaderSearch.h" @@ -445,7 +446,7 @@ class ASTReader DiagnosticsEngine &Diags; // Sema has duplicate logic, but SemaObj can sometimes be null so ASTReader // has its own version. - bool WarnedStackExhausted = false; + StackExhaustionHandler StackHandler; /// The semantic analysis object that will be processing the /// AST files and the translation unit that uses it. @@ -2180,7 +2181,8 @@ class ASTReader /// Report a diagnostic. 
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const; - void warnStackExhausted(SourceLocation Loc); + void runWithSufficientStackSpace(SourceLocation Loc, + llvm::function_ref Fn); IdentifierInfo *DecodeIdentifierInfo(serialization::IdentifierID ID); diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 8ca63bf64aa0ef..a71c0dcc9381e8 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -4535,6 +4535,10 @@ bool Compiler::VisitCallExpr(const CallExpr *E) { return VisitBuiltinCallExpr(E, Builtin::BI__builtin_operator_delete); } } + // Explicit calls to trivial destructors + if (const auto *DD = dyn_cast_if_present(FuncDecl); + DD && DD->isTrivial()) + return true; QualType ReturnType = E->getCallReturnType(Ctx.getASTContext()); std::optional T = classify(ReturnType); diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index a1a92562cc5e3d..f034bde309035f 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -1841,6 +1841,7 @@ bool Init(InterpState &S, CodePtr OpPC) { assert(false); return false; } + Ptr.activate(); Ptr.initialize(); new (&Ptr.deref()) T(Value); return true; @@ -1852,6 +1853,7 @@ bool InitPop(InterpState &S, CodePtr OpPC) { const Pointer &Ptr = S.Stk.pop(); if (!CheckInit(S, OpPC, Ptr)) return false; + Ptr.activate(); Ptr.initialize(); new (&Ptr.deref()) T(Value); return true; diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index f083ffff87a8ec..8321cee0e0bc94 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2512,7 +2512,8 @@ bool VarDecl::isUsableInConstantExpressions(const ASTContext &Context) const { if (!DefVD->mightBeUsableInConstantExpressions(Context)) return false; // ... and its initializer is a constant initializer. 
- if (Context.getLangOpts().CPlusPlus && !DefVD->hasConstantInitialization()) + if ((Context.getLangOpts().CPlusPlus || getLangOpts().C23) && + !DefVD->hasConstantInitialization()) return false; // C++98 [expr.const]p1: // An integral constant-expression can involve only [...] const variables @@ -2619,8 +2620,11 @@ bool VarDecl::hasICEInitializer(const ASTContext &Context) const { } bool VarDecl::hasConstantInitialization() const { - // In C, all globals (and only globals) have constant initialization. - if (hasGlobalStorage() && !getASTContext().getLangOpts().CPlusPlus) + // In C, all globals and constexpr variables should have constant + // initialization. For constexpr variables in C check that initializer is a + // constant initializer because they can be used in constant expressions. + if (hasGlobalStorage() && !getASTContext().getLangOpts().CPlusPlus && + !isConstexpr()) return true; // In C++, it depends on whether the evaluation at the point of definition diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index e7ebc8f191aa6b..e11e1ac4a6fa63 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -89,6 +89,7 @@ add_clang_library(clangBasic SourceManager.cpp SourceMgrAdapter.cpp Stack.cpp + StackExhaustionHandler.cpp TargetID.cpp TargetInfo.cpp Targets.cpp diff --git a/clang/lib/Basic/StackExhaustionHandler.cpp b/clang/lib/Basic/StackExhaustionHandler.cpp new file mode 100644 index 00000000000000..24b499c810dbfe --- /dev/null +++ b/clang/lib/Basic/StackExhaustionHandler.cpp @@ -0,0 +1,35 @@ +//===--- StackExhaustionHandler.cpp - - A utility for warning once when close +// to out of stack space -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines a utility for warning once when close to out of stack space. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/StackExhaustionHandler.h" +#include "clang/Basic/Stack.h" + +void clang::StackExhaustionHandler::runWithSufficientStackSpace( + SourceLocation Loc, llvm::function_ref Fn) { + clang::runWithSufficientStackSpace([&] { warnStackExhausted(Loc); }, Fn); +} + +void clang::StackExhaustionHandler::warnOnStackNearlyExhausted( + SourceLocation Loc) { + if (isStackNearlyExhausted()) + warnStackExhausted(Loc); +} + +void clang::StackExhaustionHandler::warnStackExhausted(SourceLocation Loc) { + // Only warn about this once. + if (!WarnedStackExhausted) { + DiagsRef.Report(Loc, diag::warn_stack_exhausted); + WarnedStackExhausted = true; + } +} diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 870f0f38bc3057..eaaba7642bd7b2 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -100,6 +100,14 @@ bool RISCVTargetInfo::validateAsmConstraint( case 'S': // A symbol or label reference with a constant offset Info.setAllowsRegister(); return true; + case 'c': + // A RVC register - GPR or FPR + if (Name[1] == 'r' || Name[1] == 'f') { + Info.setAllowsRegister(); + Name += 1; + return true; + } + return false; case 'v': // A vector register. if (Name[1] == 'r' || Name[1] == 'd' || Name[1] == 'm') { @@ -114,6 +122,8 @@ bool RISCVTargetInfo::validateAsmConstraint( std::string RISCVTargetInfo::convertConstraint(const char *&Constraint) const { std::string R; switch (*Constraint) { + // c* and v* are two-letter constraints on RISC-V.
+ case 'c': case 'v': R = std::string("^") + std::string(Constraint, 2); Constraint += 1; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6fcc60f3e4063c..cd7a4f2b09290b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5636,10 +5636,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); - Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); + Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy); return RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), - {Arg0, BCast, PacketSize, PacketAlign})); + {Arg0, ACast, PacketSize, PacketAlign})); } else { assert(4 == E->getNumArgs() && "Illegal number of parameters to pipe function"); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 33a0b293f2fce8..e9b90f85611c93 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -346,7 +346,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), Target(C.getTargetInfo()), ABI(createCXXABI(*this)), - VMContext(M.getContext()), VTables(*this), + VMContext(M.getContext()), VTables(*this), StackHandler(diags), SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. @@ -1605,17 +1605,9 @@ void CodeGenModule::ErrorUnsupported(const Decl *D, const char *Type) { getDiags().Report(Context.getFullLoc(D->getLocation()), DiagID) << Msg; } -void CodeGenModule::warnStackExhausted(SourceLocation Loc) { - // Only warn about this once. 
- if (!WarnedStackExhausted) { - getDiags().Report(Loc, diag::warn_stack_exhausted); - WarnedStackExhausted = true; - } -} - void CodeGenModule::runWithSufficientStackSpace(SourceLocation Loc, llvm::function_ref Fn) { - clang::runWithSufficientStackSpace([&] { warnStackExhausted(Loc); }, Fn); + StackHandler.runWithSufficientStackSpace(Loc, Fn); } llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) { diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 24013898859159..1756c9fa509aaa 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -26,6 +26,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/NoSanitizeList.h" #include "clang/Basic/ProfileList.h" +#include "clang/Basic/StackExhaustionHandler.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/XRayLists.h" #include "clang/Lex/PreprocessorOptions.h" @@ -463,7 +464,7 @@ class CodeGenModule : public CodeGenTypeCache { std::unique_ptr PGOReader; InstrProfStats PGOStats; std::unique_ptr SanStats; - bool WarnedStackExhausted = false; + StackExhaustionHandler StackHandler; /// Statement for which Xteam reduction code is being generated currently const Stmt *CurrentXteamRedStmt = nullptr; @@ -1440,9 +1441,6 @@ class CodeGenModule : public CodeGenTypeCache { /// Print out an error that codegen doesn't support the specified decl yet. void ErrorUnsupported(const Decl *D, const char *Type); - /// Warn that the stack is nearly exhausted. - void warnStackExhausted(SourceLocation Loc); - /// Run some code with "sufficient" stack space. (Currently, at least 256K is /// guaranteed). Produces a warning if we're low on stack space and allocates /// more in that case. 
Use this in code that may recurse deeply to avoid stack diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 6c2a6f9ba66fe7..89f9457523824a 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3437,7 +3437,7 @@ class ItaniumRTTIBuilder { llvm::Constant *GetAddrOfExternalRTTIDescriptor(QualType Ty); /// BuildVTablePointer - Build the vtable pointer for the given type. - void BuildVTablePointer(const Type *Ty); + void BuildVTablePointer(const Type *Ty, llvm::Constant *StorageAddress); /// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single /// inheritance, according to the Itanium C++ ABI, 2.9.5p6b. @@ -3834,7 +3834,8 @@ static bool CanUseSingleInheritance(const CXXRecordDecl *RD) { return true; } -void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { +void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty, + llvm::Constant *StorageAddress) { // abi::__class_type_info. static const char * const ClassTypeInfo = "_ZTVN10__cxxabiv117__class_type_infoE"; @@ -3981,9 +3982,12 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { VTable, Two); } - if (auto &Schema = CGM.getCodeGenOpts().PointerAuth.CXXTypeInfoVTablePointer) - VTable = CGM.getConstantSignedPointer(VTable, Schema, nullptr, GlobalDecl(), - QualType(Ty, 0)); + if (const auto &Schema = + CGM.getCodeGenOpts().PointerAuth.CXXTypeInfoVTablePointer) + VTable = CGM.getConstantSignedPointer( + VTable, Schema, + Schema.isAddressDiscriminated() ? 
StorageAddress : nullptr, + GlobalDecl(), QualType(Ty, 0)); Fields.push_back(VTable); } @@ -4099,8 +4103,18 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm::GlobalVariable::LinkageTypes Linkage, llvm::GlobalValue::VisibilityTypes Visibility, llvm::GlobalValue::DLLStorageClassTypes DLLStorageClass) { + SmallString<256> Name; + llvm::raw_svector_ostream Out(Name); + CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); + llvm::Module &M = CGM.getModule(); + llvm::GlobalVariable *OldGV = M.getNamedGlobal(Name); + // int8 is an arbitrary type to be replaced later with replaceInitializer. + llvm::GlobalVariable *GV = + new llvm::GlobalVariable(M, CGM.Int8Ty, /*isConstant=*/true, Linkage, + /*Initializer=*/nullptr, Name); + // Add the vtable pointer. - BuildVTablePointer(cast(Ty)); + BuildVTablePointer(cast(Ty), GV); // And the name. llvm::GlobalVariable *TypeName = GetAddrOfTypeName(Ty, Linkage); @@ -4218,16 +4232,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm_unreachable("HLSL doesn't support RTTI"); } - llvm::Constant *Init = llvm::ConstantStruct::getAnon(Fields); - - SmallString<256> Name; - llvm::raw_svector_ostream Out(Name); - CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); - llvm::Module &M = CGM.getModule(); - llvm::GlobalVariable *OldGV = M.getNamedGlobal(Name); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(M, Init->getType(), - /*isConstant=*/true, Linkage, Init, Name); + GV->replaceInitializer(llvm::ConstantStruct::getAnon(Fields)); // Export the typeinfo in the same circumstances as the vtable is exported. 
auto GVDLLStorageClass = DLLStorageClass; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 87477665876595..d34690edc3eb1e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3650,7 +3650,7 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, StringRef Value = A->getValue(); if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64() && !EffectiveTriple.isARM() && !EffectiveTriple.isThumb() && - !EffectiveTriple.isRISCV()) + !EffectiveTriple.isRISCV() && !EffectiveTriple.isPPC()) D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; if ((EffectiveTriple.isX86() || EffectiveTriple.isARM() || @@ -3690,7 +3690,7 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, << A->getOption().getName() << Value << "sysreg global"; return; } - if (EffectiveTriple.isRISCV()) { + if (EffectiveTriple.isRISCV() || EffectiveTriple.isPPC()) { if (Value != "tls" && Value != "global") { D.Diag(diag::err_drv_invalid_value_with_suggestion) << A->getOption().getName() << Value << "tls global"; @@ -3711,7 +3711,7 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, StringRef Value = A->getValue(); if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64() && !EffectiveTriple.isARM() && !EffectiveTriple.isThumb() && - !EffectiveTriple.isRISCV()) + !EffectiveTriple.isRISCV() && !EffectiveTriple.isPPC()) D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; int Offset; @@ -3731,7 +3731,7 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_reg_EQ)) { StringRef Value = A->getValue(); if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64() && - !EffectiveTriple.isRISCV()) + !EffectiveTriple.isRISCV() && !EffectiveTriple.isPPC()) D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) 
<< TripleStr; if (EffectiveTriple.isX86() && (Value != "fs" && Value != "gs")) { @@ -3748,6 +3748,16 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, << A->getOption().getName() << Value << "tp"; return; } + if (EffectiveTriple.isPPC64() && Value != "r13") { + D.Diag(diag::err_drv_invalid_value_with_suggestion) + << A->getOption().getName() << Value << "r13"; + return; + } + if (EffectiveTriple.isPPC32() && Value != "r2") { + D.Diag(diag::err_drv_invalid_value_with_suggestion) + << A->getOption().getName() << Value << "r2"; + return; + } A->render(Args, CmdArgs); } @@ -7075,16 +7085,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ); - // -fno-strict-overflow implies -fwrapv if it isn't disabled, but - // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. - if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { - if (A->getOption().matches(options::OPT_fwrapv)) - CmdArgs.push_back("-fwrapv"); - } else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow, - options::OPT_fno_strict_overflow)) { - if (A->getOption().matches(options::OPT_fno_strict_overflow)) - CmdArgs.push_back("-fwrapv"); - } + // Handle -f[no-]wrapv and -f[no-]strict-overflow, which are used by both + // clang and flang. 
+ renderCommonIntegerOverflowOptions(Args, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_ffinite_loops, options::OPT_fno_finite_loops); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index efc8179fad65e9..e28b2ee6431f31 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3247,3 +3247,17 @@ bool tools::shouldRecordCommandLine(const ToolChain &TC, return FRecordCommandLine || TC.UseDwarfDebugFlags() || GRecordCommandLine; } + +void tools::renderCommonIntegerOverflowOptions(const ArgList &Args, + ArgStringList &CmdArgs) { + // -fno-strict-overflow implies -fwrapv if it isn't disabled, but + // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. + if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { + if (A->getOption().matches(options::OPT_fwrapv)) + CmdArgs.push_back("-fwrapv"); + } else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow, + options::OPT_fno_strict_overflow)) { + if (A->getOption().matches(options::OPT_fno_strict_overflow)) + CmdArgs.push_back("-fwrapv"); + } +} diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 322936e6e06dfe..9162649e0b24ab 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -286,6 +286,9 @@ bool shouldRecordCommandLine(const ToolChain &TC, bool &FRecordCommandLine, bool &GRecordCommandLine); +void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs); + } // end namespace tools } // end namespace driver } // end namespace clang diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 22c94e859bdd6c..5a3127498cb06c 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -866,6 +866,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, 
} } + renderCommonIntegerOverflowOptions(Args, CmdArgs); + assert((Output.isFilename() || Output.isNothing()) && "Invalid output."); if (Output.isFilename()) { CmdArgs.push_back("-o"); diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 148270795c562f..c612960ff37ac8 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1104,6 +1104,8 @@ template <> struct MappingTraits { IO.mapOptional("ReferenceAlignment", Style.ReferenceAlignment); IO.mapOptional("ReflowComments", Style.ReflowComments); IO.mapOptional("RemoveBracesLLVM", Style.RemoveBracesLLVM); + IO.mapOptional("RemoveEmptyLinesInUnwrappedLines", + Style.RemoveEmptyLinesInUnwrappedLines); IO.mapOptional("RemoveParentheses", Style.RemoveParentheses); IO.mapOptional("RemoveSemicolon", Style.RemoveSemicolon); IO.mapOptional("RequiresClausePosition", Style.RequiresClausePosition); @@ -1582,6 +1584,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.ReferenceAlignment = FormatStyle::RAS_Pointer; LLVMStyle.ReflowComments = FormatStyle::RCS_Always; LLVMStyle.RemoveBracesLLVM = false; + LLVMStyle.RemoveEmptyLinesInUnwrappedLines = false; LLVMStyle.RemoveParentheses = FormatStyle::RPS_Leave; LLVMStyle.RemoveSemicolon = false; LLVMStyle.RequiresClausePosition = FormatStyle::RCPS_OwnLine; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index fcefaa7bb298ea..13037b6d00604b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -5509,8 +5509,10 @@ static bool isAllmanLambdaBrace(const FormatToken &Tok) { bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) const { const FormatToken &Left = *Right.Previous; - if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0) + if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0 && + (!Style.RemoveEmptyLinesInUnwrappedLines || &Right == Line.First)) { return true; + } if 
(Style.BreakFunctionDefinitionParameters && Line.MightBeFunctionDecl && Line.mightBeFunctionDefinition() && Left.MightBeFunctionDeclParen && diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index c9625c39e527b4..bda9850670ab06 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2504,6 +2504,11 @@ bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { // Assume there are no blocks inside a braced init list apart // from the ones we explicitly parse out (like lambdas). FormatTok->setBlockKind(BK_BracedInit); + if (!IsAngleBracket) { + auto *Prev = FormatTok->Previous; + if (Prev && Prev->is(tok::greater)) + Prev->setFinalizedType(TT_TemplateCloser); + } nextToken(); parseBracedList(); break; diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index bc18e503849a25..8d076961df35cd 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -220,7 +220,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, AnalysisWarnings(*this), ThreadSafetyDeclCache(nullptr), LateTemplateParser(nullptr), LateTemplateParserCleanup(nullptr), OpaqueParser(nullptr), CurContext(nullptr), ExternalSource(nullptr), - CurScope(nullptr), Ident_super(nullptr), + StackHandler(Diags), CurScope(nullptr), Ident_super(nullptr), AMDGPUPtr(std::make_unique(*this)), ARMPtr(std::make_unique(*this)), AVRPtr(std::make_unique(*this)), @@ -562,17 +562,9 @@ Sema::~Sema() { SemaPPCallbackHandler->reset(); } -void Sema::warnStackExhausted(SourceLocation Loc) { - // Only warn about this once. 
- if (!WarnedStackExhausted) { - Diag(Loc, diag::warn_stack_exhausted); - WarnedStackExhausted = true; - } -} - void Sema::runWithSufficientStackSpace(SourceLocation Loc, llvm::function_ref Fn) { - clang::runWithSufficientStackSpace([&] { warnStackExhausted(Loc); }, Fn); + StackHandler.runWithSufficientStackSpace(Loc, Fn); } bool Sema::makeUnavailableInSystemHeader(SourceLocation loc, diff --git a/clang/lib/Sema/SemaFunctionEffects.cpp b/clang/lib/Sema/SemaFunctionEffects.cpp index 70e5d78661a835..3fa326db06ee41 100644 --- a/clang/lib/Sema/SemaFunctionEffects.cpp +++ b/clang/lib/Sema/SemaFunctionEffects.cpp @@ -1540,6 +1540,7 @@ bool Sema::FunctionEffectDiff::shouldDiagnoseConversion( // matching is better. return true; } + break; case FunctionEffect::Kind::Blocking: case FunctionEffect::Kind::Allocating: return false; @@ -1563,6 +1564,7 @@ bool Sema::FunctionEffectDiff::shouldDiagnoseRedeclaration( // All these forms of mismatches are diagnosed. return true; } + break; case FunctionEffect::Kind::Blocking: case FunctionEffect::Kind::Allocating: return false; @@ -1592,7 +1594,7 @@ Sema::FunctionEffectDiff::shouldDiagnoseMethodOverride( case Kind::ConditionMismatch: return OverrideResult::Warn; } - + break; case FunctionEffect::Kind::Blocking: case FunctionEffect::Kind::Allocating: return OverrideResult::NoAction; diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp index 3da4b515b1b114..d1ccc2774152b1 100644 --- a/clang/lib/Sema/SemaRISCV.cpp +++ b/clang/lib/Sema/SemaRISCV.cpp @@ -623,7 +623,12 @@ bool SemaRISCV::CheckBuiltinFunctionCall(const TargetInfo &TI, ASTContext::BuiltinVectorTypeInfo Info = Context.getBuiltinVectorTypeInfo( TheCall->getType()->castAs()); - if (Context.getTypeSize(Info.ElementType) == 64 && !TI.hasFeature("v")) + const FunctionDecl *FD = SemaRef.getCurFunctionDecl(); + llvm::StringMap FunctionFeatureMap; + Context.getFunctionFeatureMap(FunctionFeatureMap, FD); + + if (Context.getTypeSize(Info.ElementType) == 64 && 
!TI.hasFeature("v") && + !FunctionFeatureMap.lookup("v")) return Diag(TheCall->getBeginLoc(), diag::err_riscv_builtin_requires_extension) << /* IsExtension */ true << TheCall->getSourceRange() << "v"; diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 8665c099903dc3..457a9968c32a4a 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -806,8 +806,7 @@ void Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) { // Check to see if we're low on stack space. We can't do anything about this // from here, but we can at least warn the user. - if (isStackNearlyExhausted()) - warnStackExhausted(Ctx.PointOfInstantiation); + StackHandler.warnOnStackNearlyExhausted(Ctx.PointOfInstantiation); } void Sema::popCodeSynthesisContext() { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 1b2473f2457344..1cf6c9352f3686 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -64,6 +64,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/SourceManagerInternals.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/Stack.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "clang/Basic/TokenKinds.h" @@ -9648,18 +9649,15 @@ DiagnosticBuilder ASTReader::Diag(SourceLocation Loc, unsigned DiagID) const { return Diags.Report(Loc, DiagID); } -void ASTReader::warnStackExhausted(SourceLocation Loc) { +void ASTReader::runWithSufficientStackSpace(SourceLocation Loc, + llvm::function_ref Fn) { // When Sema is available, avoid duplicate errors. 
if (SemaObj) { - SemaObj->warnStackExhausted(Loc); + SemaObj->runWithSufficientStackSpace(Loc, Fn); return; } - if (WarnedStackExhausted) - return; - WarnedStackExhausted = true; - - Diag(Loc, diag::warn_stack_exhausted); + StackHandler.runWithSufficientStackSpace(Loc, Fn); } /// Retrieve the identifier table associated with the @@ -10509,13 +10507,14 @@ ASTReader::ASTReader(Preprocessor &PP, InMemoryModuleCache &ModuleCache, bool AllowConfigurationMismatch, bool ValidateSystemInputs, bool ValidateASTInputFilesContent, bool UseGlobalIndex, std::unique_ptr ReadTimer) - : Listener(bool(DisableValidationKind &DisableValidationForModuleKind::PCH) + : Listener(bool(DisableValidationKind & DisableValidationForModuleKind::PCH) ? cast(new SimpleASTReaderListener(PP)) : cast(new PCHValidator(PP, *this))), SourceMgr(PP.getSourceManager()), FileMgr(PP.getFileManager()), - PCHContainerRdr(PCHContainerRdr), Diags(PP.getDiagnostics()), PP(PP), - ContextObj(Context), ModuleMgr(PP.getFileManager(), ModuleCache, - PCHContainerRdr, PP.getHeaderSearchInfo()), + PCHContainerRdr(PCHContainerRdr), Diags(PP.getDiagnostics()), + StackHandler(Diags), PP(PP), ContextObj(Context), + ModuleMgr(PP.getFileManager(), ModuleCache, PCHContainerRdr, + PP.getHeaderSearchInfo()), DummyIdResolver(PP), ReadTimer(std::move(ReadTimer)), isysroot(isysroot), DisableValidationKind(DisableValidationKind), AllowASTWithCompilerErrors(AllowASTWithCompilerErrors), diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 1ccc810f415eb4..d4e392dcc6bcd0 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -4168,8 +4168,7 @@ Decl *ASTReader::ReadDeclRecord(GlobalDeclID ID) { D->setDeclContext(Context.getTranslationUnitDecl()); // Reading some declarations can result in deep recursion. 
- clang::runWithSufficientStackSpace([&] { warnStackExhausted(DeclLoc); }, - [&] { Reader.Visit(D); }); + runWithSufficientStackSpace(DeclLoc, [&] { Reader.Visit(D); }); // If this declaration is also a declaration context, get the // offsets for its tables of lexical and visible declarations. diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp index 6bd83f2372eab6..5673b5cba3f700 100644 --- a/clang/test/AST/ByteCode/placement-new.cpp +++ b/clang/test/AST/ByteCode/placement-new.cpp @@ -300,3 +300,27 @@ namespace UsedToCrash { } int alloc1 = (alloc(), 0); } + +constexpr bool change_union_member() { + union U { + int a; + int b; + }; + U u = {.a = 1}; + std::construct_at(&u.b, 2); + return u.b == 2; +} +static_assert(change_union_member()); + +namespace PR48606 { + struct A { mutable int n = 0; }; + + constexpr bool f() { + A a; + A *p = &a; + p->~A(); + std::construct_at(p); + return true; + } + static_assert(f()); +} diff --git a/clang/test/CodeGen/RISCV/riscv-inline-asm.c b/clang/test/CodeGen/RISCV/riscv-inline-asm.c index fa0bf6aa6aa471..75b91d3c497c50 100644 --- a/clang/test/CodeGen/RISCV/riscv-inline-asm.c +++ b/clang/test/CodeGen/RISCV/riscv-inline-asm.c @@ -3,7 +3,35 @@ // RUN: %clang_cc1 -triple riscv64 -O2 -emit-llvm %s -o - \ // RUN: | FileCheck %s -// Test RISC-V specific inline assembly constraints. +// Test RISC-V specific inline assembly constraints and modifiers. 
+ +long test_r(long x) { +// CHECK-LABEL: define{{.*}} {{i64|i32}} @test_r( +// CHECK: call {{i64|i32}} asm sideeffect "", "=r,r"({{i64|i32}} %{{.*}}) + long ret; + asm volatile ("" : "=r"(ret) : "r"(x)); +// CHECK: call {{i64|i32}} asm sideeffect "", "=r,r"({{i64|i32}} %{{.*}}) + asm volatile ("" : "=r"(ret) : "r"(x)); + return ret; +} + +long test_cr(long x) { +// CHECK-LABEL: define{{.*}} {{i64|i32}} @test_cr( +// CHECK: call {{i64|i32}} asm sideeffect "", "=^cr,^cr"({{i64|i32}} %{{.*}}) + long ret; + asm volatile ("" : "=cr"(ret) : "cr"(x)); + return ret; +} + +float cf; +double cd; +void test_cf(float f, double d) { +// CHECK-LABEL: define{{.*}} void @test_cf( +// CHECK: call float asm sideeffect "", "=^cf,^cf"(float %{{.*}}) + asm volatile("" : "=cf"(cf) : "cf"(f)); +// CHECK: call double asm sideeffect "", "=^cf,^cf"(double %{{.*}}) + asm volatile("" : "=cf"(cd) : "cf"(d)); +} void test_I(void) { // CHECK-LABEL: define{{.*}} void @test_I() @@ -58,3 +86,13 @@ void test_s(void) { asm("// %0 %1 %2" :: "S"(&var), "S"(&arr[1][1]), "S"(test_s)); } + +// CHECK-LABEL: test_modifiers( +// CHECK: call void asm sideeffect "// ${0:i} ${1:i}", "r,r"({{i32|i64}} %val, i32 37) +// CHECK: call void asm sideeffect "// ${0:z} ${1:z}", "i,i"(i32 0, i32 1) +// CHECK: call void asm sideeffect "// ${0:N}", "r"({{i32|i64}} %val) +void test_modifiers(long val) { + asm volatile("// %i0 %i1" :: "r"(val), "r"(37)); + asm volatile("// %z0 %z1" :: "i"(0), "i"(1)); + asm volatile("// %N0" :: "r"(val)); +} diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c index b94f9641decc8e..8e5f015647e414 100644 --- a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c +++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c @@ -153,39 +153,39 @@ _Complex long double test_cargl(_Complex long double cld) { int ilogbl(long double a); // CHECK-LABEL: define dso_local i32 @test_ilogb( -// CHECK-SAME: x86_fp80 noundef [[A:%.*]]) 
local_unnamed_addr #[[ATTR2]] { +// CHECK-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] // // CHECK-WIN64-LABEL: define dso_local i32 @test_ilogb( -// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-WIN64: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] // // CHECK-I686-LABEL: define dso_local i32 @test_ilogb( -// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-I686: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA3]] // // CHECK-PPC-LABEL: define dso_local i32 @test_ilogb( -// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-PPC: [[CALL:%.*]] = tail call i32 @ilogbl(ppc_fp128 noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] // // CHECK-ARM-LABEL: define dso_local i32 @test_ilogb( -// CHECK-ARM-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-ARM-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-ARM: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] // // CHECK-ARM-HF-LABEL: define dso_local i32 @test_ilogb( -// CHECK-ARM-HF-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-ARM-HF-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-ARM-HF: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] // // CHECK-THUMB-LABEL: define i32 @test_ilogb( -// CHECK-THUMB-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-THUMB-SAME: double noundef [[A:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // CHECK-THUMB: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] // // CHECK-AARCH-LABEL: define dso_local i32 @test_ilogb( -// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-AARCH: [[CALL:%.*]] = tail call i32 @ilogbl(fp128 noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA2]] // // CHECK-SPIR-LABEL: define dso_local spir_func i32 @test_ilogb( -// CHECK-SPIR-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SPIR-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-SPIR: [[CALL:%.*]] = tail call spir_func i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] // // CHECK-MINGW32-LABEL: define dso_local i32 @test_ilogb( diff --git a/clang/test/CodeGen/stack-protector-guard.c b/clang/test/CodeGen/stack-protector-guard.c index 4777367c94e733..82616ae800c426 100644 --- a/clang/test/CodeGen/stack-protector-guard.c +++ b/clang/test/CodeGen/stack-protector-guard.c @@ -12,6 +12,12 @@ // RUN: %clang_cc1 -mstack-protector-guard=tls -triple riscv64-unknown-elf \ // RUN: -mstack-protector-guard-offset=44 -mstack-protector-guard-reg=tp \ // RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=RISCV +// RUN: %clang_cc1 -mstack-protector-guard=tls -triple powerpc64-unknown-elf \ +// RUN: -mstack-protector-guard-offset=52 -mstack-protector-guard-reg=r13 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC64 +// RUN: %clang_cc1 -mstack-protector-guard=tls -triple ppc32-unknown-elf \ +// RUN: -mstack-protector-guard-offset=16 -mstack-protector-guard-reg=r2 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC32 void foo(int*); void bar(int x) { int baz[x]; @@ -31,3 +37,13 @@ void bar(int x) { // RISCV: [[ATTR1]] = !{i32 1, !"stack-protector-guard", !"tls"} // RISCV: [[ATTR2]] = !{i32 1, !"stack-protector-guard-reg", !"tp"} // 
RISCV: [[ATTR3]] = !{i32 1, !"stack-protector-guard-offset", i32 44} + +// POWERPC64: !llvm.module.flags = !{{{.*}}[[ATTR1:![0-9]+]], [[ATTR2:![0-9]+]], [[ATTR3:![0-9]+]], [[ATTR4:![0-9]+]]} +// POWERPC64: [[ATTR2]] = !{i32 1, !"stack-protector-guard", !"tls"} +// POWERPC64: [[ATTR3]] = !{i32 1, !"stack-protector-guard-reg", !"r13"} +// POWERPC64: [[ATTR4]] = !{i32 1, !"stack-protector-guard-offset", i32 52} + +// POWERPC32: !llvm.module.flags = !{{{.*}}[[ATTR1:![0-9]+]], [[ATTR2:![0-9]+]], [[ATTR3:![0-9]+]], [[ATTR4:![0-9]+]]} +// POWERPC32: [[ATTR2]] = !{i32 1, !"stack-protector-guard", !"tls"} +// POWERPC32: [[ATTR3]] = !{i32 1, !"stack-protector-guard-reg", !"r2"} +// POWERPC32: [[ATTR4]] = !{i32 1, !"stack-protector-guard-offset", i32 16} diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/child-inheritted-from-parent-in-comdat.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/child-inheritted-from-parent-in-comdat.cpp index bb86d459b02eaf..e6a945618badc4 100644 --- a/clang/test/CodeGenCXX/RelativeVTablesABI/child-inheritted-from-parent-in-comdat.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/child-inheritted-from-parent-in-comdat.cpp @@ -4,8 +4,8 @@ // RUN: %clang_cc1 %s -triple=aarch64-unknown-fuchsia -O1 -o - -emit-llvm -fhalf-no-semantic-interposition | FileCheck %s // The inline function is emitted in each module with the same comdat -// CHECK: $_ZTS1A = comdat any // CHECK: $_ZTI1A = comdat any +// CHECK: $_ZTS1A = comdat any // CHECK: $_ZTI1B.rtti_proxy = comdat any // The VTable is emitted everywhere used diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/inlined-key-function.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/inlined-key-function.cpp index d5d9a85d4e22f4..70f8289e9df37a 100644 --- a/clang/test/CodeGenCXX/RelativeVTablesABI/inlined-key-function.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/inlined-key-function.cpp @@ -4,8 +4,8 @@ // RUN: %clang_cc1 %s -triple=aarch64-unknown-fuchsia -O1 -o - -emit-llvm | FileCheck %s // CHECK: 
$_ZTV1A = comdat any -// CHECK: $_ZTS1A = comdat any // CHECK: $_ZTI1A = comdat any +// CHECK: $_ZTS1A = comdat any // CHECK: $_ZTI1A.rtti_proxy = comdat any // The VTable is linkonce_odr and in a comdat here bc it’s key function is inline defined. diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/parent-and-child-in-comdats.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/parent-and-child-in-comdats.cpp index a033ac41868f56..c1b9a9398219a8 100644 --- a/clang/test/CodeGenCXX/RelativeVTablesABI/parent-and-child-in-comdats.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/parent-and-child-in-comdats.cpp @@ -8,12 +8,12 @@ // CHECK: $_ZN1A3fooEv = comdat any // CHECK: $_ZN1B3fooEv = comdat any // CHECK: $_ZTV1A = comdat any -// CHECK: $_ZTS1A = comdat any // CHECK: $_ZTI1A = comdat any +// CHECK: $_ZTS1A = comdat any // CHECK: $_ZTI1A.rtti_proxy = comdat any // CHECK: $_ZTV1B = comdat any -// CHECK: $_ZTS1B = comdat any // CHECK: $_ZTI1B = comdat any +// CHECK: $_ZTS1B = comdat any // CHECK: $_ZTI1B.rtti_proxy = comdat any // Both the vtables for A and B are emitted and in their own comdats. diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/parent-vtable-in-comdat.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/parent-vtable-in-comdat.cpp index 341c53146d476d..d6eda793cc5b4b 100644 --- a/clang/test/CodeGenCXX/RelativeVTablesABI/parent-vtable-in-comdat.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/parent-vtable-in-comdat.cpp @@ -7,17 +7,17 @@ // A::foo() has a comdat since it is an inline function // CHECK: $_ZN1A3fooEv = comdat any // CHECK: $_ZTV1A = comdat any +// CHECK: $_ZTI1A = comdat any // CHECK: $_ZTS1A = comdat any // The VTable for A has its own comdat section bc it has no key function -// CHECK: $_ZTI1A = comdat any // CHECK: $_ZTI1A.rtti_proxy = comdat any // The VTable for A is emitted here and in a comdat section since it has no key function, and is used in this module when creating an instance of A. 
// CHECK: @_ZTV1A.local = linkonce_odr hidden unnamed_addr constant { [3 x i32] } { [3 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint (ptr @_ZTI1A.rtti_proxy to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1A3fooEv to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, comdat($_ZTV1A), align 4 +// CHECK: @_ZTI1A = linkonce_odr constant { ptr, ptr } { ptr getelementptr inbounds (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 8), ptr @_ZTS1A }, comdat, align 8 // CHECK: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] // CHECK: @_ZTS1A = linkonce_odr constant [3 x i8] c"1A\00", comdat, align 1 -// CHECK: @_ZTI1A = linkonce_odr constant { ptr, ptr } { ptr getelementptr inbounds (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 8), ptr @_ZTS1A }, comdat, align 8 // CHECK: @_ZTI1A.rtti_proxy = linkonce_odr hidden unnamed_addr constant ptr @_ZTI1A, comdat // CHECK: @_ZTV1A = linkonce_odr unnamed_addr alias { [3 x i32] }, ptr @_ZTV1A.local diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/simple-vtable-definition.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/simple-vtable-definition.cpp index ad8018ee176712..9dcb1c30e56275 100644 --- a/clang/test/CodeGenCXX/RelativeVTablesABI/simple-vtable-definition.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/simple-vtable-definition.cpp @@ -9,9 +9,9 @@ // The vtable definition itself is private so we can take relative references to // it. The vtable symbol will be exposed through a public alias. 
// CHECK: @_ZTV1A.local = internal unnamed_addr constant { [3 x i32] } { [3 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint (ptr @_ZTI1A.rtti_proxy to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1A3fooEv to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, align 4 +// CHECK: @_ZTI1A ={{.*}} constant { ptr, ptr } { ptr getelementptr inbounds (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 8), ptr @_ZTS1A }, align 8 // CHECK: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] // CHECK: @_ZTS1A ={{.*}} constant [3 x i8] c"1A\00", align 1 -// CHECK: @_ZTI1A ={{.*}} constant { ptr, ptr } { ptr getelementptr inbounds (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 8), ptr @_ZTS1A }, align 8 // The rtti should be in a comdat // CHECK: @_ZTI1A.rtti_proxy = {{.*}}comdat diff --git a/clang/test/CodeGenCXX/RelativeVTablesABI/type-info.cpp b/clang/test/CodeGenCXX/RelativeVTablesABI/type-info.cpp index fc5ee5096433ed..c471e5dbd7b33c 100644 --- a/clang/test/CodeGenCXX/RelativeVTablesABI/type-info.cpp +++ b/clang/test/CodeGenCXX/RelativeVTablesABI/type-info.cpp @@ -5,12 +5,12 @@ // CHECK: $_ZTI1A.rtti_proxy = comdat any // CHECK: $_ZTI1B.rtti_proxy = comdat any +// CHECK: @_ZTI1A ={{.*}} constant { ptr, ptr } { ptr getelementptr inbounds (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 8), ptr @_ZTS1A }, align 8 // CHECK: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] // CHECK: @_ZTS1A ={{.*}} constant [3 x i8] c"1A\00", align 1 -// CHECK: @_ZTI1A ={{.*}} constant { ptr, ptr } { ptr getelementptr inbounds (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 8), ptr @_ZTS1A }, align 8 +// CHECK: @_ZTI1B ={{.*}} constant { ptr, ptr, ptr } { ptr getelementptr inbounds (i8, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i32 
8), ptr @_ZTS1B, ptr @_ZTI1A }, align 8 // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external global [0 x ptr] // CHECK: @_ZTS1B ={{.*}} constant [3 x i8] c"1B\00", align 1 -// CHECK: @_ZTI1B ={{.*}} constant { ptr, ptr, ptr } { ptr getelementptr inbounds (i8, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i32 8), ptr @_ZTS1B, ptr @_ZTI1A }, align 8 // CHECK: @_ZTI1A.rtti_proxy = linkonce_odr hidden unnamed_addr constant ptr @_ZTI1A, comdat // CHECK: @_ZTI1B.rtti_proxy = linkonce_odr hidden unnamed_addr constant ptr @_ZTI1B, comdat diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp index 3f2b0622d55162..9f481e1f0f0857 100644 --- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp +++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp @@ -59,6 +59,9 @@ void f(__clang_svbfloat16x3_t, __clang_svbfloat16x3_t); void f(__clang_svbfloat16x4_t, __clang_svbfloat16x4_t); void f(__clang_svboolx2_t, __clang_svboolx2_t); void f(__clang_svboolx4_t, __clang_svboolx4_t); +void f(__clang_svmfloat8x2_t, __clang_svmfloat8x2_t); +void f(__clang_svmfloat8x3_t, __clang_svmfloat8x3_t); +void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t); // CHECK-LABEL: define dso_local void @_Z3foov( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { @@ -139,6 +142,12 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t); // CHECK-NEXT: [[COERCE73:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[COERCE74:%.*]] = alloca { , , , }, align 2 // CHECK-NEXT: [[COERCE75:%.*]] = alloca { , , , }, align 2 +// CHECK-NEXT: [[COERCE76:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[COERCE77:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[COERCE78:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[COERCE79:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[COERCE80:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[COERCE81:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: call void @_Z1fu10__SVInt8_tS_( zeroinitializer, zeroinitializer) // 
CHECK-NEXT: call void @_Z1fu11__SVInt16_tS_( zeroinitializer, zeroinitializer) // CHECK-NEXT: call void @_Z1fu11__SVInt16_tS_( zeroinitializer, zeroinitializer) @@ -151,7 +160,7 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t); // CHECK-NEXT: call void @_Z1fu13__SVFloat16_tS_( zeroinitializer, zeroinitializer) // CHECK-NEXT: call void @_Z1fu13__SVFloat32_tS_( zeroinitializer, zeroinitializer) // CHECK-NEXT: call void @_Z1fu13__SVFloat64_tS_( zeroinitializer, zeroinitializer) -// CHECK-NEXT: call void @_Z1fu13__SVMfloat8_tS_( zeroinitializer, zeroinitializer) +// CHECK-NEXT: call void @_Z1fu13__SVMfloat8_tS_( zeroinitializer, zeroinitializer) // CHECK-NEXT: call void @_Z1fu14__SVBfloat16_tS_( zeroinitializer, zeroinitializer) // CHECK-NEXT: call void @_Z1fu10__SVBool_tS_( zeroinitializer, zeroinitializer) // CHECK-NEXT: call void @_Z1fu11__SVCount_tS_(target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer) @@ -573,6 +582,39 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t); // CHECK-NEXT: [[COERCE75_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 2 // CHECK-NEXT: [[COERCE75_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 3 // CHECK-NEXT: call void @_Z1f10svboolx4_tS_( [[COERCE74_EXTRACT0]], [[COERCE74_EXTRACT1]], [[COERCE74_EXTRACT2]], [[COERCE74_EXTRACT3]], [[COERCE75_EXTRACT0]], [[COERCE75_EXTRACT1]], [[COERCE75_EXTRACT2]], [[COERCE75_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE76]], align 16 +// CHECK-NEXT: [[COERCE76_TUPLE:%.*]] = load { , }, ptr [[COERCE76]], align 16 +// CHECK-NEXT: [[COERCE76_EXTRACT0:%.*]] = extractvalue { , } [[COERCE76_TUPLE]], 0 +// CHECK-NEXT: [[COERCE76_EXTRACT1:%.*]] = extractvalue { , } [[COERCE76_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE77]], align 16 +// CHECK-NEXT: [[COERCE77_TUPLE:%.*]] = load { , }, ptr [[COERCE77]], align 16 +// CHECK-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { , } [[COERCE77_TUPLE]], 0 +// CHECK-NEXT: 
[[COERCE77_EXTRACT1:%.*]] = extractvalue { , } [[COERCE77_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f13svmfloat8x2_tS_( [[COERCE76_EXTRACT0]], [[COERCE76_EXTRACT1]], [[COERCE77_EXTRACT0]], [[COERCE77_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE78]], align 16 +// CHECK-NEXT: [[COERCE78_TUPLE:%.*]] = load { , , }, ptr [[COERCE78]], align 16 +// CHECK-NEXT: [[COERCE78_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE78_TUPLE]], 0 +// CHECK-NEXT: [[COERCE78_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE78_TUPLE]], 1 +// CHECK-NEXT: [[COERCE78_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE78_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE79]], align 16 +// CHECK-NEXT: [[COERCE79_TUPLE:%.*]] = load { , , }, ptr [[COERCE79]], align 16 +// CHECK-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE79_TUPLE]], 0 +// CHECK-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE79_TUPLE]], 1 +// CHECK-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE79_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f13svmfloat8x3_tS_( [[COERCE78_EXTRACT0]], [[COERCE78_EXTRACT1]], [[COERCE78_EXTRACT2]], [[COERCE79_EXTRACT0]], [[COERCE79_EXTRACT1]], [[COERCE79_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE80]], align 16 +// CHECK-NEXT: [[COERCE80_TUPLE:%.*]] = load { , , , }, ptr [[COERCE80]], align 16 +// CHECK-NEXT: [[COERCE80_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE80_TUPLE]], 0 +// CHECK-NEXT: [[COERCE80_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE80_TUPLE]], 1 +// CHECK-NEXT: [[COERCE80_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE80_TUPLE]], 2 +// CHECK-NEXT: [[COERCE80_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE80_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE81]], align 16 +// CHECK-NEXT: [[COERCE81_TUPLE:%.*]] = load { , , , }, ptr [[COERCE81]], align 16 +// CHECK-NEXT: [[COERCE81_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE81_TUPLE]], 0 
+// CHECK-NEXT: [[COERCE81_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE81_TUPLE]], 1 +// CHECK-NEXT: [[COERCE81_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE81_TUPLE]], 2 +// CHECK-NEXT: [[COERCE81_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE81_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f13svmfloat8x4_tS_( [[COERCE80_EXTRACT0]], [[COERCE80_EXTRACT1]], [[COERCE80_EXTRACT2]], [[COERCE80_EXTRACT3]], [[COERCE81_EXTRACT0]], [[COERCE81_EXTRACT1]], [[COERCE81_EXTRACT2]], [[COERCE81_EXTRACT3]]) // CHECK-NEXT: ret void // // COMPAT_17-LABEL: define dso_local void @_Z3foov( @@ -654,6 +696,12 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t); // COMPAT_17-NEXT: [[COERCE73:%.*]] = alloca { , }, align 2 // COMPAT_17-NEXT: [[COERCE74:%.*]] = alloca { , , , }, align 2 // COMPAT_17-NEXT: [[COERCE75:%.*]] = alloca { , , , }, align 2 +// COMPAT_17-NEXT: [[COERCE76:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE77:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE78:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE79:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE80:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE81:%.*]] = alloca { , , , }, align 16 // COMPAT_17-NEXT: call void @_Z1fu10__SVInt8_tu10__SVInt8_t( zeroinitializer, zeroinitializer) // COMPAT_17-NEXT: call void @_Z1fu11__SVInt16_tu11__SVInt16_t( zeroinitializer, zeroinitializer) // COMPAT_17-NEXT: call void @_Z1fu11__SVInt16_tu11__SVInt16_t( zeroinitializer, zeroinitializer) @@ -1088,6 +1136,39 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t); // COMPAT_17-NEXT: [[COERCE75_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 2 // COMPAT_17-NEXT: [[COERCE75_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 3 // COMPAT_17-NEXT: call void @_Z1f10svboolx4_t10svboolx4_t( [[COERCE74_EXTRACT0]], [[COERCE74_EXTRACT1]], [[COERCE74_EXTRACT2]], [[COERCE74_EXTRACT3]], [[COERCE75_EXTRACT0]], [[COERCE75_EXTRACT1]], [[COERCE75_EXTRACT2]], 
[[COERCE75_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE76]], align 16 +// COMPAT_17-NEXT: [[COERCE76_TUPLE:%.*]] = load { , }, ptr [[COERCE76]], align 16 +// COMPAT_17-NEXT: [[COERCE76_EXTRACT0:%.*]] = extractvalue { , } [[COERCE76_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE76_EXTRACT1:%.*]] = extractvalue { , } [[COERCE76_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE77]], align 16 +// COMPAT_17-NEXT: [[COERCE77_TUPLE:%.*]] = load { , }, ptr [[COERCE77]], align 16 +// COMPAT_17-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { , } [[COERCE77_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { , } [[COERCE77_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x2_t13svmfloat8x2_t( [[COERCE76_EXTRACT0]], [[COERCE76_EXTRACT1]], [[COERCE77_EXTRACT0]], [[COERCE77_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE78]], align 16 +// COMPAT_17-NEXT: [[COERCE78_TUPLE:%.*]] = load { , , }, ptr [[COERCE78]], align 16 +// COMPAT_17-NEXT: [[COERCE78_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE78_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE78_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE78_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE78_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE78_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE79]], align 16 +// COMPAT_17-NEXT: [[COERCE79_TUPLE:%.*]] = load { , , }, ptr [[COERCE79]], align 16 +// COMPAT_17-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE79_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE79_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE79_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x3_t13svmfloat8x3_t( [[COERCE78_EXTRACT0]], [[COERCE78_EXTRACT1]], [[COERCE78_EXTRACT2]], [[COERCE79_EXTRACT0]], [[COERCE79_EXTRACT1]], [[COERCE79_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr 
[[COERCE80]], align 16 +// COMPAT_17-NEXT: [[COERCE80_TUPLE:%.*]] = load { , , , }, ptr [[COERCE80]], align 16 +// COMPAT_17-NEXT: [[COERCE80_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE80_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE80_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE80_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE80_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE80_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE80_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE80_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE81]], align 16 +// COMPAT_17-NEXT: [[COERCE81_TUPLE:%.*]] = load { , , , }, ptr [[COERCE81]], align 16 +// COMPAT_17-NEXT: [[COERCE81_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE81_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE81_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE81_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE81_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE81_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE81_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE81_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x4_t13svmfloat8x4_t( [[COERCE80_EXTRACT0]], [[COERCE80_EXTRACT1]], [[COERCE80_EXTRACT2]], [[COERCE80_EXTRACT3]], [[COERCE81_EXTRACT0]], [[COERCE81_EXTRACT1]], [[COERCE81_EXTRACT2]], [[COERCE81_EXTRACT3]]) // COMPAT_17-NEXT: ret void // void foo() { @@ -1146,4 +1227,7 @@ void foo() { f(__clang_svbfloat16x4_t(), __clang_svbfloat16x4_t()); f(__clang_svboolx2_t(), __clang_svboolx2_t()); f(__clang_svboolx4_t(), __clang_svboolx4_t()); + f(__clang_svmfloat8x2_t(), __clang_svmfloat8x2_t()); + f(__clang_svmfloat8x3_t(), __clang_svmfloat8x3_t()); + f(__clang_svmfloat8x4_t(), __clang_svmfloat8x4_t()); } diff --git a/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp b/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp index 45cf8081eb3a4d..f9068364d0dcbb 100644 --- a/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp +++ b/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp @@ -57,6 +57,9 @@ // CHECK-NEXT: [[B8X2:%.*]] = alloca { , 
}, align 2 // CHECK-NEXT: [[B8X4:%.*]] = alloca { , , , }, align 2 // CHECK-NEXT: [[CNT:%.*]] = alloca target("aarch64.svcount"), align 2 +// CHECK-NEXT: [[MF8X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[MF8X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[MF8X4:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: store zeroinitializer, ptr [[S8]], align 16 // CHECK-NEXT: store zeroinitializer, ptr [[S16]], align 16 // CHECK-NEXT: store zeroinitializer, ptr [[S32]], align 16 @@ -110,6 +113,9 @@ // CHECK-NEXT: store { , } zeroinitializer, ptr [[B8X2]], align 2 // CHECK-NEXT: store { , , , } zeroinitializer, ptr [[B8X4]], align 2 // CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr [[CNT]], align 2 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[MF8X2]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[MF8X3]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[MF8X4]], align 16 // CHECK-NEXT: ret void // void test_locals(void) { @@ -171,6 +177,10 @@ void test_locals(void) { __clang_svboolx4_t b8x4{}; __SVCount_t cnt{}; + + __clang_svmfloat8x2_t mf8x2{}; + __clang_svmfloat8x3_t mf8x3{}; + __clang_svmfloat8x4_t mf8x4{}; } // CHECK-LABEL: define dso_local void @_Z12test_copy_s8u10__SVInt8_t @@ -1142,3 +1152,63 @@ void test_copy_b8x4(__clang_svboolx4_t a) { void test_copy_cnt(__SVCount_t a) { __SVCount_t b{a}; } + +// CHECK-LABEL: define dso_local void @_Z15test_copy_mf8x213svmfloat8x2_t +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store 
{ , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 +// CHECK-NEXT: ret void +// +void test_copy_mf8x2(__clang_svmfloat8x2_t a) { + __clang_svmfloat8x2_t b{a}; +} + +// CHECK-LABEL: define dso_local void @_Z15test_copy_mf8x313svmfloat8x3_t +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: store { , , } [[TMP2]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load { , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , } [[TMP3]], ptr [[B]], align 16 +// CHECK-NEXT: ret void +// +void test_copy_mf8x3(__clang_svmfloat8x3_t a) { + __clang_svmfloat8x3_t b{a}; +} + +// CHECK-LABEL: define dso_local void @_Z15test_copy_mf8x413svmfloat8x4_t +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: 
store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 +// CHECK-NEXT: ret void +// +void test_copy_mf8x4(__clang_svmfloat8x4_t a) { + __clang_svmfloat8x4_t b{a}; +} diff --git a/clang/test/CodeGenCXX/armv7k.cpp b/clang/test/CodeGenCXX/armv7k.cpp index a4a243c162ea3f..7aa9fd7944cfdf 100644 --- a/clang/test/CodeGenCXX/armv7k.cpp +++ b/clang/test/CodeGenCXX/armv7k.cpp @@ -50,17 +50,17 @@ namespace test2 { struct __attribute__((visibility("hidden"))) B {}; const std::type_info &b0 = typeid(B); - // CHECK-GLOBALS: @_ZTSN5test21BE = linkonce_odr hidden constant // CHECK-GLOBALS: @_ZTIN5test21BE = linkonce_odr hidden constant { {{.*}}, ptr @_ZTSN5test21BE } + // CHECK-GLOBALS: @_ZTSN5test21BE = linkonce_odr hidden constant const std::type_info &b1 = typeid(B*); - // CHECK-GLOBALS: @_ZTSPN5test21BE = linkonce_odr hidden constant // CHECK-GLOBALS: @_ZTIPN5test21BE = linkonce_odr hidden constant { {{.*}}, ptr @_ZTSPN5test21BE, i32 0, ptr @_ZTIN5test21BE + // CHECK-GLOBALS: @_ZTSPN5test21BE = linkonce_odr hidden constant struct C {}; const std::type_info &c0 = typeid(C); - // CHECK-GLOBALS: @_ZTSN5test21CE = linkonce_odr constant [11 x i8] c"N5test21CE\00" // CHECK-GLOBALS: @_ZTIN5test21CE = linkonce_odr constant { {{.*}}, ptr @_ZTSN5test21CE } + // CHECK-GLOBALS: @_ZTSN5test21CE = linkonce_odr constant [11 x i8] c"N5test21CE\00" } // va_list should be based on "char *" rather than "ptr". 
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index d0c87d9dfda5f7..271d9ede79d0c4 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -10,17 +10,17 @@ B fail; // CHECK: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8 // CHECK: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8 // CHECK: @_ZTI1A = external addrspace(1) constant ptr addrspace(1) +// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)] // CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1 -// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 // CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. 
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8 // WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8 // WITH-NONZERO-DEFAULT-AS: @_ZTI1A = external addrspace(1) constant ptr addrspace(1) +// WITH-NONZERO-DEFAULT-AS: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 // WITH-NONZERO-DEFAULT-AS: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)] // WITH-NONZERO-DEFAULT-AS: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1 -// WITH-NONZERO-DEFAULT-AS: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 //. 
// CHECK-LABEL: define dso_local noundef nonnull align 8 dereferenceable(8) ptr @_Z1fP1A( // CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] personality ptr @__gxx_personality_v0 { diff --git a/clang/test/CodeGenCXX/exceptions-no-rtti.cpp b/clang/test/CodeGenCXX/exceptions-no-rtti.cpp index 7c73285b948f16..a3d969665bdc71 100644 --- a/clang/test/CodeGenCXX/exceptions-no-rtti.cpp +++ b/clang/test/CodeGenCXX/exceptions-no-rtti.cpp @@ -3,8 +3,8 @@ // CHECK: @_ZTIN5test11AE = linkonce_odr constant // CHECK: @_ZTIN5test11BE = linkonce_odr constant // CHECK: @_ZTIN5test11CE = linkonce_odr constant -// CHECK: @_ZTIN5test11DE = linkonce_odr constant // CHECK: @_ZTIPN5test11DE = linkonce_odr constant {{.*}} @_ZTIN5test11DE +// CHECK: @_ZTIN5test11DE = linkonce_odr constant // PR6974: this shouldn't crash namespace test0 { diff --git a/clang/test/CodeGenCXX/implicit-record-visibility.cpp b/clang/test/CodeGenCXX/implicit-record-visibility.cpp index ef388c7b8316a0..84ad822702d39b 100644 --- a/clang/test/CodeGenCXX/implicit-record-visibility.cpp +++ b/clang/test/CodeGenCXX/implicit-record-visibility.cpp @@ -7,6 +7,6 @@ // under -fvisibility=hidden the type of function f, due to its va_list (aka // __builtin_va_list, aka __va_list_tag (*)[1]) parameter would be hidden: -// CHECK: @_ZTSFvP13__va_list_tagE = linkonce_odr constant // CHECK: @_ZTIFvP13__va_list_tagE = linkonce_odr constant +// CHECK: @_ZTSFvP13__va_list_tagE = linkonce_odr constant void f(va_list) { (void)typeid(f); } diff --git a/clang/test/CodeGenCXX/mdefault-visibility-export-mapping-rtti.cpp b/clang/test/CodeGenCXX/mdefault-visibility-export-mapping-rtti.cpp index 1af105e915e636..2fc0a6a4ee608e 100644 --- a/clang/test/CodeGenCXX/mdefault-visibility-export-mapping-rtti.cpp +++ b/clang/test/CodeGenCXX/mdefault-visibility-export-mapping-rtti.cpp @@ -16,20 +16,20 @@ // C is an incomplete class type, so any direct or indirect pointer types should have // internal linkage, as should the type info for C itself. 
struct C; +// CHECK: @_ZTIP1C = internal constant // CHECK: @_ZTSP1C = internal constant -// CHECK: @_ZTS1C = internal constant // CHECK: @_ZTI1C = internal constant -// CHECK: @_ZTIP1C = internal constant -// CHECK: @_ZTSPP1C = internal constant +// CHECK: @_ZTS1C = internal constant // CHECK: @_ZTIPP1C = internal constant +// CHECK: @_ZTSPP1C = internal constant struct __attribute__((type_visibility("default"))) D; +// CHECK: @_ZTIP1D = internal constant // CHECK: @_ZTSP1D = internal constant -// CHECK: @_ZTS1D = internal constant // CHECK: @_ZTI1D = internal constant -// CHECK: @_ZTIP1D = internal constant -// CHECK: @_ZTSPP1D = internal constant +// CHECK: @_ZTS1D = internal constant // CHECK: @_ZTIPP1D = internal constant +// CHECK: @_ZTSPP1D = internal constant void __attribute__((visibility("default"))) tfunc() { (void)typeid(C *); @@ -46,12 +46,12 @@ void s::foo() {} // UNSPECIFIED-DEF: @_ZTV1s = unnamed_addr constant // UNSPECIFIED-HID: @_ZTV1s = hidden unnamed_addr constant // UNSPECIFIED-EXP: @_ZTV1s = dllexport unnamed_addr constant -// UNSPECIFIED-DEF: @_ZTS1s = constant -// UNSPECIFIED-HID: @_ZTS1s = hidden constant -// UNSPECIFIED-EXP: @_ZTS1s = dllexport constant // UNSPECIFIED-DEF: @_ZTI1s = constant // UNSPECIFIED-HID: @_ZTI1s = hidden constant // UNSPECIFIED-EXP: @_ZTI1s = dllexport constant +// UNSPECIFIED-DEF: @_ZTS1s = constant +// UNSPECIFIED-HID: @_ZTS1s = hidden constant +// UNSPECIFIED-EXP: @_ZTS1s = dllexport constant // explicit default visibility RTTI & vtable struct __attribute__((type_visibility("default"))) t { @@ -61,12 +61,12 @@ void t::foo() {} // EXPLICIT-DEF: @_ZTV1t = unnamed_addr constant // EXPLICIT-HID: @_ZTV1t = hidden unnamed_addr constant // EXPLICIT-EXP: @_ZTV1t = dllexport unnamed_addr constant -// EXPLICIT-DEF: @_ZTS1t = constant -// EXPLICIT-HID: @_ZTS1t = hidden constant -// EXPLICIT-EXP: @_ZTS1t = dllexport constant // EXPLICIT-DEF: @_ZTI1t = constant // EXPLICIT-HID: @_ZTI1t = hidden constant // EXPLICIT-EXP: 
@_ZTI1t = dllexport constant +// EXPLICIT-DEF: @_ZTS1t = constant +// EXPLICIT-HID: @_ZTS1t = hidden constant +// EXPLICIT-EXP: @_ZTS1t = dllexport constant #ifdef FUNDAMENTAL_IS_EXPLICIT #define TYPE_VIS __attribute__((type_visibility("default"))) @@ -86,511 +86,511 @@ __fundamental_type_info::~__fundamental_type_info() {} // __cxxabiv1::__fundamental_type_info // FUND-DEF: @_ZTVN10__cxxabiv123__fundamental_type_infoE = unnamed_addr constant -// FUND-DEF: @_ZTSN10__cxxabiv123__fundamental_type_infoE = constant // FUND-DEF: @_ZTIN10__cxxabiv123__fundamental_type_infoE = constant +// FUND-DEF: @_ZTSN10__cxxabiv123__fundamental_type_infoE = constant // FUND-HID: @_ZTVN10__cxxabiv123__fundamental_type_infoE = hidden unnamed_addr constant -// FUND-HID: @_ZTSN10__cxxabiv123__fundamental_type_infoE = hidden constant // FUND-HID: @_ZTIN10__cxxabiv123__fundamental_type_infoE = hidden constant +// FUND-HID: @_ZTSN10__cxxabiv123__fundamental_type_infoE = hidden constant // FUND-EXP: @_ZTVN10__cxxabiv123__fundamental_type_infoE = dllexport unnamed_addr constant -// FUND-EXP: @_ZTSN10__cxxabiv123__fundamental_type_infoE = dllexport constant // FUND-EXP: @_ZTIN10__cxxabiv123__fundamental_type_infoE = dllexport constant +// FUND-EXP: @_ZTSN10__cxxabiv123__fundamental_type_infoE = dllexport constant // void -// FUND-DEF: @_ZTSv = constant // FUND-DEF: @_ZTIv = constant -// FUND-DEF: @_ZTSPv = constant +// FUND-DEF: @_ZTSv = constant // FUND-DEF: @_ZTIPv = constant -// FUND-DEF: @_ZTSPKv = constant +// FUND-DEF: @_ZTSPv = constant // FUND-DEF: @_ZTIPKv = constant -// FUND-HID: @_ZTSv = hidden constant +// FUND-DEF: @_ZTSPKv = constant // FUND-HID: @_ZTIv = hidden constant -// FUND-HID: @_ZTSPv = hidden constant +// FUND-HID: @_ZTSv = hidden constant // FUND-HID: @_ZTIPv = hidden constant -// FUND-HID: @_ZTSPKv = hidden constant +// FUND-HID: @_ZTSPv = hidden constant // FUND-HID: @_ZTIPKv = hidden constant -// FUND-EXP: @_ZTSv = dllexport constant +// FUND-HID: @_ZTSPKv = hidden 
constant // FUND-EXP: @_ZTIv = dllexport constant -// FUND-EXP: @_ZTSPv = dllexport constant +// FUND-EXP: @_ZTSv = dllexport constant // FUND-EXP: @_ZTIPv = dllexport constant -// FUND-EXP: @_ZTSPKv = dllexport constant +// FUND-EXP: @_ZTSPv = dllexport constant // FUND-EXP: @_ZTIPKv = dllexport constant +// FUND-EXP: @_ZTSPKv = dllexport constant // std::nullptr_t -// FUND-DEF: @_ZTSDn = constant // FUND-DEF: @_ZTIDn = constant -// FUND-DEF: @_ZTSPDn = constant +// FUND-DEF: @_ZTSDn = constant // FUND-DEF: @_ZTIPDn = constant -// FUND-DEF: @_ZTSPKDn = constant +// FUND-DEF: @_ZTSPDn = constant // FUND-DEF: @_ZTIPKDn = constant -// FUND-HID: @_ZTSDn = hidden constant +// FUND-DEF: @_ZTSPKDn = constant // FUND-HID: @_ZTIDn = hidden constant -// FUND-HID: @_ZTSPDn = hidden constant +// FUND-HID: @_ZTSDn = hidden constant // FUND-HID: @_ZTIPDn = hidden constant -// FUND-HID: @_ZTSPKDn = hidden constant +// FUND-HID: @_ZTSPDn = hidden constant // FUND-HID: @_ZTIPKDn = hidden constant -// FUND-EXP: @_ZTSDn = dllexport constant +// FUND-HID: @_ZTSPKDn = hidden constant // FUND-EXP: @_ZTIDn = dllexport constant -// FUND-EXP: @_ZTSPDn = dllexport constant +// FUND-EXP: @_ZTSDn = dllexport constant // FUND-EXP: @_ZTIPDn = dllexport constant -// FUND-EXP: @_ZTSPKDn = dllexport constant +// FUND-EXP: @_ZTSPDn = dllexport constant // FUND-EXP: @_ZTIPKDn = dllexport constant +// FUND-EXP: @_ZTSPKDn = dllexport constant // bool -// FUND-DEF: @_ZTSb = constant // FUND-DEF: @_ZTIb = constant -// FUND-DEF: @_ZTSPb = constant +// FUND-DEF: @_ZTSb = constant // FUND-DEF: @_ZTIPb = constant -// FUND-DEF: @_ZTSPKb = constant +// FUND-DEF: @_ZTSPb = constant // FUND-DEF: @_ZTIPKb = constant -// FUND-HID: @_ZTSb = hidden constant +// FUND-DEF: @_ZTSPKb = constant // FUND-HID: @_ZTIb = hidden constant -// FUND-HID: @_ZTSPb = hidden constant +// FUND-HID: @_ZTSb = hidden constant // FUND-HID: @_ZTIPb = hidden constant -// FUND-HID: @_ZTSPKb = hidden constant +// FUND-HID: @_ZTSPb = hidden 
constant // FUND-HID: @_ZTIPKb = hidden constant -// FUND-EXP: @_ZTSb = dllexport constant +// FUND-HID: @_ZTSPKb = hidden constant // FUND-EXP: @_ZTIb = dllexport constant -// FUND-EXP: @_ZTSPb = dllexport constant +// FUND-EXP: @_ZTSb = dllexport constant // FUND-EXP: @_ZTIPb = dllexport constant -// FUND-EXP: @_ZTSPKb = dllexport constant +// FUND-EXP: @_ZTSPb = dllexport constant // FUND-EXP: @_ZTIPKb = dllexport constant +// FUND-EXP: @_ZTSPKb = dllexport constant // wchar_t -// FUND-DEF: @_ZTSw = constant // FUND-DEF: @_ZTIw = constant -// FUND-DEF: @_ZTSPw = constant +// FUND-DEF: @_ZTSw = constant // FUND-DEF: @_ZTIPw = constant -// FUND-DEF: @_ZTSPKw = constant +// FUND-DEF: @_ZTSPw = constant // FUND-DEF: @_ZTIPKw = constant -// FUND-HID: @_ZTSw = hidden constant +// FUND-DEF: @_ZTSPKw = constant // FUND-HID: @_ZTIw = hidden constant -// FUND-HID: @_ZTSPw = hidden constant +// FUND-HID: @_ZTSw = hidden constant // FUND-HID: @_ZTIPw = hidden constant -// FUND-HID: @_ZTSPKw = hidden constant +// FUND-HID: @_ZTSPw = hidden constant // FUND-HID: @_ZTIPKw = hidden constant -// FUND-EXP: @_ZTSw = dllexport constant +// FUND-HID: @_ZTSPKw = hidden constant // FUND-EXP: @_ZTIw = dllexport constant -// FUND-EXP: @_ZTSPw = dllexport constant +// FUND-EXP: @_ZTSw = dllexport constant // FUND-EXP: @_ZTIPw = dllexport constant -// FUND-EXP: @_ZTSPKw = dllexport constant +// FUND-EXP: @_ZTSPw = dllexport constant // FUND-EXP: @_ZTIPKw = dllexport constant +// FUND-EXP: @_ZTSPKw = dllexport constant // char -// FUND-DEF: @_ZTSc = constant // FUND-DEF: @_ZTIc = constant -// FUND-DEF: @_ZTSPc = constant +// FUND-DEF: @_ZTSc = constant // FUND-DEF: @_ZTIPc = constant -// FUND-DEF: @_ZTSPKc = constant +// FUND-DEF: @_ZTSPc = constant // FUND-DEF: @_ZTIPKc = constant -// FUND-HID: @_ZTSc = hidden constant +// FUND-DEF: @_ZTSPKc = constant // FUND-HID: @_ZTIc = hidden constant -// FUND-HID: @_ZTSPc = hidden constant +// FUND-HID: @_ZTSc = hidden constant // FUND-HID: @_ZTIPc 
= hidden constant -// FUND-HID: @_ZTSPKc = hidden constant +// FUND-HID: @_ZTSPc = hidden constant // FUND-HID: @_ZTIPKc = hidden constant -// FUND-EXP: @_ZTSc = dllexport constant +// FUND-HID: @_ZTSPKc = hidden constant // FUND-EXP: @_ZTIc = dllexport constant -// FUND-EXP: @_ZTSPc = dllexport constant +// FUND-EXP: @_ZTSc = dllexport constant // FUND-EXP: @_ZTIPc = dllexport constant -// FUND-EXP: @_ZTSPKc = dllexport constant +// FUND-EXP: @_ZTSPc = dllexport constant // FUND-EXP: @_ZTIPKc = dllexport constant +// FUND-EXP: @_ZTSPKc = dllexport constant // unsigned char -// FUND-DEF: @_ZTSh = constant // FUND-DEF: @_ZTIh = constant -// FUND-DEF: @_ZTSPh = constant +// FUND-DEF: @_ZTSh = constant // FUND-DEF: @_ZTIPh = constant -// FUND-DEF: @_ZTSPKh = constant +// FUND-DEF: @_ZTSPh = constant // FUND-DEF: @_ZTIPKh = constant -// FUND-HID: @_ZTSh = hidden constant +// FUND-DEF: @_ZTSPKh = constant // FUND-HID: @_ZTIh = hidden constant -// FUND-HID: @_ZTSPh = hidden constant +// FUND-HID: @_ZTSh = hidden constant // FUND-HID: @_ZTIPh = hidden constant -// FUND-HID: @_ZTSPKh = hidden constant +// FUND-HID: @_ZTSPh = hidden constant // FUND-HID: @_ZTIPKh = hidden constant -// FUND-EXP: @_ZTSh = dllexport constant +// FUND-HID: @_ZTSPKh = hidden constant // FUND-EXP: @_ZTIh = dllexport constant -// FUND-EXP: @_ZTSPh = dllexport constant +// FUND-EXP: @_ZTSh = dllexport constant // FUND-EXP: @_ZTIPh = dllexport constant -// FUND-EXP: @_ZTSPKh = dllexport constant +// FUND-EXP: @_ZTSPh = dllexport constant // FUND-EXP: @_ZTIPKh = dllexport constant +// FUND-EXP: @_ZTSPKh = dllexport constant // signed char -// FUND-DEF: @_ZTSa = constant // FUND-DEF: @_ZTIa = constant -// FUND-DEF: @_ZTSPa = constant +// FUND-DEF: @_ZTSa = constant // FUND-DEF: @_ZTIPa = constant -// FUND-DEF: @_ZTSPKa = constant +// FUND-DEF: @_ZTSPa = constant // FUND-DEF: @_ZTIPKa = constant -// FUND-HID: @_ZTSa = hidden constant +// FUND-DEF: @_ZTSPKa = constant // FUND-HID: @_ZTIa = hidden 
constant -// FUND-HID: @_ZTSPa = hidden constant +// FUND-HID: @_ZTSa = hidden constant // FUND-HID: @_ZTIPa = hidden constant -// FUND-HID: @_ZTSPKa = hidden constant +// FUND-HID: @_ZTSPa = hidden constant // FUND-HID: @_ZTIPKa = hidden constant -// FUND-EXP: @_ZTSa = dllexport constant +// FUND-HID: @_ZTSPKa = hidden constant // FUND-EXP: @_ZTIa = dllexport constant -// FUND-EXP: @_ZTSPa = dllexport constant +// FUND-EXP: @_ZTSa = dllexport constant // FUND-EXP: @_ZTIPa = dllexport constant -// FUND-EXP: @_ZTSPKa = dllexport constant +// FUND-EXP: @_ZTSPa = dllexport constant // FUND-EXP: @_ZTIPKa = dllexport constant +// FUND-EXP: @_ZTSPKa = dllexport constant // short -// FUND-DEF: @_ZTSs = constant // FUND-DEF: @_ZTIs = constant -// FUND-DEF: @_ZTSPs = constant +// FUND-DEF: @_ZTSs = constant // FUND-DEF: @_ZTIPs = constant -// FUND-DEF: @_ZTSPKs = constant +// FUND-DEF: @_ZTSPs = constant // FUND-DEF: @_ZTIPKs = constant -// FUND-HID: @_ZTSs = hidden constant +// FUND-DEF: @_ZTSPKs = constant // FUND-HID: @_ZTIs = hidden constant -// FUND-HID: @_ZTSPs = hidden constant +// FUND-HID: @_ZTSs = hidden constant // FUND-HID: @_ZTIPs = hidden constant -// FUND-HID: @_ZTSPKs = hidden constant +// FUND-HID: @_ZTSPs = hidden constant // FUND-HID: @_ZTIPKs = hidden constant -// FUND-EXP: @_ZTSs = dllexport constant +// FUND-HID: @_ZTSPKs = hidden constant // FUND-EXP: @_ZTIs = dllexport constant -// FUND-EXP: @_ZTSPs = dllexport constant +// FUND-EXP: @_ZTSs = dllexport constant // FUND-EXP: @_ZTIPs = dllexport constant -// FUND-EXP: @_ZTSPKs = dllexport constant +// FUND-EXP: @_ZTSPs = dllexport constant // FUND-EXP: @_ZTIPKs = dllexport constant +// FUND-EXP: @_ZTSPKs = dllexport constant // unsigned short -// FUND-DEF: @_ZTSt = constant // FUND-DEF: @_ZTIt = constant -// FUND-DEF: @_ZTSPt = constant +// FUND-DEF: @_ZTSt = constant // FUND-DEF: @_ZTIPt = constant -// FUND-DEF: @_ZTSPKt = constant +// FUND-DEF: @_ZTSPt = constant // FUND-DEF: @_ZTIPKt = constant -// 
FUND-HID: @_ZTSt = hidden constant +// FUND-DEF: @_ZTSPKt = constant // FUND-HID: @_ZTIt = hidden constant -// FUND-HID: @_ZTSPt = hidden constant +// FUND-HID: @_ZTSt = hidden constant // FUND-HID: @_ZTIPt = hidden constant -// FUND-HID: @_ZTSPKt = hidden constant +// FUND-HID: @_ZTSPt = hidden constant // FUND-HID: @_ZTIPKt = hidden constant -// FUND-EXP: @_ZTSt = dllexport constant +// FUND-HID: @_ZTSPKt = hidden constant // FUND-EXP: @_ZTIt = dllexport constant -// FUND-EXP: @_ZTSPt = dllexport constant +// FUND-EXP: @_ZTSt = dllexport constant // FUND-EXP: @_ZTIPt = dllexport constant -// FUND-EXP: @_ZTSPKt = dllexport constant +// FUND-EXP: @_ZTSPt = dllexport constant // FUND-EXP: @_ZTIPKt = dllexport constant +// FUND-EXP: @_ZTSPKt = dllexport constant // int -// FUND-DEF: @_ZTSi = constant // FUND-DEF: @_ZTIi = constant -// FUND-DEF: @_ZTSPi = constant +// FUND-DEF: @_ZTSi = constant // FUND-DEF: @_ZTIPi = constant -// FUND-DEF: @_ZTSPKi = constant +// FUND-DEF: @_ZTSPi = constant // FUND-DEF: @_ZTIPKi = constant -// FUND-HID: @_ZTSi = hidden constant +// FUND-DEF: @_ZTSPKi = constant // FUND-HID: @_ZTIi = hidden constant -// FUND-HID: @_ZTSPi = hidden constant +// FUND-HID: @_ZTSi = hidden constant // FUND-HID: @_ZTIPi = hidden constant -// FUND-HID: @_ZTSPKi = hidden constant +// FUND-HID: @_ZTSPi = hidden constant // FUND-HID: @_ZTIPKi = hidden constant -// FUND-EXP: @_ZTSi = dllexport constant +// FUND-HID: @_ZTSPKi = hidden constant // FUND-EXP: @_ZTIi = dllexport constant -// FUND-EXP: @_ZTSPi = dllexport constant +// FUND-EXP: @_ZTSi = dllexport constant // FUND-EXP: @_ZTIPi = dllexport constant -// FUND-EXP: @_ZTSPKi = dllexport constant +// FUND-EXP: @_ZTSPi = dllexport constant // FUND-EXP: @_ZTIPKi = dllexport constant +// FUND-EXP: @_ZTSPKi = dllexport constant // unsigned int -// FUND-DEF: @_ZTSj = constant // FUND-DEF: @_ZTIj = constant -// FUND-DEF: @_ZTSPj = constant +// FUND-DEF: @_ZTSj = constant // FUND-DEF: @_ZTIPj = constant -// 
FUND-DEF: @_ZTSPKj = constant +// FUND-DEF: @_ZTSPj = constant // FUND-DEF: @_ZTIPKj = constant -// FUND-HID: @_ZTSj = hidden constant +// FUND-DEF: @_ZTSPKj = constant // FUND-HID: @_ZTIj = hidden constant -// FUND-HID: @_ZTSPj = hidden constant +// FUND-HID: @_ZTSj = hidden constant // FUND-HID: @_ZTIPj = hidden constant -// FUND-HID: @_ZTSPKj = hidden constant +// FUND-HID: @_ZTSPj = hidden constant // FUND-HID: @_ZTIPKj = hidden constant -// FUND-EXP: @_ZTSj = dllexport constant +// FUND-HID: @_ZTSPKj = hidden constant // FUND-EXP: @_ZTIj = dllexport constant -// FUND-EXP: @_ZTSPj = dllexport constant +// FUND-EXP: @_ZTSj = dllexport constant // FUND-EXP: @_ZTIPj = dllexport constant -// FUND-EXP: @_ZTSPKj = dllexport constant +// FUND-EXP: @_ZTSPj = dllexport constant // FUND-EXP: @_ZTIPKj = dllexport constant +// FUND-EXP: @_ZTSPKj = dllexport constant // long -// FUND-DEF: @_ZTSl = constant // FUND-DEF: @_ZTIl = constant -// FUND-DEF: @_ZTSPl = constant +// FUND-DEF: @_ZTSl = constant // FUND-DEF: @_ZTIPl = constant -// FUND-DEF: @_ZTSPKl = constant +// FUND-DEF: @_ZTSPl = constant // FUND-DEF: @_ZTIPKl = constant -// FUND-HID: @_ZTSl = hidden constant +// FUND-DEF: @_ZTSPKl = constant // FUND-HID: @_ZTIl = hidden constant -// FUND-HID: @_ZTSPl = hidden constant +// FUND-HID: @_ZTSl = hidden constant // FUND-HID: @_ZTIPl = hidden constant -// FUND-HID: @_ZTSPKl = hidden constant +// FUND-HID: @_ZTSPl = hidden constant // FUND-HID: @_ZTIPKl = hidden constant -// FUND-EXP: @_ZTSl = dllexport constant +// FUND-HID: @_ZTSPKl = hidden constant // FUND-EXP: @_ZTIl = dllexport constant -// FUND-EXP: @_ZTSPl = dllexport constant +// FUND-EXP: @_ZTSl = dllexport constant // FUND-EXP: @_ZTIPl = dllexport constant -// FUND-EXP: @_ZTSPKl = dllexport constant +// FUND-EXP: @_ZTSPl = dllexport constant // FUND-EXP: @_ZTIPKl = dllexport constant +// FUND-EXP: @_ZTSPKl = dllexport constant // unsigned long -// FUND-DEF: @_ZTSm = constant // FUND-DEF: @_ZTIm = constant -// 
FUND-DEF: @_ZTSPm = constant +// FUND-DEF: @_ZTSm = constant // FUND-DEF: @_ZTIPm = constant -// FUND-DEF: @_ZTSPKm = constant +// FUND-DEF: @_ZTSPm = constant // FUND-DEF: @_ZTIPKm = constant -// FUND-HID: @_ZTSm = hidden constant +// FUND-DEF: @_ZTSPKm = constant // FUND-HID: @_ZTIm = hidden constant -// FUND-HID: @_ZTSPm = hidden constant +// FUND-HID: @_ZTSm = hidden constant // FUND-HID: @_ZTIPm = hidden constant -// FUND-HID: @_ZTSPKm = hidden constant +// FUND-HID: @_ZTSPm = hidden constant // FUND-HID: @_ZTIPKm = hidden constant -// FUND-EXP: @_ZTSm = dllexport constant +// FUND-HID: @_ZTSPKm = hidden constant // FUND-EXP: @_ZTIm = dllexport constant -// FUND-EXP: @_ZTSPm = dllexport constant +// FUND-EXP: @_ZTSm = dllexport constant // FUND-EXP: @_ZTIPm = dllexport constant -// FUND-EXP: @_ZTSPKm = dllexport constant +// FUND-EXP: @_ZTSPm = dllexport constant // FUND-EXP: @_ZTIPKm = dllexport constant +// FUND-EXP: @_ZTSPKm = dllexport constant // long long -// FUND-DEF: @_ZTSx = constant // FUND-DEF: @_ZTIx = constant -// FUND-DEF: @_ZTSPx = constant +// FUND-DEF: @_ZTSx = constant // FUND-DEF: @_ZTIPx = constant -// FUND-DEF: @_ZTSPKx = constant +// FUND-DEF: @_ZTSPx = constant // FUND-DEF: @_ZTIPKx = constant -// FUND-HID: @_ZTSx = hidden constant +// FUND-DEF: @_ZTSPKx = constant // FUND-HID: @_ZTIx = hidden constant -// FUND-HID: @_ZTSPx = hidden constant +// FUND-HID: @_ZTSx = hidden constant // FUND-HID: @_ZTIPx = hidden constant -// FUND-HID: @_ZTSPKx = hidden constant +// FUND-HID: @_ZTSPx = hidden constant // FUND-HID: @_ZTIPKx = hidden constant -// FUND-EXP: @_ZTSx = dllexport constant +// FUND-HID: @_ZTSPKx = hidden constant // FUND-EXP: @_ZTIx = dllexport constant -// FUND-EXP: @_ZTSPx = dllexport constant +// FUND-EXP: @_ZTSx = dllexport constant // FUND-EXP: @_ZTIPx = dllexport constant -// FUND-EXP: @_ZTSPKx = dllexport constant +// FUND-EXP: @_ZTSPx = dllexport constant // FUND-EXP: @_ZTIPKx = dllexport constant +// FUND-EXP: @_ZTSPKx = 
dllexport constant // unsigned long long -// FUND-DEF: @_ZTSy = constant // FUND-DEF: @_ZTIy = constant -// FUND-DEF: @_ZTSPy = constant +// FUND-DEF: @_ZTSy = constant // FUND-DEF: @_ZTIPy = constant -// FUND-DEF: @_ZTSPKy = constant +// FUND-DEF: @_ZTSPy = constant // FUND-DEF: @_ZTIPKy = constant -// FUND-HID: @_ZTSy = hidden constant +// FUND-DEF: @_ZTSPKy = constant // FUND-HID: @_ZTIy = hidden constant -// FUND-HID: @_ZTSPy = hidden constant +// FUND-HID: @_ZTSy = hidden constant // FUND-HID: @_ZTIPy = hidden constant -// FUND-HID: @_ZTSPKy = hidden constant +// FUND-HID: @_ZTSPy = hidden constant // FUND-HID: @_ZTIPKy = hidden constant -// FUND-EXP: @_ZTSy = dllexport constant +// FUND-HID: @_ZTSPKy = hidden constant // FUND-EXP: @_ZTIy = dllexport constant -// FUND-EXP: @_ZTSPy = dllexport constant +// FUND-EXP: @_ZTSy = dllexport constant // FUND-EXP: @_ZTIPy = dllexport constant -// FUND-EXP: @_ZTSPKy = dllexport constant +// FUND-EXP: @_ZTSPy = dllexport constant // FUND-EXP: @_ZTIPKy = dllexport constant +// FUND-EXP: @_ZTSPKy = dllexport constant // __int128 -// FUND-DEF: @_ZTSn = constant // FUND-DEF: @_ZTIn = constant -// FUND-DEF: @_ZTSPn = constant +// FUND-DEF: @_ZTSn = constant // FUND-DEF: @_ZTIPn = constant -// FUND-DEF: @_ZTSPKn = constant +// FUND-DEF: @_ZTSPn = constant // FUND-DEF: @_ZTIPKn = constant -// FUND-HID: @_ZTSn = hidden constant +// FUND-DEF: @_ZTSPKn = constant // FUND-HID: @_ZTIn = hidden constant -// FUND-HID: @_ZTSPn = hidden constant +// FUND-HID: @_ZTSn = hidden constant // FUND-HID: @_ZTIPn = hidden constant -// FUND-HID: @_ZTSPKn = hidden constant +// FUND-HID: @_ZTSPn = hidden constant // FUND-HID: @_ZTIPKn = hidden constant -// FUND-EXP: @_ZTSn = dllexport constant +// FUND-HID: @_ZTSPKn = hidden constant // FUND-EXP: @_ZTIn = dllexport constant -// FUND-EXP: @_ZTSPn = dllexport constant +// FUND-EXP: @_ZTSn = dllexport constant // FUND-EXP: @_ZTIPn = dllexport constant -// FUND-EXP: @_ZTSPKn = dllexport constant +// 
FUND-EXP: @_ZTSPn = dllexport constant // FUND-EXP: @_ZTIPKn = dllexport constant +// FUND-EXP: @_ZTSPKn = dllexport constant // unsigned __int128 -// FUND-DEF: @_ZTSo = constant // FUND-DEF: @_ZTIo = constant -// FUND-DEF: @_ZTSPo = constant +// FUND-DEF: @_ZTSo = constant // FUND-DEF: @_ZTIPo = constant -// FUND-DEF: @_ZTSPKo = constant +// FUND-DEF: @_ZTSPo = constant // FUND-DEF: @_ZTIPKo = constant -// FUND-HID: @_ZTSo = hidden constant +// FUND-DEF: @_ZTSPKo = constant // FUND-HID: @_ZTIo = hidden constant -// FUND-HID: @_ZTSPo = hidden constant +// FUND-HID: @_ZTSo = hidden constant // FUND-HID: @_ZTIPo = hidden constant -// FUND-HID: @_ZTSPKo = hidden constant +// FUND-HID: @_ZTSPo = hidden constant // FUND-HID: @_ZTIPKo = hidden constant -// FUND-EXP: @_ZTSo = dllexport constant +// FUND-HID: @_ZTSPKo = hidden constant // FUND-EXP: @_ZTIo = dllexport constant -// FUND-EXP: @_ZTSPo = dllexport constant +// FUND-EXP: @_ZTSo = dllexport constant // FUND-EXP: @_ZTIPo = dllexport constant -// FUND-EXP: @_ZTSPKo = dllexport constant +// FUND-EXP: @_ZTSPo = dllexport constant // FUND-EXP: @_ZTIPKo = dllexport constant +// FUND-EXP: @_ZTSPKo = dllexport constant // half -// FUND-DEF: @_ZTSDh = constant // FUND-DEF: @_ZTIDh = constant -// FUND-DEF: @_ZTSPDh = constant +// FUND-DEF: @_ZTSDh = constant // FUND-DEF: @_ZTIPDh = constant -// FUND-DEF: @_ZTSPKDh = constant +// FUND-DEF: @_ZTSPDh = constant // FUND-DEF: @_ZTIPKDh = constant -// FUND-HID: @_ZTSDh = hidden constant +// FUND-DEF: @_ZTSPKDh = constant // FUND-HID: @_ZTIDh = hidden constant -// FUND-HID: @_ZTSPDh = hidden constant +// FUND-HID: @_ZTSDh = hidden constant // FUND-HID: @_ZTIPDh = hidden constant -// FUND-HID: @_ZTSPKDh = hidden constant +// FUND-HID: @_ZTSPDh = hidden constant // FUND-HID: @_ZTIPKDh = hidden constant -// FUND-EXP: @_ZTSDh = dllexport constant +// FUND-HID: @_ZTSPKDh = hidden constant // FUND-EXP: @_ZTIDh = dllexport constant -// FUND-EXP: @_ZTSPDh = dllexport constant +// 
FUND-EXP: @_ZTSDh = dllexport constant // FUND-EXP: @_ZTIPDh = dllexport constant -// FUND-EXP: @_ZTSPKDh = dllexport constant +// FUND-EXP: @_ZTSPDh = dllexport constant // FUND-EXP: @_ZTIPKDh = dllexport constant +// FUND-EXP: @_ZTSPKDh = dllexport constant // float -// FUND-DEF: @_ZTSf = constant // FUND-DEF: @_ZTIf = constant -// FUND-DEF: @_ZTSPf = constant +// FUND-DEF: @_ZTSf = constant // FUND-DEF: @_ZTIPf = constant -// FUND-DEF: @_ZTSPKf = constant +// FUND-DEF: @_ZTSPf = constant // FUND-DEF: @_ZTIPKf = constant -// FUND-HID: @_ZTSf = hidden constant +// FUND-DEF: @_ZTSPKf = constant // FUND-HID: @_ZTIf = hidden constant -// FUND-HID: @_ZTSPf = hidden constant +// FUND-HID: @_ZTSf = hidden constant // FUND-HID: @_ZTIPf = hidden constant -// FUND-HID: @_ZTSPKf = hidden constant +// FUND-HID: @_ZTSPf = hidden constant // FUND-HID: @_ZTIPKf = hidden constant -// FUND-EXP: @_ZTSf = dllexport constant +// FUND-HID: @_ZTSPKf = hidden constant // FUND-EXP: @_ZTIf = dllexport constant -// FUND-EXP: @_ZTSPf = dllexport constant +// FUND-EXP: @_ZTSf = dllexport constant // FUND-EXP: @_ZTIPf = dllexport constant -// FUND-EXP: @_ZTSPKf = dllexport constant +// FUND-EXP: @_ZTSPf = dllexport constant // FUND-EXP: @_ZTIPKf = dllexport constant +// FUND-EXP: @_ZTSPKf = dllexport constant // double -// FUND-DEF: @_ZTSd = constant // FUND-DEF: @_ZTId = constant -// FUND-DEF: @_ZTSPd = constant +// FUND-DEF: @_ZTSd = constant // FUND-DEF: @_ZTIPd = constant -// FUND-DEF: @_ZTSPKd = constant +// FUND-DEF: @_ZTSPd = constant // FUND-DEF: @_ZTIPKd = constant -// FUND-HID: @_ZTSd = hidden constant +// FUND-DEF: @_ZTSPKd = constant // FUND-HID: @_ZTId = hidden constant -// FUND-HID: @_ZTSPd = hidden constant +// FUND-HID: @_ZTSd = hidden constant // FUND-HID: @_ZTIPd = hidden constant -// FUND-HID: @_ZTSPKd = hidden constant +// FUND-HID: @_ZTSPd = hidden constant // FUND-HID: @_ZTIPKd = hidden constant -// FUND-EXP: @_ZTSd = dllexport constant +// FUND-HID: @_ZTSPKd = hidden 
constant // FUND-EXP: @_ZTId = dllexport constant -// FUND-EXP: @_ZTSPd = dllexport constant +// FUND-EXP: @_ZTSd = dllexport constant // FUND-EXP: @_ZTIPd = dllexport constant -// FUND-EXP: @_ZTSPKd = dllexport constant +// FUND-EXP: @_ZTSPd = dllexport constant // FUND-EXP: @_ZTIPKd = dllexport constant +// FUND-EXP: @_ZTSPKd = dllexport constant // long double -// FUND-DEF: @_ZTSe = constant // FUND-DEF: @_ZTIe = constant -// FUND-DEF: @_ZTSPe = constant +// FUND-DEF: @_ZTSe = constant // FUND-DEF: @_ZTIPe = constant -// FUND-DEF: @_ZTSPKe = constant +// FUND-DEF: @_ZTSPe = constant // FUND-DEF: @_ZTIPKe = constant -// FUND-HID: @_ZTSe = hidden constant +// FUND-DEF: @_ZTSPKe = constant // FUND-HID: @_ZTIe = hidden constant -// FUND-HID: @_ZTSPe = hidden constant +// FUND-HID: @_ZTSe = hidden constant // FUND-HID: @_ZTIPe = hidden constant -// FUND-HID: @_ZTSPKe = hidden constant +// FUND-HID: @_ZTSPe = hidden constant // FUND-HID: @_ZTIPKe = hidden constant -// FUND-EXP: @_ZTSe = dllexport constant +// FUND-HID: @_ZTSPKe = hidden constant // FUND-EXP: @_ZTIe = dllexport constant -// FUND-EXP: @_ZTSPe = dllexport constant +// FUND-EXP: @_ZTSe = dllexport constant // FUND-EXP: @_ZTIPe = dllexport constant -// FUND-EXP: @_ZTSPKe = dllexport constant +// FUND-EXP: @_ZTSPe = dllexport constant // FUND-EXP: @_ZTIPKe = dllexport constant +// FUND-EXP: @_ZTSPKe = dllexport constant // __ieee128 -// FUND-DEF: @_ZTSu9__ieee128 = constant // FUND-DEF: @_ZTIu9__ieee128 = constant -// FUND-DEF: @_ZTSPu9__ieee128 = constant +// FUND-DEF: @_ZTSu9__ieee128 = constant // FUND-DEF: @_ZTIPu9__ieee128 = constant -// FUND-DEF: @_ZTSPKu9__ieee128 = constant +// FUND-DEF: @_ZTSPu9__ieee128 = constant // FUND-DEF: @_ZTIPKu9__ieee128 = constant -// FUND-HID: @_ZTSu9__ieee128 = hidden constant +// FUND-DEF: @_ZTSPKu9__ieee128 = constant // FUND-HID: @_ZTIu9__ieee128 = hidden constant -// FUND-HID: @_ZTSPu9__ieee128 = hidden constant +// FUND-HID: @_ZTSu9__ieee128 = hidden constant // 
FUND-HID: @_ZTIPu9__ieee128 = hidden constant -// FUND-HID: @_ZTSPKu9__ieee128 = hidden constant +// FUND-HID: @_ZTSPu9__ieee128 = hidden constant // FUND-HID: @_ZTIPKu9__ieee128 = hidden constant -// FUND-EXP: @_ZTSu9__ieee128 = dllexport constant +// FUND-HID: @_ZTSPKu9__ieee128 = hidden constant // FUND-EXP: @_ZTIu9__ieee128 = dllexport constant -// FUND-EXP: @_ZTSPu9__ieee128 = dllexport constant +// FUND-EXP: @_ZTSu9__ieee128 = dllexport constant // FUND-EXP: @_ZTIPu9__ieee128 = dllexport constant -// FUND-EXP: @_ZTSPKu9__ieee128 = dllexport constant +// FUND-EXP: @_ZTSPu9__ieee128 = dllexport constant // FUND-EXP: @_ZTIPKu9__ieee128 = dllexport constant +// FUND-EXP: @_ZTSPKu9__ieee128 = dllexport constant // char8_t -// FUND-DEF: @_ZTSDu = constant // FUND-DEF: @_ZTIDu = constant -// FUND-DEF: @_ZTSPDu = constant +// FUND-DEF: @_ZTSDu = constant // FUND-DEF: @_ZTIPDu = constant -// FUND-DEF: @_ZTSPKDu = constant +// FUND-DEF: @_ZTSPDu = constant // FUND-DEF: @_ZTIPKDu = constant -// FUND-HID: @_ZTSDu = hidden constant +// FUND-DEF: @_ZTSPKDu = constant // FUND-HID: @_ZTIDu = hidden constant -// FUND-HID: @_ZTSPDu = hidden constant +// FUND-HID: @_ZTSDu = hidden constant // FUND-HID: @_ZTIPDu = hidden constant -// FUND-HID: @_ZTSPKDu = hidden constant +// FUND-HID: @_ZTSPDu = hidden constant // FUND-HID: @_ZTIPKDu = hidden constant -// FUND-EXP: @_ZTSDu = dllexport constant +// FUND-HID: @_ZTSPKDu = hidden constant // FUND-EXP: @_ZTIDu = dllexport constant -// FUND-EXP: @_ZTSPDu = dllexport constant +// FUND-EXP: @_ZTSDu = dllexport constant // FUND-EXP: @_ZTIPDu = dllexport constant -// FUND-EXP: @_ZTSPKDu = dllexport constant +// FUND-EXP: @_ZTSPDu = dllexport constant // FUND-EXP: @_ZTIPKDu = dllexport constant +// FUND-EXP: @_ZTSPKDu = dllexport constant // char16_t -// FUND-DEF: @_ZTSDs = constant // FUND-DEF: @_ZTIDs = constant -// FUND-DEF: @_ZTSPDs = constant +// FUND-DEF: @_ZTSDs = constant // FUND-DEF: @_ZTIPDs = constant -// FUND-DEF: @_ZTSPKDs = 
constant +// FUND-DEF: @_ZTSPDs = constant // FUND-DEF: @_ZTIPKDs = constant -// FUND-HID: @_ZTSDs = hidden constant +// FUND-DEF: @_ZTSPKDs = constant // FUND-HID: @_ZTIDs = hidden constant -// FUND-HID: @_ZTSPDs = hidden constant +// FUND-HID: @_ZTSDs = hidden constant // FUND-HID: @_ZTIPDs = hidden constant -// FUND-HID: @_ZTSPKDs = hidden constant +// FUND-HID: @_ZTSPDs = hidden constant // FUND-HID: @_ZTIPKDs = hidden constant -// FUND-EXP: @_ZTSDs = dllexport constant +// FUND-HID: @_ZTSPKDs = hidden constant // FUND-EXP: @_ZTIDs = dllexport constant -// FUND-EXP: @_ZTSPDs = dllexport constant +// FUND-EXP: @_ZTSDs = dllexport constant // FUND-EXP: @_ZTIPDs = dllexport constant -// FUND-EXP: @_ZTSPKDs = dllexport constant +// FUND-EXP: @_ZTSPDs = dllexport constant // FUND-EXP: @_ZTIPKDs = dllexport constant +// FUND-EXP: @_ZTSPKDs = dllexport constant // char32_t -// FUND-DEF: @_ZTSDi = constant // FUND-DEF: @_ZTIDi = constant -// FUND-DEF: @_ZTSPDi = constant +// FUND-DEF: @_ZTSDi = constant // FUND-DEF: @_ZTIPDi = constant -// FUND-DEF: @_ZTSPKDi = constant +// FUND-DEF: @_ZTSPDi = constant // FUND-DEF: @_ZTIPKDi = constant -// FUND-HID: @_ZTSDi = hidden constant +// FUND-DEF: @_ZTSPKDi = constant // FUND-HID: @_ZTIDi = hidden constant -// FUND-HID: @_ZTSPDi = hidden constant +// FUND-HID: @_ZTSDi = hidden constant // FUND-HID: @_ZTIPDi = hidden constant -// FUND-HID: @_ZTSPKDi = hidden constant +// FUND-HID: @_ZTSPDi = hidden constant // FUND-HID: @_ZTIPKDi = hidden constant -// FUND-EXP: @_ZTSDi = dllexport constant +// FUND-HID: @_ZTSPKDi = hidden constant // FUND-EXP: @_ZTIDi = dllexport constant -// FUND-EXP: @_ZTSPDi = dllexport constant +// FUND-EXP: @_ZTSDi = dllexport constant // FUND-EXP: @_ZTIPDi = dllexport constant -// FUND-EXP: @_ZTSPKDi = dllexport constant +// FUND-EXP: @_ZTSPDi = dllexport constant // FUND-EXP: @_ZTIPKDi = dllexport constant +// FUND-EXP: @_ZTSPKDi = dllexport constant diff --git a/clang/test/CodeGenCXX/modules-vtable.cppm 
b/clang/test/CodeGenCXX/modules-vtable.cppm index 5cc3504d72628f..6589b9f3c5d649 100644 --- a/clang/test/CodeGenCXX/modules-vtable.cppm +++ b/clang/test/CodeGenCXX/modules-vtable.cppm @@ -40,13 +40,13 @@ inline Base::~Base() {} // CHECK: @_ZTVW3Mod4Base = unnamed_addr constant -// CHECK: @_ZTSW3Mod4Base = constant // CHECK: @_ZTIW3Mod4Base = constant +// CHECK: @_ZTSW3Mod4Base = constant // With the new Itanium C++ ABI, the linkage of vtables in modules don't need to be linkonce ODR. // CHECK-INLINE: @_ZTVW3Mod4Base = {{.*}}unnamed_addr constant -// CHECK-INLINE: @_ZTSW3Mod4Base = {{.*}}constant // CHECK-INLINE: @_ZTIW3Mod4Base = {{.*}}constant +// CHECK-INLINE: @_ZTSW3Mod4Base = {{.*}}constant module :private; int private_use() { @@ -61,12 +61,12 @@ int use() { return 43; } -// CHECK-NOT: @_ZTSW3Mod4Base // CHECK-NOT: @_ZTIW3Mod4Base +// CHECK-NOT: @_ZTSW3Mod4Base // CHECK: @_ZTVW3Mod4Base = external -// CHECK-INLINE-NOT: @_ZTSW3Mod4Base // CHECK-INLINE-NOT: @_ZTIW3Mod4Base +// CHECK-INLINE-NOT: @_ZTSW3Mod4Base // CHECK-INLINE: @_ZTVW3Mod4Base = external // Check the case that the declaration of the key function comes from another @@ -86,8 +86,8 @@ int a_use() { } // CHECK: @_ZTVW1M1C = unnamed_addr constant -// CHECK: @_ZTSW1M1C = constant // CHECK: @_ZTIW1M1C = constant +// CHECK: @_ZTSW1M1C = constant //--- M-B.cppm export module M:B; @@ -101,5 +101,5 @@ int b_use() { } // CHECK: @_ZTVW1M1C = external -// CHECK-NOT: @_ZTSW1M1C // CHECK-NOT: @_ZTIW1M1C +// CHECK-NOT: @_ZTSW1M1C diff --git a/clang/test/CodeGenCXX/ptrauth-rtti-layout.cpp b/clang/test/CodeGenCXX/ptrauth-rtti-layout.cpp index 2b633addd677e0..b50e0908f9db88 100644 --- a/clang/test/CodeGenCXX/ptrauth-rtti-layout.cpp +++ b/clang/test/CodeGenCXX/ptrauth-rtti-layout.cpp @@ -5,12 +5,12 @@ struct A { int a; }; +// DARWIN: @_ZTI1A = linkonce_odr hidden constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2), ptr inttoptr (i64 add (i64 
ptrtoint (ptr @_ZTS1A to i64), i64 -9223372036854775808) to ptr) } // DARWIN: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] // DARWIN: @_ZTS1A = linkonce_odr hidden constant [3 x i8] c"1A\00" -// DARWIN: @_ZTI1A = linkonce_odr hidden constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2), ptr inttoptr (i64 add (i64 ptrtoint (ptr @_ZTS1A to i64), i64 -9223372036854775808) to ptr) } +// ELF: @_ZTI1A = linkonce_odr constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2), ptr @_ZTS1A } // ELF: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] // ELF: @_ZTS1A = linkonce_odr constant [3 x i8] c"1A\00" -// ELF: @_ZTI1A = linkonce_odr constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2), ptr @_ZTS1A } auto ATI = typeid(A); diff --git a/clang/test/CodeGenCXX/ptrauth-type-info-vtable.cpp b/clang/test/CodeGenCXX/ptrauth-type-info-vtable.cpp index 174aeda89d1755..f4396e40270399 100644 --- a/clang/test/CodeGenCXX/ptrauth-type-info-vtable.cpp +++ b/clang/test/CodeGenCXX/ptrauth-type-info-vtable.cpp @@ -60,12 +60,13 @@ static_assert(__has_feature(ptrauth_type_info_vtable_pointer_discrimination) == extern "C" int disc_std_type_info = __builtin_ptrauth_string_discriminator("_ZTVSt9type_info"); // CHECK: @_ZTV10TestStruct = unnamed_addr constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI10TestStruct, ptr ptrauth (ptr @_ZN10TestStructD1Ev, i32 0, i64 52216, ptr getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV10TestStruct, i32 0, i32 0, i32 2)), ptr ptrauth (ptr @_ZN10TestStructD0Ev, i32 0, i64 39671, ptr getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV10TestStruct, i32 0, i32 0, i32 3))] }, align 8 -// CHECK: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] -// CHECK: @_ZTS10TestStruct = constant [13 x i8] 
c"10TestStruct\00", align 1 // NODISC: @_ZTI10TestStruct = constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2), ptr @_ZTS10TestStruct }, align 8 -// DISC: @_ZTI10TestStruct = constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2, i64 [[STDTYPEINFO_DISC]]), ptr @_ZTS10TestStruct }, align 8 +// DISC: @_ZTI10TestStruct = constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2, i64 [[STDTYPEINFO_DISC]], ptr @_ZTI10TestStruct), ptr @_ZTS10TestStruct }, align 8 + +// CHECK: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] +// CHECK: @_ZTS10TestStruct = constant [13 x i8] c"10TestStruct\00", align 1 struct TestStruct { virtual ~TestStruct(); diff --git a/clang/test/CodeGenCXX/ptrauth-vtable-virtual-inheritance-thunk.cpp b/clang/test/CodeGenCXX/ptrauth-vtable-virtual-inheritance-thunk.cpp index 031bb48608af7c..b5c15a29eb6b95 100644 --- a/clang/test/CodeGenCXX/ptrauth-vtable-virtual-inheritance-thunk.cpp +++ b/clang/test/CodeGenCXX/ptrauth-vtable-virtual-inheritance-thunk.cpp @@ -94,30 +94,30 @@ // CHECK-SAME: ptr ptrauth (ptr @_ZN1AD1Ev, i32 0, i64 2043, ptr getelementptr inbounds ({ [7 x ptr] }, ptr @_ZTV1A, i32 0, i32 0, i32 5)), // CHECK-SAME: ptr ptrauth (ptr @_ZN1AD0Ev, i32 0, i64 63674, ptr getelementptr inbounds ({ [7 x ptr] }, ptr @_ZTV1A, i32 0, i32 0, i32 6))] }, align 8 +// CHECK: @_ZTI1A = constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2), ptr @_ZTS1A }, align 8 + // CHECK: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] // CHECK: @_ZTS1A = constant [3 x i8] c"1A\00", align 1 -// CHECK: @_ZTI1A = constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2), ptr 
@_ZTS1A }, align 8 +// CHECK: @_ZTI1C = constant { ptr, ptr, i32, i32, ptr, i64 } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), i32 2), ptr @_ZTS1C, i32 0, i32 1, ptr @_ZTI1B, i64 -6141 }, align 8 // CHECK: @_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global [0 x ptr] // CHECK: @_ZTS1C = constant [3 x i8] c"1C\00", align 1 +// DARWIN: @_ZTI1B = linkonce_odr hidden constant { ptr, ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), i32 2), ptr inttoptr (i64 add (i64 ptrtoint (ptr @_ZTS1B to i64), i64 -9223372036854775808) to ptr), ptr @_ZTI1A }, align 8 +// ELF: @_ZTI1B = linkonce_odr constant { ptr, ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), i32 2), ptr @_ZTS1B, ptr @_ZTI1A }, comdat, align 8 + // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external global [0 x ptr] // DARWIN: @_ZTS1B = linkonce_odr hidden constant [3 x i8] c"1B\00", align 1 // ELF: @_ZTS1B = linkonce_odr constant [3 x i8] c"1B\00", comdat, align 1 -// DARWIN: @_ZTI1B = linkonce_odr hidden constant { ptr, ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), i32 2), ptr inttoptr (i64 add (i64 ptrtoint (ptr @_ZTS1B to i64), i64 -9223372036854775808) to ptr), ptr @_ZTI1A }, align 8 -// ELF: @_ZTI1B = linkonce_odr constant { ptr, ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), i32 2), ptr @_ZTS1B, ptr @_ZTI1A }, comdat, align 8 - -// CHECK: @_ZTI1C = constant { ptr, ptr, i32, i32, ptr, i64 } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), i32 2), ptr @_ZTS1C, i32 0, i32 1, ptr @_ZTI1B, i64 -6141 }, align 8 +// CHECK: @_ZTI1D = constant { ptr, ptr, i32, i32, ptr, i64 } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr 
@_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), i32 2), ptr @_ZTS1D, i32 0, i32 1, ptr @_ZTI1B, i64 -6141 }, align 8 // CHECK: @_ZTS1D = constant [3 x i8] c"1D\00", align 1 -// CHECK: @_ZTI1D = constant { ptr, ptr, i32, i32, ptr, i64 } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), i32 2), ptr @_ZTS1D, i32 0, i32 1, ptr @_ZTI1B, i64 -6141 }, align 8 - // CHECK: @_ZTV1E = unnamed_addr constant { [7 x ptr] } { [7 x ptr] [ptr null, ptr @_ZTI1E, // CHECK-SAME: ptr ptrauth (ptr @_ZN1E1fEv, i32 0, i64 28408, ptr getelementptr inbounds ({ [7 x ptr] }, ptr @_ZTV1E, i32 0, i32 0, i32 2)), // CHECK-SAME: ptr ptrauth (ptr @_ZN1E1gEv, i32 0, i64 22926, ptr getelementptr inbounds ({ [7 x ptr] }, ptr @_ZTV1E, i32 0, i32 0, i32 3)), @@ -125,10 +125,10 @@ // CHECK-SAME: ptr ptrauth (ptr @_ZN1ED1Ev, i32 0, i64 5817, ptr getelementptr inbounds ({ [7 x ptr] }, ptr @_ZTV1E, i32 0, i32 0, i32 5)), // CHECK-SAME: ptr ptrauth (ptr @_ZN1ED0Ev, i32 0, i64 26464, ptr getelementptr inbounds ({ [7 x ptr] }, ptr @_ZTV1E, i32 0, i32 0, i32 6))] }, align 8 -// CHECK: @_ZTS1E = constant [3 x i8] c"1E\00", align 1 - // CHECK: @_ZTI1E = constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2), ptr @_ZTS1E }, align 8 +// CHECK: @_ZTS1E = constant [3 x i8] c"1E\00", align 1 + // CHECK: @_ZTC1F0_1C = unnamed_addr constant { [5 x ptr], [11 x ptr] } { [5 x ptr] [ptr inttoptr (i64 16 to ptr), ptr null, ptr @_ZTI1C, // CHECK-SAME: ptr ptrauth (ptr @_ZN1CD1Ev, i32 0, i64 31214, ptr getelementptr inbounds ({ [5 x ptr], [11 x ptr] }, ptr @_ZTC1F0_1C, i32 0, i32 0, i32 3)), // CHECK-SAME: ptr ptrauth (ptr @_ZN1CD0Ev, i32 0, i64 8507, ptr getelementptr inbounds ({ [5 x ptr], [11 x ptr] }, ptr @_ZTC1F0_1C, i32 0, i32 0, i32 4))], [11 x ptr] [ptr inttoptr (i64 -16 to ptr), ptr null, ptr null, ptr null, ptr inttoptr (i64 -16 to ptr), ptr @_ZTI1C, @@ -149,10 +149,10 @@ // CHECK-SAME: 
ptr ptrauth (ptr @_ZTv0_n48_N1DD1Ev, i32 0, i64 2043, ptr getelementptr inbounds ({ [7 x ptr], [11 x ptr] }, ptr @_ZTC1F8_1D, i32 0, i32 1, i32 9)), // CHECK-SAME: ptr ptrauth (ptr @_ZTv0_n48_N1DD0Ev, i32 0, i64 63674, ptr getelementptr inbounds ({ [7 x ptr], [11 x ptr] }, ptr @_ZTC1F8_1D, i32 0, i32 1, i32 10))] }, align 8 -// CHECK: @_ZTS1F = constant [3 x i8] c"1F\00", align 1 - // CHECK: @_ZTI1F = constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64, ptr, i64 } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), i32 2), ptr @_ZTS1F, i32 3, i32 3, ptr @_ZTI1C, i64 2, ptr @_ZTI1D, i64 2050, ptr @_ZTI1E, i64 -8189 }, align 8 +// CHECK: @_ZTS1F = constant [3 x i8] c"1F\00", align 1 + // CHECK: @_ZTC1G0_1C = unnamed_addr constant { [5 x ptr], [11 x ptr] } { [5 x ptr] [ptr inttoptr (i64 24 to ptr), ptr null, ptr @_ZTI1C, // CHECK-SAME: ptr ptrauth (ptr @_ZN1CD1Ev, i32 0, i64 31214, ptr getelementptr inbounds ({ [5 x ptr], [11 x ptr] }, ptr @_ZTC1G0_1C, i32 0, i32 0, i32 3)), // CHECK-SAME: ptr ptrauth (ptr @_ZN1CD0Ev, i32 0, i64 8507, ptr getelementptr inbounds ({ [5 x ptr], [11 x ptr] }, ptr @_ZTC1G0_1C, i32 0, i32 0, i32 4))], [11 x ptr] [ptr inttoptr (i64 -24 to ptr), ptr null, ptr null, ptr null, ptr inttoptr (i64 -24 to ptr), ptr @_ZTI1C, @@ -173,10 +173,10 @@ // CHECK-SAME: ptr ptrauth (ptr @_ZTv0_n48_N1DD1Ev, i32 0, i64 2043, ptr getelementptr inbounds ({ [7 x ptr], [11 x ptr] }, ptr @_ZTC1G8_1D, i32 0, i32 1, i32 9)), // CHECK-SAME: ptr ptrauth (ptr @_ZTv0_n48_N1DD0Ev, i32 0, i64 63674, ptr getelementptr inbounds ({ [7 x ptr], [11 x ptr] }, ptr @_ZTC1G8_1D, i32 0, i32 1, i32 10))] }, align 8 -// CHECK: @_ZTS1G = constant [3 x i8] c"1G\00", align 1 - // CHECK: @_ZTI1G = constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64, ptr, i64 } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), i32 2), ptr @_ZTS1G, i32 3, i32 3, ptr @_ZTI1E, i64 -8189, ptr @_ZTI1C, i64 2, 
ptr @_ZTI1D, i64 2050 }, align 8 +// CHECK: @_ZTS1G = constant [3 x i8] c"1G\00", align 1 + // CHECK: @_ZTV1B = linkonce_odr unnamed_addr constant { [7 x ptr] } { [7 x ptr] [ptr null, ptr @_ZTI1B, // CHECK-SAME: ptr ptrauth (ptr @_ZN1A1fEv, i32 0, i64 55636, ptr getelementptr inbounds ({ [7 x ptr] }, ptr @_ZTV1B, i32 0, i32 0, i32 2)), // CHECK-SAME: ptr ptrauth (ptr @_ZN1A1gEv, i32 0, i64 19402, ptr getelementptr inbounds ({ [7 x ptr] }, ptr @_ZTV1B, i32 0, i32 0, i32 3)), diff --git a/clang/test/CodeGenCXX/rtti-linkage.cpp b/clang/test/CodeGenCXX/rtti-linkage.cpp index ca50a1bc6f01a7..03e7cdedd3462c 100644 --- a/clang/test/CodeGenCXX/rtti-linkage.cpp +++ b/clang/test/CodeGenCXX/rtti-linkage.cpp @@ -3,73 +3,73 @@ #include +// CHECK-BOTH: _ZTIP1C = internal constant // CHECK-BOTH: _ZTSP1C = internal constant -// CHECK-BOTH: _ZTS1C = internal constant // CHECK-BOTH: _ZTI1C = internal constant -// CHECK-BOTH: _ZTIP1C = internal constant -// CHECK-BOTH: _ZTSPP1C = internal constant +// CHECK-BOTH: _ZTS1C = internal constant // CHECK-BOTH: _ZTIPP1C = internal constant -// CHECK-BOTH: _ZTSM1Ci = internal constant +// CHECK-BOTH: _ZTSPP1C = internal constant // CHECK-BOTH: _ZTIM1Ci = internal constant -// CHECK-BOTH: _ZTSPM1Ci = internal constant +// CHECK-BOTH: _ZTSM1Ci = internal constant // CHECK-BOTH: _ZTIPM1Ci = internal constant -// CHECK-BOTH: _ZTSM1CS_ = internal constant +// CHECK-BOTH: _ZTSPM1Ci = internal constant // CHECK-BOTH: _ZTIM1CS_ = internal constant -// CHECK-BOTH: _ZTSM1CPS_ = internal constant +// CHECK-BOTH: _ZTSM1CS_ = internal constant // CHECK-BOTH: _ZTIM1CPS_ = internal constant +// CHECK-BOTH: _ZTSM1CPS_ = internal constant +// CHECK-BOTH: _ZTIM1A1C = internal constant // CHECK-BOTH: _ZTSM1A1C = internal constant -// CHECK: _ZTS1A = linkonce_odr constant -// CHECK-WITH-HIDDEN: _ZTS1A = linkonce_odr hidden constant // CHECK: _ZTI1A = linkonce_odr constant // CHECK-WITH-HIDDEN: _ZTI1A = linkonce_odr hidden constant -// CHECK-BOTH: _ZTIM1A1C = 
internal constant -// CHECK-BOTH: _ZTSM1AP1C = internal constant +// CHECK: _ZTS1A = linkonce_odr constant +// CHECK-WITH-HIDDEN: _ZTS1A = linkonce_odr hidden constant // CHECK-BOTH: _ZTIM1AP1C = internal constant +// CHECK-BOTH: _ZTSM1AP1C = internal constant // CHECK-WITH-HIDDEN: _ZTSFN12_GLOBAL__N_11DEvE = internal constant -// CHECK-WITH-HIDDEN: @_ZTSPK2T4 = linkonce_odr hidden constant -// CHECK-WITH-HIDDEN: @_ZTS2T4 = linkonce_odr hidden constant -// CHECK-WITH-HIDDEN: @_ZTI2T4 = linkonce_odr hidden constant -// CHECK-WITH-HIDDEN: @_ZTIPK2T4 = linkonce_odr hidden constant -// CHECK-WITH-HIDDEN: @_ZTSZ2t5vE1A = internal constant +// CHECK-WITH-HIDDEN: @_ZTIPK2T4 = linkonce_odr hidden constant +// CHECK-WITH-HIDDEN: @_ZTSPK2T4 = linkonce_odr hidden constant +// CHECK-WITH-HIDDEN: @_ZTI2T4 = linkonce_odr hidden constant +// CHECK-WITH-HIDDEN: @_ZTS2T4 = linkonce_odr hidden constant // CHECK-WITH-HIDDEN: @_ZTIZ2t5vE1A = internal constant -// CHECK-WITH-HIDDEN: @_ZTSZ2t6vE1A = linkonce_odr hidden constant +// CHECK-WITH-HIDDEN: @_ZTSZ2t5vE1A = internal constant // CHECK-WITH-HIDDEN: @_ZTIZ2t6vE1A = linkonce_odr hidden constant +// CHECK-WITH-HIDDEN: @_ZTSZ2t6vE1A = linkonce_odr hidden constant +// CHECK-WITH-HIDDEN: @_ZTIPZ2t7vE1A = linkonce_odr hidden constant // CHECK-WITH-HIDDEN: @_ZTSPZ2t7vE1A = linkonce_odr hidden constant -// CHECK-WITH-HIDDEN: @_ZTSZ2t7vE1A = linkonce_odr hidden constant // CHECK-WITH-HIDDEN: @_ZTIZ2t7vE1A = linkonce_odr hidden constant -// CHECK-WITH-HIDDEN: @_ZTIPZ2t7vE1A = linkonce_odr hidden constant +// CHECK-WITH-HIDDEN: @_ZTSZ2t7vE1A = linkonce_odr hidden constant -// CHECK: _ZTSN12_GLOBAL__N_11DE = internal constant // CHECK: _ZTIN12_GLOBAL__N_11DE = internal constant -// CHECK: _ZTSPN12_GLOBAL__N_11DE = internal constant +// CHECK: _ZTSN12_GLOBAL__N_11DE = internal constant // CHECK: _ZTIPN12_GLOBAL__N_11DE = internal constant -// CHECK: _ZTSFN12_GLOBAL__N_11DEvE = internal constant +// CHECK: _ZTSPN12_GLOBAL__N_11DE = internal 
constant // CHECK: _ZTIFN12_GLOBAL__N_11DEvE = internal constant -// CHECK: _ZTSFvN12_GLOBAL__N_11DEE = internal constant +// CHECK: _ZTSFN12_GLOBAL__N_11DEvE = internal constant // CHECK: _ZTIFvN12_GLOBAL__N_11DEE = internal constant +// CHECK: _ZTSFvN12_GLOBAL__N_11DEE = internal constant +// CHECK: _ZTIPFvvE = linkonce_odr constant // CHECK: _ZTSPFvvE = linkonce_odr constant -// CHECK: _ZTSFvvE = linkonce_odr constant // CHECK: _ZTIFvvE = linkonce_odr constant -// CHECK: _ZTIPFvvE = linkonce_odr constant -// CHECK: _ZTSN12_GLOBAL__N_11EE = internal constant +// CHECK: _ZTSFvvE = linkonce_odr constant // CHECK: _ZTIN12_GLOBAL__N_11EE = internal constant -// CHECK: _ZTSA10_i = linkonce_odr constant +// CHECK: _ZTSN12_GLOBAL__N_11EE = internal constant // CHECK: _ZTIA10_i = linkonce_odr constant +// CHECK: _ZTSA10_i = linkonce_odr constant // CHECK: _ZTI1TILj0EE = linkonce_odr constant // CHECK: _ZTI1TILj1EE = weak_odr constant // CHECK: _ZTI1TILj2EE = external constant -// CHECK: _ZTSZ2t5vE1A = internal constant // CHECK: _ZTIZ2t5vE1A = internal constant -// CHECK: _ZTS1B ={{.*}} constant +// CHECK: _ZTSZ2t5vE1A = internal constant // CHECK: _ZTI1B ={{.*}} constant +// CHECK: _ZTS1B ={{.*}} constant // CHECK: _ZTS1F = linkonce_odr constant -// CHECK: _ZTSZ2t6vE1A = linkonce_odr constant // CHECK: _ZTIZ2t6vE1A = linkonce_odr constant +// CHECK: _ZTSZ2t6vE1A = linkonce_odr constant +// CHECK: _ZTIPZ2t7vE1A = linkonce_odr constant // CHECK: _ZTSPZ2t7vE1A = linkonce_odr constant -// CHECK: _ZTSZ2t7vE1A = linkonce_odr constant // CHECK: _ZTIZ2t7vE1A = linkonce_odr constant -// CHECK: _ZTIPZ2t7vE1A = linkonce_odr constant +// CHECK: _ZTSZ2t7vE1A = linkonce_odr constant // CHECK: _ZTIN12_GLOBAL__N_11DE diff --git a/clang/test/CodeGenCXX/rtti-visibility.cpp b/clang/test/CodeGenCXX/rtti-visibility.cpp index 5945be5c73a260..1813fee658c72e 100644 --- a/clang/test/CodeGenCXX/rtti-visibility.cpp +++ b/clang/test/CodeGenCXX/rtti-visibility.cpp @@ -6,10 +6,10 @@ namespace Test1 
{ // A is explicitly marked hidden, so all RTTI data should also be marked hidden. - // CHECK-TEST1: @_ZTSN5Test11AE = linkonce_odr hidden constant // CHECK-TEST1: @_ZTIN5Test11AE = linkonce_odr hidden constant - // CHECK-TEST1: @_ZTSPN5Test11AE = linkonce_odr hidden constant + // CHECK-TEST1: @_ZTSN5Test11AE = linkonce_odr hidden constant // CHECK-TEST1: @_ZTIPN5Test11AE = linkonce_odr hidden constant + // CHECK-TEST1: @_ZTSPN5Test11AE = linkonce_odr hidden constant struct __attribute__((visibility("hidden"))) A { }; void f() { @@ -20,8 +20,8 @@ namespace Test1 { namespace Test2 { // A is weak, so its linkage should be linkoce_odr, but not marked hidden. - // CHECK-TEST2: @_ZTSN5Test21AE = linkonce_odr constant // CHECK-TEST2: @_ZTIN5Test21AE = linkonce_odr constant + // CHECK-TEST2: @_ZTSN5Test21AE = linkonce_odr constant struct A { }; void f() { (void)typeid(A); diff --git a/clang/test/CodeGenCXX/symbol-partition.cpp b/clang/test/CodeGenCXX/symbol-partition.cpp index ecc58e2a847dc1..cefeeac63f0147 100644 --- a/clang/test/CodeGenCXX/symbol-partition.cpp +++ b/clang/test/CodeGenCXX/symbol-partition.cpp @@ -2,8 +2,8 @@ // CHECK: @gv = {{.*}}, partition "foo" // CHECK: @_ZTV1S = {{.*}}, partition "foo" -// CHECK: @_ZTS1S = {{.*}}, partition "foo" // CHECK: @_ZTI1S = {{.*}}, partition "foo" +// CHECK: @_ZTS1S = {{.*}}, partition "foo" // CHECK: @_Z5ifuncv = {{.*}}, partition "foo" diff --git a/clang/test/CodeGenCXX/type_visibility.cpp b/clang/test/CodeGenCXX/type_visibility.cpp index 13aafcff0fa13e..00833e36944df2 100644 --- a/clang/test/CodeGenCXX/type_visibility.cpp +++ b/clang/test/CodeGenCXX/type_visibility.cpp @@ -26,12 +26,12 @@ namespace temp0 { template struct B; // FUNS-LABEL: define weak_odr void @_ZN5temp01BINS_1AEE3fooEv( // VARS: @_ZTVN5temp01BINS_1AEEE = weak_odr unnamed_addr constant - // VARS: @_ZTSN5temp01BINS_1AEEE = weak_odr constant // VARS: @_ZTIN5temp01BINS_1AEEE = weak_odr constant + // VARS: @_ZTSN5temp01BINS_1AEEE = weak_odr constant // 
FUNS-HIDDEN-LABEL: define weak_odr hidden void @_ZN5temp01BINS_1AEE3fooEv( // VARS-HIDDEN: @_ZTVN5temp01BINS_1AEEE = weak_odr hidden unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5temp01BINS_1AEEE = weak_odr hidden constant // VARS-HIDDEN: @_ZTIN5temp01BINS_1AEEE = weak_odr hidden constant + // VARS-HIDDEN: @_ZTSN5temp01BINS_1AEEE = weak_odr hidden constant } namespace temp1 { @@ -43,12 +43,12 @@ namespace temp1 { template struct B; // FUNS-LABEL: define weak_odr void @_ZN5temp11BINS_1AEE3fooEv( // VARS: @_ZTVN5temp11BINS_1AEEE = weak_odr unnamed_addr constant - // VARS: @_ZTSN5temp11BINS_1AEEE = weak_odr constant // VARS: @_ZTIN5temp11BINS_1AEEE = weak_odr constant + // VARS: @_ZTSN5temp11BINS_1AEEE = weak_odr constant // FUNS-HIDDEN-LABEL: define weak_odr hidden void @_ZN5temp11BINS_1AEE3fooEv( // VARS-HIDDEN: @_ZTVN5temp11BINS_1AEEE = weak_odr unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5temp11BINS_1AEEE = weak_odr constant // VARS-HIDDEN: @_ZTIN5temp11BINS_1AEEE = weak_odr constant + // VARS-HIDDEN: @_ZTSN5temp11BINS_1AEEE = weak_odr constant } namespace temp2 { @@ -60,12 +60,12 @@ namespace temp2 { template struct B; // FUNS-LABEL: define weak_odr void @_ZN5temp21BINS_1AEE3fooEv( // VARS: @_ZTVN5temp21BINS_1AEEE = weak_odr unnamed_addr constant - // VARS: @_ZTSN5temp21BINS_1AEEE = weak_odr constant // VARS: @_ZTIN5temp21BINS_1AEEE = weak_odr constant + // VARS: @_ZTSN5temp21BINS_1AEEE = weak_odr constant // FUNS-HIDDEN-LABEL: define weak_odr hidden void @_ZN5temp21BINS_1AEE3fooEv( // VARS-HIDDEN: @_ZTVN5temp21BINS_1AEEE = weak_odr hidden unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5temp21BINS_1AEEE = weak_odr hidden constant // VARS-HIDDEN: @_ZTIN5temp21BINS_1AEEE = weak_odr hidden constant + // VARS-HIDDEN: @_ZTSN5temp21BINS_1AEEE = weak_odr hidden constant } namespace temp3 { @@ -77,12 +77,12 @@ namespace temp3 { template struct B; // FUNS-LABEL: define weak_odr hidden void @_ZN5temp31BINS_1AEE3fooEv( // VARS: @_ZTVN5temp31BINS_1AEEE = weak_odr hidden 
unnamed_addr constant - // VARS: @_ZTSN5temp31BINS_1AEEE = weak_odr hidden constant // VARS: @_ZTIN5temp31BINS_1AEEE = weak_odr hidden constant + // VARS: @_ZTSN5temp31BINS_1AEEE = weak_odr hidden constant // FUNS-HIDDEN-LABEL: define weak_odr hidden void @_ZN5temp31BINS_1AEE3fooEv( // VARS-HIDDEN: @_ZTVN5temp31BINS_1AEEE = weak_odr hidden unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5temp31BINS_1AEEE = weak_odr hidden constant // VARS-HIDDEN: @_ZTIN5temp31BINS_1AEEE = weak_odr hidden constant + // VARS-HIDDEN: @_ZTSN5temp31BINS_1AEEE = weak_odr hidden constant } namespace temp4 { @@ -94,12 +94,12 @@ namespace temp4 { template struct B; // FUNS-LABEL: define weak_odr void @_ZN5temp41BINS_1AEE3fooEv( // VARS: @_ZTVN5temp41BINS_1AEEE = weak_odr hidden unnamed_addr constant - // VARS: @_ZTSN5temp41BINS_1AEEE = weak_odr hidden constant // VARS: @_ZTIN5temp41BINS_1AEEE = weak_odr hidden constant + // VARS: @_ZTSN5temp41BINS_1AEEE = weak_odr hidden constant // FUNS-HIDDEN-LABEL: define weak_odr hidden void @_ZN5temp41BINS_1AEE3fooEv( // VARS-HIDDEN: @_ZTVN5temp41BINS_1AEEE = weak_odr hidden unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5temp41BINS_1AEEE = weak_odr hidden constant // VARS-HIDDEN: @_ZTIN5temp41BINS_1AEEE = weak_odr hidden constant + // VARS-HIDDEN: @_ZTSN5temp41BINS_1AEEE = weak_odr hidden constant } namespace type0 { @@ -110,12 +110,12 @@ namespace type0 { void A::foo() {} // FUNS-LABEL: define void @_ZN5type01A3fooEv( // VARS: @_ZTVN5type01AE = unnamed_addr constant - // VARS: @_ZTSN5type01AE = constant // VARS: @_ZTIN5type01AE = constant + // VARS: @_ZTSN5type01AE = constant // FUNS-HIDDEN-LABEL: define hidden void @_ZN5type01A3fooEv( // VARS-HIDDEN: @_ZTVN5type01AE = unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5type01AE = constant // VARS-HIDDEN: @_ZTIN5type01AE = constant + // VARS-HIDDEN: @_ZTSN5type01AE = constant } namespace type1 { @@ -126,12 +126,12 @@ namespace type1 { void A::foo() {} // FUNS-LABEL: define hidden void @_ZN5type11A3fooEv( // 
VARS: @_ZTVN5type11AE = unnamed_addr constant - // VARS: @_ZTSN5type11AE = constant // VARS: @_ZTIN5type11AE = constant + // VARS: @_ZTSN5type11AE = constant // FUNS-HIDDEN-LABEL: define hidden void @_ZN5type11A3fooEv( // VARS-HIDDEN: @_ZTVN5type11AE = unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5type11AE = constant // VARS-HIDDEN: @_ZTIN5type11AE = constant + // VARS-HIDDEN: @_ZTSN5type11AE = constant } namespace type2 { @@ -142,12 +142,12 @@ namespace type2 { void A::foo() {} // FUNS-LABEL: define void @_ZN5type21A3fooEv( // VARS: @_ZTVN5type21AE = hidden unnamed_addr constant - // VARS: @_ZTSN5type21AE = hidden constant // VARS: @_ZTIN5type21AE = hidden constant + // VARS: @_ZTSN5type21AE = hidden constant // FUNS-HIDDEN-LABEL: define hidden void @_ZN5type21A3fooEv( // VARS-HIDDEN: @_ZTVN5type21AE = hidden unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5type21AE = hidden constant // VARS-HIDDEN: @_ZTIN5type21AE = hidden constant + // VARS-HIDDEN: @_ZTSN5type21AE = hidden constant } namespace type3 { @@ -158,11 +158,11 @@ namespace type3 { void A::foo() {} // FUNS-LABEL: define void @_ZN5type31A3fooEv( // VARS: @_ZTVN5type31AE = hidden unnamed_addr constant - // VARS: @_ZTSN5type31AE = hidden constant // VARS: @_ZTIN5type31AE = hidden constant + // VARS: @_ZTSN5type31AE = hidden constant // FUNS-HIDDEN-LABEL: define void @_ZN5type31A3fooEv( // VARS-HIDDEN: @_ZTVN5type31AE = hidden unnamed_addr constant - // VARS-HIDDEN: @_ZTSN5type31AE = hidden constant // VARS-HIDDEN: @_ZTIN5type31AE = hidden constant + // VARS-HIDDEN: @_ZTSN5type31AE = hidden constant } diff --git a/clang/test/CodeGenCXX/typeinfo-with-address-space.cpp b/clang/test/CodeGenCXX/typeinfo-with-address-space.cpp index 60eb8f17f91fd1..68eb5cb4864765 100644 --- a/clang/test/CodeGenCXX/typeinfo-with-address-space.cpp +++ b/clang/test/CodeGenCXX/typeinfo-with-address-space.cpp @@ -15,12 +15,12 @@ class B : A { // NO-AS: @_ZTISt9type_info = external constant ptr // AS: @_ZTIi = external addrspace(1) 
constant ptr addrspace(1) // NO-AS: @_ZTIi = external constant ptr +// AS: @_ZTI1A = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1A }, comdat, align 8 +// NO-AS: @_ZTI1A = linkonce_odr constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr @_ZTS1A }, comdat, align 8 // AS: @_ZTVN10__cxxabiv117__class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)] // NO-AS: @_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] // AS: @_ZTS1A = linkonce_odr addrspace(1) constant [3 x i8] c"1A\00", comdat, align 1 // NO-AS: @_ZTS1A = linkonce_odr constant [3 x i8] c"1A\00", comdat, align 1 -// AS: @_ZTI1A = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1A }, comdat, align 8 -// NO-AS: @_ZTI1A = linkonce_odr constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr @_ZTS1A }, comdat, align 8 // AS: @_ZTIf = external addrspace(1) constant ptr addrspace(1) // NO-AS: @_ZTIf = external constant ptr diff --git a/clang/test/CodeGenCXX/visibility-ms-compat.cpp b/clang/test/CodeGenCXX/visibility-ms-compat.cpp index 525691358832f8..0344803909cd44 100644 --- a/clang/test/CodeGenCXX/visibility-ms-compat.cpp +++ b/clang/test/CodeGenCXX/visibility-ms-compat.cpp @@ -24,8 +24,8 @@ namespace test0 { // CHECK: declare void @_ZN5test01A3barEv() const std::type_info &ti = typeid(A); - // CHECK-GLOBAL: @_ZTSN5test01AE = linkonce_odr constant // CHECK-GLOBAL: @_ZTIN5test01AE = linkonce_odr constant + // CHECK-GLOBAL: @_ZTSN5test01AE = linkonce_odr constant // CHECK-GLOBAL: @_ZN5test02tiE = hidden constant } @@ -40,8 
+40,8 @@ namespace test1 { // CHECK: declare hidden void @_ZN5test11A3barEv() const std::type_info &ti = typeid(A); - // CHECK-GLOBAL: @_ZTSN5test11AE = linkonce_odr hidden constant // CHECK-GLOBAL: @_ZTIN5test11AE = linkonce_odr hidden constant + // CHECK-GLOBAL: @_ZTSN5test11AE = linkonce_odr hidden constant // CHECK-GLOBAL: @_ZN5test12tiE = hidden constant } @@ -56,8 +56,8 @@ namespace test2 { // CHECK: declare void @_ZN5test21A3barEv() const std::type_info &ti = typeid(A); - // CHECK-GLOBAL: @_ZTSN5test21AE = linkonce_odr constant // CHECK-GLOBAL: @_ZTIN5test21AE = linkonce_odr constant + // CHECK-GLOBAL: @_ZTSN5test21AE = linkonce_odr constant // CHECK-GLOBAL: @_ZN5test22tiE = hidden constant } @@ -73,8 +73,8 @@ namespace test3 { // CHECK: declare void @_ZN5test31BINS_1AEE3barEv() const std::type_info &ti = typeid(B); - // CHECK-GLOBAL: @_ZTSN5test31BINS_1AEEE = linkonce_odr constant // CHECK-GLOBAL: @_ZTIN5test31BINS_1AEEE = linkonce_odr constant + // CHECK-GLOBAL: @_ZTSN5test31BINS_1AEEE = linkonce_odr constant } namespace test4 { @@ -89,8 +89,8 @@ namespace test4 { // CHECK: declare void @_ZN5test41BINS_1AEE3barEv() const std::type_info &ti = typeid(B); - // CHECK-GLOBAL: @_ZTSN5test41BINS_1AEEE = linkonce_odr constant // CHECK-GLOBAL: @_ZTIN5test41BINS_1AEEE = linkonce_odr constant + // CHECK-GLOBAL: @_ZTSN5test41BINS_1AEEE = linkonce_odr constant } namespace test5 { @@ -105,6 +105,6 @@ namespace test5 { // CHECK: declare hidden void @_ZN5test51BINS_1AEE3barEv() const std::type_info &ti = typeid(B); - // CHECK-GLOBAL: @_ZTSN5test51BINS_1AEEE = linkonce_odr hidden constant // CHECK-GLOBAL: @_ZTIN5test51BINS_1AEEE = linkonce_odr hidden constant + // CHECK-GLOBAL: @_ZTSN5test51BINS_1AEEE = linkonce_odr hidden constant } diff --git a/clang/test/CodeGenCXX/vtable-align-address-space.cpp b/clang/test/CodeGenCXX/vtable-align-address-space.cpp index 5eac0bd75dc5ef..5eccf0a0d77d82 100644 --- a/clang/test/CodeGenCXX/vtable-align-address-space.cpp +++ 
b/clang/test/CodeGenCXX/vtable-align-address-space.cpp @@ -9,5 +9,5 @@ struct A { void A::f() {} // CHECK: @_ZTV1A ={{.*}} unnamed_addr addrspace(1) constant { [5 x ptr addrspace(1)] } { [5 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1A, ptr addrspace(1) addrspacecast (ptr @_ZN1A1fEv to ptr addrspace(1)), ptr addrspace(1) addrspacecast (ptr @_ZN1A1gEv to ptr addrspace(1)), ptr addrspace(1) addrspacecast (ptr @_ZN1A1hEv to ptr addrspace(1))] -// CHECK: @_ZTS1A ={{.*}} constant [3 x i8] c"1A\00", align 1 // CHECK: @_ZTI1A ={{.*}} addrspace(1) constant { ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1A }, align 8 +// CHECK: @_ZTS1A ={{.*}} constant [3 x i8] c"1A\00", align 1 diff --git a/clang/test/CodeGenCXX/vtable-align.cpp b/clang/test/CodeGenCXX/vtable-align.cpp index fb8ff1a582ec83..f1d5e09b9730b2 100644 --- a/clang/test/CodeGenCXX/vtable-align.cpp +++ b/clang/test/CodeGenCXX/vtable-align.cpp @@ -10,8 +10,8 @@ struct A { void A::f() {} // CHECK-32: @_ZTV1A ={{.*}} unnamed_addr constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTI1A, ptr @_ZN1A1fEv, ptr @_ZN1A1gEv, ptr @_ZN1A1hEv] }, align 4 -// CHECK-32: @_ZTS1A ={{.*}} constant [3 x i8] c"1A\00", align 1 // CHECK-32: @_ZTI1A ={{.*}} constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 2), ptr @_ZTS1A }, align 4 +// CHECK-32: @_ZTS1A ={{.*}} constant [3 x i8] c"1A\00", align 1 // CHECK-64: @_ZTV1A ={{.*}} unnamed_addr constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTI1A, ptr @_ZN1A1fEv, ptr @_ZN1A1gEv, ptr @_ZN1A1hEv] }, align 8 -// CHECK-64: @_ZTS1A ={{.*}} constant [3 x i8] c"1A\00", align 1 // CHECK-64: @_ZTI1A ={{.*}} constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr @_ZTS1A }, align 8 +// CHECK-64: @_ZTS1A ={{.*}} constant [3 x 
i8] c"1A\00", align 1 diff --git a/clang/test/CodeGenCXX/vtable-available-externally.cpp b/clang/test/CodeGenCXX/vtable-available-externally.cpp index ab105260bc75aa..4415e24f3f1cb6 100644 --- a/clang/test/CodeGenCXX/vtable-available-externally.cpp +++ b/clang/test/CodeGenCXX/vtable-available-externally.cpp @@ -49,8 +49,8 @@ void g() { // This tests mainly that the typeinfo and typename constants have their linkage // updated correctly. -// CHECK-TEST2: @_ZTSN5Test21AE ={{.*}} constant // CHECK-TEST2: @_ZTIN5Test21AE ={{.*}} constant +// CHECK-TEST2: @_ZTSN5Test21AE ={{.*}} constant // CHECK-TEST2: @_ZTVN5Test21AE ={{.*}} unnamed_addr constant namespace Test2 { struct A { diff --git a/clang/test/CodeGenCXX/vtable-key-function-arm.cpp b/clang/test/CodeGenCXX/vtable-key-function-arm.cpp index a054fd87c8ea73..83889bf9f8dbc5 100644 --- a/clang/test/CodeGenCXX/vtable-key-function-arm.cpp +++ b/clang/test/CodeGenCXX/vtable-key-function-arm.cpp @@ -90,8 +90,8 @@ struct Test2a { // V-table should be defined with strong linkage. Test2a::Test2a() { use(typeid(Test2a)); } // CHECK: @_ZTV6Test2a ={{.*}} unnamed_addr constant -// CHECK-LATE: @_ZTS6Test2a ={{.*}} constant // CHECK-LATE: @_ZTI6Test2a ={{.*}} constant +// CHECK-LATE: @_ZTS6Test2a ={{.*}} constant // 'bar' becomes the key function when 'foo' is defined inline. void Test2a::bar() {} @@ -111,8 +111,8 @@ void Test2b::bar() {} // V-table should be defined with strong linkage. Test2b::Test2b() { use(typeid(Test2b)); } // CHECK: @_ZTV6Test2b ={{.*}} unnamed_addr constant -// CHECK-LATE: @_ZTS6Test2b ={{.*}} constant // CHECK-LATE: @_ZTI6Test2b ={{.*}} constant +// CHECK-LATE: @_ZTS6Test2b ={{.*}} constant inline void Test2b::foo() {} @@ -131,8 +131,8 @@ inline void Test2c::foo() {} // V-table should be defined with strong linkage. 
Test2c::Test2c() { use(typeid(Test2c)); } // CHECK: @_ZTV6Test2c ={{.*}} unnamed_addr constant -// CHECK: @_ZTS6Test2c ={{.*}} constant // CHECK: @_ZTI6Test2c ={{.*}} constant +// CHECK: @_ZTS6Test2c ={{.*}} constant /*** Test3a ******************************************************************/ @@ -145,8 +145,8 @@ struct Test3a { // V-table should be defined with weak linkage. Test3a::Test3a() { use(typeid(Test3a)); } // CHECK: @_ZTV6Test3a = linkonce_odr unnamed_addr constant -// CHECK-LATE: @_ZTS6Test3a = linkonce_odr constant // CHECK-LATE: @_ZTI6Test3a = linkonce_odr constant +// CHECK-LATE: @_ZTS6Test3a = linkonce_odr constant // There ceases to be a key function after these declarations. inline void Test3a::bar() {} @@ -166,8 +166,8 @@ inline void Test3b::bar() {} // V-table should be defined with weak linkage. Test3b::Test3b() { use(typeid(Test3b)); } // CHECK: @_ZTV6Test3b = linkonce_odr unnamed_addr constant -// CHECK-LATE: @_ZTS6Test3b = linkonce_odr constant // CHECK-LATE: @_ZTI6Test3b = linkonce_odr constant +// CHECK-LATE: @_ZTS6Test3b = linkonce_odr constant inline void Test3b::foo() {} @@ -186,8 +186,8 @@ inline void Test3c::foo() {} // V-table should be defined with weak linkage. Test3c::Test3c() { use(typeid(Test3c)); } // CHECK: @_ZTV6Test3c = linkonce_odr unnamed_addr constant -// CHECK: @_ZTS6Test3c = linkonce_odr constant // CHECK: @_ZTI6Test3c = linkonce_odr constant +// CHECK: @_ZTS6Test3c = linkonce_odr constant /*** Test4a ******************************************************************/ @@ -200,8 +200,8 @@ template struct Test4a { // V-table should be defined with weak linkage. template <> Test4a::Test4a() { use(typeid(Test4a)); } // CHECK: @_ZTV6Test4aIiE = linkonce_odr unnamed_addr constant -// CHECK: @_ZTS6Test4aIiE = linkonce_odr constant // CHECK: @_ZTI6Test4aIiE = linkonce_odr constant +// CHECK: @_ZTS6Test4aIiE = linkonce_odr constant // There ceases to be a key function after these declarations. 
template <> inline void Test4a::bar() {} @@ -221,8 +221,8 @@ template <> inline void Test4b::bar() {} // V-table should be defined with weak linkage. template <> Test4b::Test4b() { use(typeid(Test4b)); } // CHECK: @_ZTV6Test4bIiE = linkonce_odr unnamed_addr constant -// CHECK: @_ZTS6Test4bIiE = linkonce_odr constant // CHECK: @_ZTI6Test4bIiE = linkonce_odr constant +// CHECK: @_ZTS6Test4bIiE = linkonce_odr constant template <> inline void Test4b::foo() {} @@ -241,8 +241,8 @@ template <> inline void Test4c::foo() {} // V-table should be defined with weak linkage. template <> Test4c::Test4c() { use(typeid(Test4c)); } // CHECK: @_ZTV6Test4cIiE = linkonce_odr unnamed_addr constant -// CHECK: @_ZTS6Test4cIiE = linkonce_odr constant // CHECK: @_ZTI6Test4cIiE = linkonce_odr constant +// CHECK: @_ZTS6Test4cIiE = linkonce_odr constant /*** Test5a ******************************************************************/ @@ -258,8 +258,8 @@ template <> inline void Test5a::foo(); // V-table should be defined with weak linkage. template <> Test5a::Test5a() { use(typeid(Test5a)); } // CHECK: @_ZTV6Test5aIiE = linkonce_odr unnamed_addr constant -// CHECK: @_ZTS6Test5aIiE = linkonce_odr constant // CHECK: @_ZTI6Test5aIiE = linkonce_odr constant +// CHECK: @_ZTS6Test5aIiE = linkonce_odr constant // There ceases to be a key function after these declarations. template <> inline void Test5a::bar() {} @@ -280,8 +280,8 @@ template <> inline void Test5b::bar() {} // V-table should be defined with weak linkage. template <> Test5b::Test5b() { use(typeid(Test5b)); } // CHECK: @_ZTV6Test5bIiE = linkonce_odr unnamed_addr constant -// CHECK: @_ZTS6Test5bIiE = linkonce_odr constant // CHECK: @_ZTI6Test5bIiE = linkonce_odr constant +// CHECK: @_ZTS6Test5bIiE = linkonce_odr constant template <> inline void Test5a::foo(); template <> inline void Test5b::foo() {} @@ -303,5 +303,5 @@ template <> inline void Test5c::foo() {} // V-table should be defined with weak linkage. 
template <> Test5c::Test5c() { use(typeid(Test5c)); } // CHECK: @_ZTV6Test5cIiE = linkonce_odr unnamed_addr constant -// CHECK: @_ZTS6Test5cIiE = linkonce_odr constant // CHECK: @_ZTI6Test5cIiE = linkonce_odr constant +// CHECK: @_ZTS6Test5cIiE = linkonce_odr constant diff --git a/clang/test/CodeGenCXX/vtable-key-function-ios.cpp b/clang/test/CodeGenCXX/vtable-key-function-ios.cpp index ff2793ad51f948..43abfb62c73a6c 100644 --- a/clang/test/CodeGenCXX/vtable-key-function-ios.cpp +++ b/clang/test/CodeGenCXX/vtable-key-function-ios.cpp @@ -63,8 +63,8 @@ struct Test1a { // V-table needs to be defined weakly. Test1a::Test1a() { use(typeid(Test1a)); } // CHECK: @_ZTV6Test1a = linkonce_odr {{(dso_local )?}}unnamed_addr constant -// CHECK-LATE: @_ZTS6Test1a = linkonce_odr {{(dso_local )?}}constant // CHECK-LATE: @_ZTI6Test1a = linkonce_odr {{(dso_local )?}}constant +// CHECK-LATE: @_ZTS6Test1a = linkonce_odr {{(dso_local )?}}constant // This defines the key function. inline void Test1a::foo() {} @@ -83,8 +83,8 @@ inline void Test1b::foo() {} // V-table should be defined weakly.. Test1b::Test1b() { use(typeid(Test1b)); } // CHECK: @_ZTV6Test1b = linkonce_odr {{(dso_local )?}}unnamed_addr constant -// CHECK: @_ZTS6Test1b = linkonce_odr {{(dso_local )?}}constant // CHECK: @_ZTI6Test1b = linkonce_odr {{(dso_local )?}}constant +// CHECK: @_ZTS6Test1b = linkonce_odr {{(dso_local )?}}constant /*** Test2a ******************************************************************/ @@ -97,8 +97,8 @@ struct Test2a { // V-table should be defined with weak linkage. 
Test2a::Test2a() { use(typeid(Test2a)); } // CHECK: @_ZTV6Test2a = linkonce_odr {{(dso_local )?}}unnamed_addr constant -// CHECK-LATE: @_ZTS6Test2a = linkonce_odr {{(dso_local )?}}constant // CHECK-LATE: @_ZTI6Test2a = linkonce_odr {{(dso_local )?}}constant +// CHECK-LATE: @_ZTS6Test2a = linkonce_odr {{(dso_local )?}}constant void Test2a::bar() {} inline void Test2a::foo() {} @@ -116,8 +116,8 @@ void Test2b::bar() {} // V-table should be defined with weak linkage. Test2b::Test2b() { use(typeid(Test2b)); } // CHECK: @_ZTV6Test2b = linkonce_odr {{(dso_local )?}}unnamed_addr constant -// CHECK-LATE: @_ZTS6Test2b = linkonce_odr {{(dso_local )?}}constant // CHECK-LATE: @_ZTI6Test2b = linkonce_odr {{(dso_local )?}}constant +// CHECK-LATE: @_ZTS6Test2b = linkonce_odr {{(dso_local )?}}constant inline void Test2b::foo() {} @@ -135,8 +135,8 @@ inline void Test2c::foo() {} // V-table should be defined with weak linkage. Test2c::Test2c() { use(typeid(Test2c)); } // CHECK: @_ZTV6Test2c = linkonce_odr {{(dso_local )?}}unnamed_addr constant -// CHECK: @_ZTS6Test2c = linkonce_odr {{(dso_local )?}}constant // CHECK: @_ZTI6Test2c = linkonce_odr {{(dso_local )?}}constant +// CHECK: @_ZTS6Test2c = linkonce_odr {{(dso_local )?}}constant /*** Test3a ******************************************************************/ @@ -149,8 +149,8 @@ struct Test3a { // V-table should be defined with weak linkage. Test3a::Test3a() { use(typeid(Test3a)); } // CHECK: @_ZTV6Test3a = linkonce_odr {{(dso_local )?}}unnamed_addr constant -// CHECK-LATE: @_ZTS6Test3a = linkonce_odr {{(dso_local )?}}constant // CHECK-LATE: @_ZTI6Test3a = linkonce_odr {{(dso_local )?}}constant +// CHECK-LATE: @_ZTS6Test3a = linkonce_odr {{(dso_local )?}}constant // This defines the key function. inline void Test3a::bar() {} @@ -169,8 +169,8 @@ inline void Test3b::bar() {} // V-table should be defined with weak linkage. 
Test3b::Test3b() { use(typeid(Test3b)); } // CHECK: @_ZTV6Test3b = linkonce_odr {{(dso_local )?}}unnamed_addr constant -// CHECK-LATE: @_ZTS6Test3b = linkonce_odr {{(dso_local )?}}constant // CHECK-LATE: @_ZTI6Test3b = linkonce_odr {{(dso_local )?}}constant +// CHECK-LATE: @_ZTS6Test3b = linkonce_odr {{(dso_local )?}}constant // This defines the key function. inline void Test3b::foo() {} @@ -190,5 +190,5 @@ inline void Test3c::foo() {} // V-table should be defined with weak linkage. Test3c::Test3c() { use(typeid(Test3c)); } // CHECK: @_ZTV6Test3c = linkonce_odr {{(dso_local )?}}unnamed_addr constant -// CHECK: @_ZTS6Test3c = linkonce_odr {{(dso_local )?}}constant // CHECK: @_ZTI6Test3c = linkonce_odr {{(dso_local )?}}constant +// CHECK: @_ZTS6Test3c = linkonce_odr {{(dso_local )?}}constant diff --git a/clang/test/CodeGenCXX/vtable-key-function-win-comdat.cpp b/clang/test/CodeGenCXX/vtable-key-function-win-comdat.cpp index dd4fd9f8754a8d..b3de2f63499995 100644 --- a/clang/test/CodeGenCXX/vtable-key-function-win-comdat.cpp +++ b/clang/test/CodeGenCXX/vtable-key-function-win-comdat.cpp @@ -15,11 +15,11 @@ Test1a::Test1a() { use(typeid(Test1a)); } inline void Test1a::foo() {} // CHECK: $_ZTV6Test1a = comdat any -// CHECK: $_ZTS6Test1a = comdat any // CHECK: $_ZTI6Test1a = comdat any -// CHECK-NOT: $_ZTS6Test1a.1 = comdat any +// CHECK: $_ZTS6Test1a = comdat any // CHECK-NOT: $_ZTI6Test1a.1 = comdat any +// CHECK-NOT: $_ZTS6Test1a.1 = comdat any // CHECK: @_ZTV6Test1a = linkonce_odr dso_local unnamed_addr constant {{.*}} ptr @_ZTI6Test1a -// CHECK: @_ZTS6Test1a = linkonce_odr dso_local constant // CHECK: @_ZTI6Test1a = linkonce_odr dso_local constant {{.*}} ptr @_ZTS6Test1a +// CHECK: @_ZTS6Test1a = linkonce_odr dso_local constant diff --git a/clang/test/CodeGenCXX/weak-extern-typeinfo.cpp b/clang/test/CodeGenCXX/weak-extern-typeinfo.cpp index 932d36f4abbd2b..8c948d16c90ec5 100644 --- a/clang/test/CodeGenCXX/weak-extern-typeinfo.cpp +++ 
b/clang/test/CodeGenCXX/weak-extern-typeinfo.cpp @@ -30,17 +30,17 @@ class V2 : public virtual V1 { void V1::foo() { } void V2::foo() { } -// CHECK: @_ZTS1A = weak_odr {{(dso_local |hidden )?}}constant // CHECK: @_ZTI1A = weak_odr {{(dso_local |hidden )?}}constant -// CHECK: @_ZTS1B = weak_odr {{(dso_local |hidden )?}}constant +// CHECK: @_ZTS1A = weak_odr {{(dso_local |hidden )?}}constant // CHECK: @_ZTI1B = weak_odr {{(dso_local |hidden )?}}constant +// CHECK: @_ZTS1B = weak_odr {{(dso_local |hidden )?}}constant +// CHECK: @_ZTI1C = weak_odr {{(dso_local |hidden )?}}constant // CHECK: @_ZTS1C = weak_odr {{(dso_local |hidden )?}}constant -// CHECK: @_ZTS2T1 = linkonce_odr {{(dso_local |hidden )?}}constant // CHECK: @_ZTI2T1 = linkonce_odr {{(dso_local |hidden )?}}constant -// CHECK: @_ZTS1T = linkonce_odr {{(dso_local |hidden )?}}constant +// CHECK: @_ZTS2T1 = linkonce_odr {{(dso_local |hidden )?}}constant // CHECK: @_ZTI1T = linkonce_odr {{(dso_local |hidden )?}}constant -// CHECK: @_ZTI1C = weak_odr {{(dso_local |hidden )?}}constant -// CHECK: @_ZTS2V1 = weak_odr {{(dso_local |hidden )?}}constant +// CHECK: @_ZTS1T = linkonce_odr {{(dso_local |hidden )?}}constant // CHECK: @_ZTI2V1 = weak_odr {{(dso_local |hidden )?}}constant -// CHECK: @_ZTS2V2 = weak_odr {{(dso_local |hidden )?}}constant +// CHECK: @_ZTS2V1 = weak_odr {{(dso_local |hidden )?}}constant // CHECK: @_ZTI2V2 = weak_odr {{(dso_local |hidden )?}}constant +// CHECK: @_ZTS2V2 = weak_odr {{(dso_local |hidden )?}}constant diff --git a/clang/test/CodeGenCXX/windows-itanium-type-info.cpp b/clang/test/CodeGenCXX/windows-itanium-type-info.cpp index 20bd78df509837..95b7b3a4b29e25 100644 --- a/clang/test/CodeGenCXX/windows-itanium-type-info.cpp +++ b/clang/test/CodeGenCXX/windows-itanium-type-info.cpp @@ -33,8 +33,8 @@ void f() { // CHECK-DAG: @_ZTI4base = external dllimport constant -// CHECK-EH-IMPORT: @_ZTS4base = linkonce_odr dso_local constant // CHECK-EH-IMPORT: @_ZTI4base = linkonce_odr dso_local 
constant +// CHECK-EH-IMPORT: @_ZTS4base = linkonce_odr dso_local constant struct __declspec(dllimport) gatekeeper {}; struct zuul : gatekeeper { diff --git a/clang/test/CodeGenObjCXX/rtti.mm b/clang/test/CodeGenObjCXX/rtti.mm index ee3df349af18d6..2fc6f8722f4398 100644 --- a/clang/test/CodeGenObjCXX/rtti.mm +++ b/clang/test/CodeGenObjCXX/rtti.mm @@ -4,19 +4,20 @@ namespace std { class type_info; } -// CHECK: @_ZTI1A = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv117__class_type_infoE{{.*}}@_ZTS1A @interface A @end -// CHECK: @_ZTI1B = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv120__si_class_type_infoE{{.*}}@_ZTS1B{{.*}}@_ZTI1A @interface B : A @end // CHECK: @_ZTIP1B = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv119__pointer_type_infoE{{.*}}@_ZTSP1B{{.*}}, i32 0, {{.*}}@_ZTI1B -// CHECK: @_ZTI11objc_object = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv117__class_type_infoE{{.*}}@_ZTS11objc_object +// CHECK: @_ZTI1B = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv120__si_class_type_infoE{{.*}}@_ZTS1B{{.*}}@_ZTI1A +// CHECK: @_ZTI1A = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv117__class_type_infoE{{.*}}@_ZTS1A + // CHECK: @_ZTIP11objc_object = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv119__pointer_type_infoE{{.*}}@_ZTSP11objc_object{{.*}}@_ZTI11objc_object -// CHECK: @_ZTI10objc_class = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv117__class_type_infoE{{.*}}@_ZTS10objc_class +// CHECK: @_ZTI11objc_object = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv117__class_type_infoE{{.*}}@_ZTS11objc_object // CHECK: @_ZTIP10objc_class = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv119__pointer_type_infoE{{.*}}@_ZTSP10objc_class{{.*}}@_ZTI10objc_class +// CHECK: @_ZTI10objc_class = linkonce_odr constant {{.*}}@_ZTVN10__cxxabiv117__class_type_infoE{{.*}}@_ZTS10objc_class @protocol P; diff --git a/clang/test/Driver/stack-protector-guard.c b/clang/test/Driver/stack-protector-guard.c index d8475a70e3709f..666c83079e5191 100644 --- 
a/clang/test/Driver/stack-protector-guard.c +++ b/clang/test/Driver/stack-protector-guard.c @@ -17,15 +17,15 @@ // RUN: FileCheck -check-prefix=CHECK-SYM %s // Invalid arch -// RUN: not %clang -target powerpc64le-linux-gnu -mstack-protector-guard=tls %s 2>&1 | \ +// RUN: not %clang -target mipsel-linux-gnu -mstack-protector-guard=tls %s 2>&1 | \ // RUN: FileCheck -check-prefix=INVALID-ARCH %s // INVALID-ARCH: unsupported option '-mstack-protector-guard=tls' for target -// RUN: not %clang -target powerpc64le-linux-gnu -mstack-protector-guard-reg=fs %s 2>&1 | \ +// RUN: not %clang -target mipsel-linux-gnu -mstack-protector-guard-reg=fs %s 2>&1 | \ // RUN: FileCheck -check-prefix=INVALID-ARCH2 %s // INVALID-ARCH2: unsupported option '-mstack-protector-guard-reg=fs' for target -// RUN: not %clang -target powerpc64le-linux-gnu -mstack-protector-guard-offset=10 %s 2>&1 | \ +// RUN: not %clang -target mipsel-linux-gnu -mstack-protector-guard-offset=10 %s 2>&1 | \ // RUN: FileCheck -check-prefix=INVALID-ARCH3 %s // INVALID-ARCH3: unsupported option '-mstack-protector-guard-offset=10' for target @@ -104,3 +104,54 @@ // RUN: FileCheck -check-prefix=INVALID-REG-RISCV %s // INVALID-REG-RISCV: error: invalid value 'sp' in 'mstack-protector-guard-reg=', expected one of: tp + +// RUN: %clang -### -target powerpc64-unknown-elf -mstack-protector-guard=tls -mstack-protector-guard-offset=24 -mstack-protector-guard-reg=r13 %s 2>&1 | \ +// RUN: FileCheck -v -check-prefix=CHECK-TLS-POWERPC64 %s +// RUN: %clang -### -target powerpc64-unknown-linux-gnu -mstack-protector-guard=global %s 2>&1 | \ +// RUN: FileCheck -check-prefix=CHECK-GLOBAL %s + +// RUN: not %clang -target powerpc64-unknown-linux-gnu -mstack-protector-guard=tls %s 2>&1 | \ +// RUN: FileCheck -check-prefix=MISSING-OFFSET %s + +// RUN: not %clang -target powerpc64-unknown-elf -mstack-protector-guard=sysreg %s 2>&1 | \ +// RUN: FileCheck -check-prefix=INVALID-VALUE2 %s + +// RUN: not %clang -target powerpc64-unknown-elf 
-mstack-protector-guard=tls \ +// RUN: -mstack-protector-guard-offset=20 -mstack-protector-guard-reg=r12 %s 2>&1 | \ +// RUN: FileCheck -check-prefix=INVALID-REG-POWERPC64 %s + +// CHECK-TLS-POWERPC64: "-cc1" {{.*}}"-mstack-protector-guard=tls" "-mstack-protector-guard-offset=24" "-mstack-protector-guard-reg=r13" +// INVALID-REG-POWERPC64: error: invalid value 'r12' in 'mstack-protector-guard-reg=', expected one of: r13 + +// RUN: %clang -### -target powerpc64le-unknown-elf -mstack-protector-guard=tls -mstack-protector-guard-offset=24 -mstack-protector-guard-reg=r13 %s 2>&1 | \ +// RUN: FileCheck -v -check-prefix=CHECK-TLS-POWERPC64 %s +// RUN: %clang -### -target powerpc64le-unknown-elf -mstack-protector-guard=global %s 2>&1 | \ +// RUN: FileCheck -check-prefix=CHECK-GLOBAL %s + +// RUN: not %clang -target powerpc64le-unknown-elf -mstack-protector-guard=tls %s 2>&1 | \ +// RUN: FileCheck -check-prefix=MISSING-OFFSET %s + +// RUN: not %clang -target powerpc64le-unknown-elf -mstack-protector-guard=sysreg %s 2>&1 | \ +// RUN: FileCheck -check-prefix=INVALID-VALUE2 %s + +// RUN: not %clang -target powerpc64le-unknown-elf -mstack-protector-guard=tls \ +// RUN: -mstack-protector-guard-offset=20 -mstack-protector-guard-reg=r12 %s 2>&1 | \ +// RUN: FileCheck -check-prefix=INVALID-REG-POWERPC64 %s + +// RUN: %clang -### -target ppc32-unknown-elf -mstack-protector-guard=tls -mstack-protector-guard-offset=24 -mstack-protector-guard-reg=r2 %s 2>&1 | \ +// RUN: FileCheck -v -check-prefix=CHECK-TLS-POWERPC32 %s +// RUN: %clang -### -target ppc32-unknown-elf -mstack-protector-guard=global %s 2>&1 | \ +// RUN: FileCheck -check-prefix=CHECK-GLOBAL %s + +// RUN: not %clang -target ppc32-unknown-elf -mstack-protector-guard=tls %s 2>&1 | \ +// RUN: FileCheck -check-prefix=MISSING-OFFSET %s + +// RUN: not %clang -target ppc32-unknown-elf -mstack-protector-guard=sysreg %s 2>&1 | \ +// RUN: FileCheck -check-prefix=INVALID-VALUE2 %s + +// RUN: not %clang -target ppc32-unknown-elf 
-mstack-protector-guard=tls \ +// RUN: -mstack-protector-guard-offset=20 -mstack-protector-guard-reg=r3 %s 2>&1 | \ +// RUN: FileCheck -check-prefix=INVALID-REG-POWERPC32 %s + +// CHECK-TLS-POWERPC32: "-cc1" {{.*}}"-mstack-protector-guard=tls" "-mstack-protector-guard-offset=24" "-mstack-protector-guard-reg=r2" +// INVALID-REG-POWERPC32: error: invalid value 'r3' in 'mstack-protector-guard-reg=', expected one of: r2 diff --git a/clang/test/Modules/no-external-type-id.cppm b/clang/test/Modules/no-external-type-id.cppm index a4ca389739fbb5..577b97f5930e75 100644 --- a/clang/test/Modules/no-external-type-id.cppm +++ b/clang/test/Modules/no-external-type-id.cppm @@ -23,7 +23,7 @@ export module b; import a; export int b(); -// CHECK: (); // CHECK: @_ZTVW3Mod11NonTemplate = {{.*}}external // CHECK: @_ZTVW3Mod8TemplateIcE = {{.*}}external // CHECK: @_ZTVW3Mod8TemplateIjE = {{.*}}weak_odr -// CHECK: @_ZTSW3Mod8TemplateIjE = {{.*}}weak_odr // CHECK: @_ZTIW3Mod8TemplateIjE = {{.*}}weak_odr +// CHECK: @_ZTSW3Mod8TemplateIjE = {{.*}}weak_odr // CHECK: @_ZTVW3Mod8TemplateIdE = {{.*}}external // CHECK: @_ZTVW3Mod8TemplateIiE = {{.*}}linkonce_odr -// CHECK: @_ZTSW3Mod8TemplateIiE = {{.*}}linkonce_odr // CHECK: @_ZTIW3Mod8TemplateIiE = {{.*}}linkonce_odr +// CHECK: @_ZTSW3Mod8TemplateIiE = {{.*}}linkonce_odr // CHECK: @_ZTVW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr -// CHECK: @_ZTSW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr // CHECK: @_ZTIW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr +// CHECK: @_ZTSW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr diff --git a/clang/test/Sema/constexpr.c b/clang/test/Sema/constexpr.c index eaa000b3b97758..3dcb0b3a7d95fd 100644 --- a/clang/test/Sema/constexpr.c +++ b/clang/test/Sema/constexpr.c @@ -374,3 +374,20 @@ void constexprif() { void constevalif() { if consteval (300) {} //expected-error {{expected '(' after 'if'}} } + +struct S11 { + int len; +}; +void ghissue112516() { + struct S11 *s11 = 0; + constexpr int 
num = s11->len; // expected-error {{constexpr variable 'num' must be initialized by a constant expression}} + void *Arr[num]; +} + +void ghissue109095() { + constexpr char c[] = { 'a' }; + constexpr int i = c[1]; // expected-error {{constexpr variable 'i' must be initialized by a constant expression}}\ + // expected-note {{declared here}} + _Static_assert(i == c[0]); // expected-error {{static assertion expression is not an integral constant expression}}\ + // expected-note {{initializer of 'i' is not a constant expression}} +} diff --git a/clang/test/SemaCXX/typeid-ref.cpp b/clang/test/SemaCXX/typeid-ref.cpp index f788b04077ecac..025816c42512e1 100644 --- a/clang/test/SemaCXX/typeid-ref.cpp +++ b/clang/test/SemaCXX/typeid-ref.cpp @@ -6,7 +6,7 @@ namespace std { struct X { }; void f() { - // CHECK: @_ZTS1X = linkonce_odr {{(dso_local |hidden )?}}constant // CHECK: @_ZTI1X = linkonce_odr {{(dso_local |hidden )?}}constant + // CHECK: @_ZTS1X = linkonce_odr {{(dso_local |hidden )?}}constant (void)typeid(X&); } diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index 318f08c04759b9..9e8529050ed83d 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -184,6 +184,7 @@ TEST(ConfigParseTest, ParsesConfigurationBools) { CHECK_PARSE_BOOL(ObjCSpaceBeforeProtocolList); CHECK_PARSE_BOOL(Cpp11BracedListStyle); CHECK_PARSE_BOOL(RemoveBracesLLVM); + CHECK_PARSE_BOOL(RemoveEmptyLinesInUnwrappedLines); CHECK_PARSE_BOOL(RemoveSemicolon); CHECK_PARSE_BOOL(SkipMacroDefinitionBody); CHECK_PARSE_BOOL(SpacesInSquareBrackets); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 43513f18321bc0..8f4c92148adae4 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -28135,6 +28135,83 @@ TEST_F(FormatTest, BreakBinaryOperations) { Style); } +TEST_F(FormatTest, RemovesEmptyLinesInUnwrappedLines) { + auto 
Style = getLLVMStyle(); + Style.RemoveEmptyLinesInUnwrappedLines = true; + + verifyFormat("int c = a + b;", + "int c\n" + "\n" + " = a + b;", + Style); + + verifyFormat("enum : unsigned { AA = 0, BB } myEnum;", + "enum : unsigned\n" + "\n" + "{\n" + " AA = 0,\n" + " BB\n" + "} myEnum;", + Style); + + verifyFormat("class B : public E {\n" + "private:\n" + "};", + "class B : public E\n" + "\n" + "{\n" + "private:\n" + "};", + Style); + + verifyFormat( + "struct AAAAAAAAAAAAAAA test[3] = {{56, 23, \"hello\"}, {7, 5, \"!!\"}};", + "struct AAAAAAAAAAAAAAA test[3] = {{56,\n" + "\n" + " 23, \"hello\"},\n" + " {7, 5, \"!!\"}};", + Style); + + verifyFormat("int myFunction(int aaaaaaaaaaaaa, int ccccccccccccc, int d);", + "int myFunction(\n" + "\n" + " int aaaaaaaaaaaaa,\n" + "\n" + " int ccccccccccccc, int d);", + Style); + + verifyFormat("switch (e) {\n" + "case 1:\n" + " return e;\n" + "case 2:\n" + " return 2;\n" + "}", + "switch (\n" + "\n" + " e) {\n" + "case 1:\n" + " return e;\n" + "case 2:\n" + " return 2;\n" + "}", + Style); + + verifyFormat("while (true) {\n" + "}", + "while (\n" + "\n" + " true) {\n" + "}", + Style); + + verifyFormat("void loooonFunctionIsVeryLongButNotAsLongAsJavaTypeNames(\n" + " std::map *outputMap);", + "void loooonFunctionIsVeryLongButNotAsLongAsJavaTypeNames\n" + "\n" + " (std::map *outputMap);", + Style); +} + } // namespace } // namespace test } // namespace format diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 00776dac28a14b..60deae0c9b1129 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3554,6 +3554,12 @@ TEST_F(TokenAnnotatorTest, TemplateInstantiation) { ASSERT_EQ(Tokens.size(), 21u) << Tokens; EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener); EXPECT_TOKEN(Tokens[16], tok::greater, TT_TemplateCloser); + + Tokens = + annotate("auto x{std::conditional_t{}};"); + ASSERT_EQ(Tokens.size(), 24u) << 
Tokens; + EXPECT_TOKEN(Tokens[6], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[18], tok::greater, TT_TemplateCloser); } } // namespace diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 1d79cc71dd977b..c9bf5d3ddf146a 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1373,6 +1373,10 @@ void SVEEmitter::createHeader(raw_ostream &OS) { OS << "typedef __clang_svbfloat16x3_t svbfloat16x3_t;\n"; OS << "typedef __clang_svbfloat16x4_t svbfloat16x4_t;\n"; + OS << "typedef __clang_svmfloat8x2_t svmfloat8x2_t;\n"; + OS << "typedef __clang_svmfloat8x3_t svmfloat8x3_t;\n"; + OS << "typedef __clang_svmfloat8x4_t svmfloat8x4_t;\n"; + OS << "typedef __SVCount_t svcount_t;\n\n"; OS << "enum svpattern\n"; diff --git a/compiler-rt/test/lsan/TestCases/print_threads.c b/compiler-rt/test/lsan/TestCases/print_threads.c index b3072da93fab62..a9389412af1ccf 100644 --- a/compiler-rt/test/lsan/TestCases/print_threads.c +++ b/compiler-rt/test/lsan/TestCases/print_threads.c @@ -2,6 +2,9 @@ // XFAIL: hwasan +// No pthread barriers on Darwin. +// UNSUPPORTED: darwin + #include #include #include diff --git a/flang/include/flang/Common/LangOptions.def b/flang/include/flang/Common/LangOptions.def index d3e1e972d1519f..1bfdba9cc2c1c7 100644 --- a/flang/include/flang/Common/LangOptions.def +++ b/flang/include/flang/Common/LangOptions.def @@ -20,6 +20,8 @@ LANGOPT(Name, Bits, Default) #endif ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Fast) ///< FP Contract Mode (off/fast) +/// signed integer overflow handling +ENUM_LANGOPT(SignedOverflowBehavior, SignedOverflowBehaviorTy, 1, SOB_Undefined) /// Indicate a build without the standard GPU libraries. 
LANGOPT(NoGPULib , 1, false) diff --git a/flang/include/flang/Common/LangOptions.h b/flang/include/flang/Common/LangOptions.h index 52a45047deb0e2..83f25cfbe26142 100644 --- a/flang/include/flang/Common/LangOptions.h +++ b/flang/include/flang/Common/LangOptions.h @@ -27,6 +27,14 @@ namespace Fortran::common { class LangOptionsBase { public: + enum SignedOverflowBehaviorTy { + // -fno-wrapv (default behavior in Flang) + SOB_Undefined, + + // -fwrapv + SOB_Defined, + }; + enum FPModeKind { // Do not fuse FP ops FPM_Off, diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index d3f17c3f939c16..231de533fbd30a 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -35,9 +35,8 @@ ENUM_LOWERINGOPT(NoPPCNativeVecElemOrder, unsigned, 1, 0) ENUM_LOWERINGOPT(Underscoring, unsigned, 1, 1) /// If true, assume the behavior of integer overflow is defined -/// (i.e. wraps around as two's complement). On by default. -/// TODO: make the default off -ENUM_LOWERINGOPT(IntegerWrapAround, unsigned, 1, 1) +/// (i.e. wraps around as two's complement). Off by default. +ENUM_LOWERINGOPT(IntegerWrapAround, unsigned, 1, 0) /// If true, add nsw flags to loop variable increments. /// Off by default. 
diff --git a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h index a7c4c075d818ee..5ae32f70a11a7f 100644 --- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h @@ -182,7 +182,7 @@ struct VecTypeInfo { static inline VecTypeInfo getVecTypeFromFirType(mlir::Type firTy) { assert(mlir::isa(firTy)); VecTypeInfo vecTyInfo; - vecTyInfo.eleTy = mlir::dyn_cast(firTy).getEleTy(); + vecTyInfo.eleTy = mlir::dyn_cast(firTy).getElementType(); vecTyInfo.len = mlir::dyn_cast(firTy).getLen(); return vecTyInfo; } diff --git a/flang/include/flang/Optimizer/Dialect/FIRTypes.td b/flang/include/flang/Optimizer/Dialect/FIRTypes.td index 7ac8e0822ecc88..bfd00c34558349 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRTypes.td +++ b/flang/include/flang/Optimizer/Dialect/FIRTypes.td @@ -465,6 +465,8 @@ def fir_SequenceType : FIR_Type<"Sequence", "array"> { size = size * static_cast(extent); return size; } + + mlir::Type getElementType() const { return getEleTy(); } }]; } @@ -519,6 +521,8 @@ def fir_VectorType : FIR_Type<"Vector", "vector"> { let extraClassDeclaration = [{ static bool isValidElementType(mlir::Type t); + + mlir::Type getElementType() const { return getEleTy(); } }]; let skipDefaultBuilders = 1; diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 9da804138062fd..c51bf3b16eda09 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1115,6 +1115,24 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } +/// Parses signed integer overflow options and populates the +/// CompilerInvocation accordingly. +/// Returns false if new errors are generated. 
+/// +/// \param [out] invoc Stores the processed arguments +/// \param [in] args The compiler invocation arguments to parse +/// \param [out] diags DiagnosticsEngine to report erros with +static bool parseIntegerOverflowArgs(CompilerInvocation &invoc, + llvm::opt::ArgList &args, + clang::DiagnosticsEngine &diags) { + Fortran::common::LangOptions &opts = invoc.getLangOpts(); + + if (args.getLastArg(clang::driver::options::OPT_fwrapv)) + opts.setSignedOverflowBehavior(Fortran::common::LangOptions::SOB_Defined); + + return true; +} + /// Parses all floating point related arguments and populates the /// CompilerInvocation accordingly. /// Returns false if new errors are generated. @@ -1255,6 +1273,18 @@ static bool parseLinkerOptionsArgs(CompilerInvocation &invoc, return true; } +static bool parseLangOptionsArgs(CompilerInvocation &invoc, + llvm::opt::ArgList &args, + clang::DiagnosticsEngine &diags) { + bool success = true; + + success &= parseIntegerOverflowArgs(invoc, args, diags); + success &= parseFloatingPointArgs(invoc, args, diags); + success &= parseVScaleArgs(invoc, args, diags); + + return success; +} + bool CompilerInvocation::createFromArgs( CompilerInvocation &invoc, llvm::ArrayRef commandLineArgs, clang::DiagnosticsEngine &diags, const char *argv0) { @@ -1363,9 +1393,7 @@ bool CompilerInvocation::createFromArgs( invoc.frontendOpts.mlirArgs = args.getAllArgValues(clang::driver::options::OPT_mmlir); - success &= parseFloatingPointArgs(invoc, args, diags); - - success &= parseVScaleArgs(invoc, args, diags); + success &= parseLangOptionsArgs(invoc, args, diags); success &= parseLinkerOptionsArgs(invoc, args, diags); @@ -1577,6 +1605,8 @@ void CompilerInvocation::setLoweringOptions() { loweringOpts.setUnderscoring(codegenOpts.Underscoring); const Fortran::common::LangOptions &langOptions = getLangOpts(); + loweringOpts.setIntegerWrapAround(langOptions.getSignedOverflowBehavior() == + Fortran::common::LangOptions::SOB_Defined); 
Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions(); // TODO: when LangOptions are finalized, we can represent // the math related options using Fortran::commmon::MathOptionsBase, diff --git a/flang/lib/Lower/ConvertConstant.cpp b/flang/lib/Lower/ConvertConstant.cpp index 748be508235f17..556b330b967ce1 100644 --- a/flang/lib/Lower/ConvertConstant.cpp +++ b/flang/lib/Lower/ConvertConstant.cpp @@ -584,7 +584,8 @@ genInlinedArrayLit(Fortran::lower::AbstractConverter &converter, } while (con.IncrementSubscripts(subscripts)); } else if constexpr (T::category == Fortran::common::TypeCategory::Derived) { do { - mlir::Type eleTy = mlir::cast(arrayTy).getEleTy(); + mlir::Type eleTy = + mlir::cast(arrayTy).getElementType(); mlir::Value elementVal = genScalarLit(converter, loc, con.At(subscripts), eleTy, /*outlineInReadOnlyMemory=*/false); @@ -594,7 +595,7 @@ genInlinedArrayLit(Fortran::lower::AbstractConverter &converter, } else { llvm::SmallVector rangeStartIdx; uint64_t rangeSize = 0; - mlir::Type eleTy = mlir::cast(arrayTy).getEleTy(); + mlir::Type eleTy = mlir::cast(arrayTy).getElementType(); do { auto getElementVal = [&]() { return builder.createConvert(loc, eleTy, @@ -643,7 +644,7 @@ genOutlineArrayLit(Fortran::lower::AbstractConverter &converter, mlir::Location loc, mlir::Type arrayTy, const Fortran::evaluate::Constant &constant) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Type eleTy = mlir::cast(arrayTy).getEleTy(); + mlir::Type eleTy = mlir::cast(arrayTy).getElementType(); llvm::StringRef globalName = converter.getUniqueLitName( loc, std::make_unique(toEvExpr(constant)), eleTy); diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp index 87e2114e413059..46168b81dd3a03 100644 --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -1574,7 +1574,7 @@ class ScalarExprLowering { mlir::Location loc = getLoc(); mlir::Value addr = fir::getBase(array); mlir::Type arrTy = 
fir::dyn_cast_ptrEleTy(addr.getType()); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); mlir::Type seqTy = builder.getRefType(builder.getVarLenSeqTy(eleTy)); mlir::Type refTy = builder.getRefType(eleTy); mlir::Value base = builder.createConvert(loc, seqTy, addr); @@ -1659,7 +1659,7 @@ class ScalarExprLowering { mlir::Location loc = getLoc(); mlir::Value addr = fir::getBase(exv); mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(addr.getType()); - mlir::Type eleTy = mlir::cast(arrTy).getEleTy(); + mlir::Type eleTy = mlir::cast(arrTy).getElementType(); mlir::Type refTy = builder.getRefType(eleTy); mlir::IndexType idxTy = builder.getIndexType(); llvm::SmallVector arrayCoorArgs; @@ -4145,7 +4145,7 @@ class ArrayExprLowering { mlir::Location loc = getLoc(); return [=, builder = &converter.getFirOpBuilder()](IterSpace iters) { mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(tmp.getType()); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); mlir::Type eleRefTy = builder->getRefType(eleTy); mlir::IntegerType i1Ty = builder->getI1Type(); // Adjust indices for any shift of the origin of the array. @@ -5759,7 +5759,7 @@ class ArrayExprLowering { return fir::BoxValue(embox, lbounds, nonDeferredLenParams); }; } - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); if (isReferentiallyOpaque()) { // Semantics are an opaque reference to an array. 
// This case forwards a continuation that will generate the address diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp index 93b78fd3357fac..e93fbc562f9b13 100644 --- a/flang/lib/Lower/ConvertExprToHLFIR.cpp +++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp @@ -579,7 +579,8 @@ class HlfirDesignatorBuilder { return createVectorSubscriptElementAddrOp(partInfo, baseType, resultExtents); - mlir::Type resultType = mlir::cast(baseType).getEleTy(); + mlir::Type resultType = + mlir::cast(baseType).getElementType(); if (!resultTypeShape.empty()) { // Ranked array section. The result shape comes from the array section // subscripts. @@ -811,7 +812,7 @@ class HlfirDesignatorBuilder { } } builder.setInsertionPoint(elementalAddrOp); - return mlir::cast(baseType).getEleTy(); + return mlir::cast(baseType).getElementType(); } /// Yield the designator for the final part-ref inside the diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 8b03d60e47ca64..cc51d5a9bb8daf 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -518,7 +518,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, // type does not support nested structures. 
if (mlir::isa(symTy) && !Fortran::semantics::IsAllocatableOrPointer(sym)) { - mlir::Type eleTy = mlir::cast(symTy).getEleTy(); + mlir::Type eleTy = mlir::cast(symTy).getElementType(); if (mlir::isa(eleTy)) { const auto *details = diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index e6143275ce1d44..462193a850c487 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -3824,7 +3824,7 @@ IntrinsicLibrary::genReduction(FN func, FD funcDim, llvm::StringRef errMsg, if (absentDim || rank == 1) { mlir::Type ty = array.getType(); mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); if (fir::isa_complex(eleTy)) { mlir::Value result = builder.createTemporary(loc, eleTy); func(builder, loc, array, mask, result); @@ -6137,7 +6137,7 @@ IntrinsicLibrary::genReduce(mlir::Type resultType, mlir::Type ty = array.getType(); mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - mlir::Type eleTy = mlir::cast(arrTy).getEleTy(); + mlir::Type eleTy = mlir::cast(arrTy).getElementType(); // Handle optional arguments bool absentDim = isStaticallyAbsent(args[2]); diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp index 7f09e882284465..b3b07d18a956ba 100644 --- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -2797,7 +2797,7 @@ void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef args) { if (vType != targetType) { if (mlir::isa(targetType)) { // Perform vector type conversion for arguments passed by value. 
- auto eleTy{mlir::dyn_cast(vType).getEleTy()}; + auto eleTy{mlir::dyn_cast(vType).getElementType()}; auto len{mlir::dyn_cast(vType).getLen()}; mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy); auto v0{builder.createConvert(loc, mlirType, v)}; diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp index b39824428c78a9..f6627dff671e6f 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp @@ -1157,7 +1157,7 @@ void fir::runtime::genMaxloc(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value back) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); fir::factory::CharacterExprHelper charHelper{builder, loc}; auto [cat, kind] = fir::mlirTypeToCategoryKind(loc, eleTy); mlir::func::FuncOp func; @@ -1189,7 +1189,7 @@ mlir::Value fir::runtime::genMaxval(fir::FirOpBuilder &builder, mlir::Value maskBox) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto dim = builder.createIntegerConstant(loc, builder.getIndexType(), 0); auto [cat, kind] = fir::mlirTypeToCategoryKind(loc, eleTy); mlir::func::FuncOp func; @@ -1241,7 +1241,7 @@ void fir::runtime::genMinloc(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value back) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto [cat, kind] = fir::mlirTypeToCategoryKind(loc, eleTy); mlir::func::FuncOp func; REAL_INTRINSIC_INSTANCES(Minloc, ) @@ -1298,7 +1298,7 @@ mlir::Value fir::runtime::genMinval(fir::FirOpBuilder &builder, mlir::Value maskBox) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); 
- auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto dim = builder.createIntegerConstant(loc, builder.getIndexType(), 0); auto [cat, kind] = fir::mlirTypeToCategoryKind(loc, eleTy); @@ -1326,7 +1326,7 @@ void fir::runtime::genNorm2Dim(fir::FirOpBuilder &builder, mlir::Location loc, mlir::func::FuncOp func; auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); if (eleTy.isF128()) func = fir::runtime::getRuntimeFunc(loc, builder); else @@ -1348,7 +1348,7 @@ mlir::Value fir::runtime::genNorm2(fir::FirOpBuilder &builder, mlir::func::FuncOp func; auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto dim = builder.createIntegerConstant(loc, builder.getIndexType(), 0); if (eleTy.isF32()) @@ -1398,7 +1398,7 @@ mlir::Value fir::runtime::genProduct(fir::FirOpBuilder &builder, mlir::Value resultBox) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto dim = builder.createIntegerConstant(loc, builder.getIndexType(), 0); auto [cat, kind] = fir::mlirTypeToCategoryKind(loc, eleTy); @@ -1482,7 +1482,7 @@ mlir::Value fir::runtime::genSum(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value resultBox) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto dim = builder.createIntegerConstant(loc, builder.getIndexType(), 0); auto [cat, kind] = fir::mlirTypeToCategoryKind(loc, eleTy); @@ -1521,7 +1521,7 @@ mlir::Value fir::runtime::genSum(fir::FirOpBuilder &builder, mlir::Location loc, mlir::func::FuncOp func; \ auto ty = arrayBox.getType(); \ auto 
arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); \ - auto eleTy = mlir::cast(arrTy).getEleTy(); \ + auto eleTy = mlir::cast(arrTy).getElementType(); \ auto dim = builder.createIntegerConstant(loc, builder.getIndexType(), 0); \ \ if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1))) \ @@ -1596,7 +1596,7 @@ void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc, bool argByRef) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto dim = builder.createIntegerConstant(loc, builder.getI32Type(), 1); assert(resultBox && "expect non null value for the result"); @@ -1646,7 +1646,7 @@ mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder, bool argByRef) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto dim = builder.createIntegerConstant(loc, builder.getI32Type(), 1); assert((fir::isa_real(eleTy) || fir::isa_integer(eleTy) || @@ -1687,7 +1687,7 @@ void fir::runtime::genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value resultBox, bool argByRef) { auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); - auto eleTy = mlir::cast(arrTy).getEleTy(); + auto eleTy = mlir::cast(arrTy).getElementType(); auto [cat, kind] = fir::mlirTypeToCategoryKind(loc, eleTy); mlir::func::FuncOp func; diff --git a/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp b/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp index 8f08b01fe0097a..50f14abd01c131 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp @@ -365,11 +365,11 @@ void fir::runtime::genMatmul(fir::FirOpBuilder &builder, mlir::Location loc, mlir::func::FuncOp func; auto boxATy = matrixABox.getType(); auto arrATy = 
fir::dyn_cast_ptrOrBoxEleTy(boxATy); - auto arrAEleTy = mlir::cast(arrATy).getEleTy(); + auto arrAEleTy = mlir::cast(arrATy).getElementType(); auto [aCat, aKind] = fir::mlirTypeToCategoryKind(loc, arrAEleTy); auto boxBTy = matrixBBox.getType(); auto arrBTy = fir::dyn_cast_ptrOrBoxEleTy(boxBTy); - auto arrBEleTy = mlir::cast(arrBTy).getEleTy(); + auto arrBEleTy = mlir::cast(arrBTy).getElementType(); auto [bCat, bKind] = fir::mlirTypeToCategoryKind(loc, arrBEleTy); #define MATMUL_INSTANCE(ACAT, AKIND, BCAT, BKIND) \ @@ -417,11 +417,11 @@ void fir::runtime::genMatmulTranspose(fir::FirOpBuilder &builder, mlir::func::FuncOp func; auto boxATy = matrixABox.getType(); auto arrATy = fir::dyn_cast_ptrOrBoxEleTy(boxATy); - auto arrAEleTy = mlir::cast(arrATy).getEleTy(); + auto arrAEleTy = mlir::cast(arrATy).getElementType(); auto [aCat, aKind] = fir::mlirTypeToCategoryKind(loc, arrAEleTy); auto boxBTy = matrixBBox.getType(); auto arrBTy = fir::dyn_cast_ptrOrBoxEleTy(boxBTy); - auto arrBEleTy = mlir::cast(arrBTy).getEleTy(); + auto arrBEleTy = mlir::cast(arrBTy).getElementType(); auto [bCat, bKind] = fir::mlirTypeToCategoryKind(loc, arrBEleTy); #define MATMUL_INSTANCE(ACAT, AKIND, BCAT, BKIND) \ diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 68b8c6613585e6..e6eeb0d5db4a84 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -2619,7 +2619,7 @@ struct CoordinateOpConversion dims = dimsLeft - 1; continue; } - cpnTy = mlir::cast(cpnTy).getEleTy(); + cpnTy = mlir::cast(cpnTy).getElementType(); // append array range in reverse (FIR arrays are column-major) offs.append(arrIdx.rbegin(), arrIdx.rend()); arrIdx.clear(); @@ -2633,7 +2633,7 @@ struct CoordinateOpConversion arrIdx.push_back(nxtOpnd); continue; } - cpnTy = mlir::cast(cpnTy).getEleTy(); + cpnTy = mlir::cast(cpnTy).getElementType(); offs.push_back(nxtOpnd); continue; } diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp 
b/flang/lib/Optimizer/Dialect/FIROps.cpp index 90ce8b87605912..cdcf9bda49a627 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -1359,7 +1359,7 @@ bool fir::ConvertOp::isPointerCompatible(mlir::Type ty) { static std::optional getVectorElementType(mlir::Type ty) { mlir::Type elemTy; if (mlir::isa(ty)) - elemTy = mlir::dyn_cast(ty).getEleTy(); + elemTy = mlir::dyn_cast(ty).getElementType(); else if (mlir::isa(ty)) elemTy = mlir::dyn_cast(ty).getElementType(); else @@ -1533,7 +1533,7 @@ llvm::LogicalResult fir::CoordinateOp::verify() { } if (dimension) { if (--dimension == 0) - eleTy = mlir::cast(eleTy).getEleTy(); + eleTy = mlir::cast(eleTy).getElementType(); } else { if (auto t = mlir::dyn_cast(eleTy)) { // FIXME: Generally, we don't know which field of the tuple is being @@ -3817,7 +3817,7 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, //===----------------------------------------------------------------------===// inline fir::CharacterType::KindTy stringLitOpGetKind(fir::StringLitOp op) { - auto eleTy = mlir::cast(op.getType()).getEleTy(); + auto eleTy = mlir::cast(op.getType()).getElementType(); return mlir::cast(eleTy).getFKind(); } diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index bdb8a7249f1a35..473ed2be3dbca7 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -68,11 +68,23 @@ class OmpWorkshareBlockChecker { if (const auto *e{GetExpr(context_, expr)}) { for (const Symbol &symbol : evaluate::CollectSymbols(*e)) { const Symbol &root{GetAssociationRoot(symbol)}; - if (IsFunction(root) && !IsElementalProcedure(root)) { - context_.Say(expr.source, - "User defined non-ELEMENTAL function " - "'%s' is not allowed in a WORKSHARE construct"_err_en_US, - root.name()); + if (IsFunction(root)) { + std::string attrs{""}; + if (!IsElementalProcedure(root)) { + attrs = " 
non-ELEMENTAL"; + } + if (root.attrs().test(Attr::IMPURE)) { + if (attrs != "") { + attrs = "," + attrs; + } + attrs = " IMPURE" + attrs; + } + if (attrs != "") { + context_.Say(expr.source, + "User defined%s function '%s' is not allowed in a " + "WORKSHARE construct"_err_en_US, + attrs, root.name()); + } } } } @@ -2273,6 +2285,21 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &) { } } } + + // 2.11.5 Simd construct restriction (OpenMP 5.1) + if (auto *sl_clause{FindClause(llvm::omp::Clause::OMPC_safelen)}) { + if (auto *o_clause{FindClause(llvm::omp::Clause::OMPC_order)}) { + const auto &orderClause{ + std::get(o_clause->u)}; + if (std::get(orderClause.v.t) == + parser::OmpOrderClause::Type::Concurrent) { + context_.Say(sl_clause->source, + "The `SAFELEN` clause cannot appear in the `SIMD` directive " + "with `ORDER(CONCURRENT)` clause"_err_en_US); + } + } + } + // Sema checks related to presence of multiple list items within the same // clause CheckMultListItems(); diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 index 35adb47b56861e..382c1aa5d350b7 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -14,6 +14,7 @@ ! RUN: -fno-signed-zeros \ ! RUN: -fassociative-math \ ! RUN: -freciprocal-math \ +! RUN: -fno-strict-overflow \ ! RUN: -fomit-frame-pointer \ ! RUN: -fpass-plugin=Bye%pluginext \ ! RUN: -fversion-loops-for-stride \ @@ -63,4 +64,5 @@ ! CHECK: "-Rpass=inline" ! CHECK: "-mframe-pointer=none" ! CHECK: "-mllvm" "-print-before-all" +! CHECK: "-fwrapv" ! CHECK: "-save-temps=obj" diff --git a/flang/test/Driver/integer-overflow.f90 b/flang/test/Driver/integer-overflow.f90 new file mode 100644 index 00000000000000..023f39fa5413ff --- /dev/null +++ b/flang/test/Driver/integer-overflow.f90 @@ -0,0 +1,10 @@ +! Test for correct forwarding of integer overflow flags from the compiler driver +! to the frontend driver + +! 
RUN: %flang -### -fno-strict-overflow %s 2>&1 | FileCheck %s --check-prefix=INDUCED +! RUN: %flang -### -fstrict-overflow %s 2>&1 | FileCheck %s +! RUN: %flang -### -fno-wrapv %s 2>&1 | FileCheck %s +! RUN: %flang -### -fno-wrapv -fno-strict-overflow %s 2>&1 | FileCheck %s + +! CHECK-NOT: "-fno-wrapv" +! INDUCED: "-fwrapv" diff --git a/flang/test/Integration/OpenMP/atomic-capture-complex.f90 b/flang/test/Integration/OpenMP/atomic-capture-complex.f90 new file mode 100644 index 00000000000000..4ffd18097d79ee --- /dev/null +++ b/flang/test/Integration/OpenMP/atomic-capture-complex.f90 @@ -0,0 +1,50 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! 
+ +!RUN: %if x86-registered-target %{ %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fopenmp %s -o - | FileCheck --check-prefixes=CHECK,X86 %s %} +!RUN: %if aarch64-registered-target %{ %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-llvm -fopenmp %s -o - | FileCheck --check-prefixes=CHECK,AARCH64 %s %} + +!CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8 +!CHECK: %[[VAL_1:.*]] = alloca { float, float }, i64 1, align 8 +!CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8 +!CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4 +!CHECK: br label %entry + +!CHECK: entry: +!CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8 +!CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0) +!CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8 +!CHECK: br label %.atomic.cont + +!CHECK: .atomic.cont +!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ] +!CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0 +!CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1 +!CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00 +!CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00 +!CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0 +!CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1 +!CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4 +!CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]], +i32 2, i32 2) +!CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4 +!CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont + +!CHECK: .atomic.exit +!AARCH64: %[[LCSSA:.*]] = phi { float, float } [ %[[VAL_10]],
%.atomic.cont ] +!AARCH64: store { float, float } %[[LCSSA]], ptr %[[VAL_1]], align 4 +!X86: store { float, float } %[[VAL_10]], ptr %[[VAL_1]], align 4 + +program main + complex*8 ia, ib + ia = (2, 2) + !$omp atomic capture + ia = ia + (1, 1) + ib = ia + !$omp end atomic +end program diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index 24540492e73271..1a7a57b124e9bd 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -390,6 +390,12 @@ enddo !$omp end parallel + !ERROR: The `SAFELEN` clause cannot appear in the `SIMD` directive with `ORDER(CONCURRENT)` clause + !$omp simd order(concurrent) safelen(1+2) + do i = 1, N + a = 3.14 + enddo + ! 2.11.1 parallel-do-clause -> parallel-clause | ! do-clause diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90 index 4f2512937ace4e..480bd45b79b839 100644 --- a/flang/test/Semantics/OpenMP/do-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-collapse.f90 @@ -30,5 +30,11 @@ program omp_doCollapse do end do end do -end program omp_doCollapse + !ERROR: At most one COLLAPSE clause can appear on the SIMD directive + !$omp simd collapse(2) collapse(1) + do i = 1, 4 + j = j + i + 1 + end do + !$omp end simd +end program omp_doCollapse diff --git a/flang/test/Semantics/OpenMP/loop-association.f90 b/flang/test/Semantics/OpenMP/loop-association.f90 index d2167663c5ddea..9fac508e6128a7 100644 --- a/flang/test/Semantics/OpenMP/loop-association.f90 +++ b/flang/test/Semantics/OpenMP/loop-association.f90 @@ -131,4 +131,10 @@ !$omp end parallel do simd !ERROR: The END PARALLEL DO SIMD directive must follow the DO loop associated with the loop construct !$omp end parallel do simd + + !ERROR: A DO loop must follow the SIMD directive + !$omp simd + a = i + 1 + !ERROR: The END SIMD directive must follow the DO loop associated with the loop construct + !$omp end 
simd end diff --git a/flang/test/Semantics/OpenMP/workshare02.f90 b/flang/test/Semantics/OpenMP/workshare02.f90 index 11f33d63a3eb80..dddaa354fff9fa 100644 --- a/flang/test/Semantics/OpenMP/workshare02.f90 +++ b/flang/test/Semantics/OpenMP/workshare02.f90 @@ -9,6 +9,14 @@ module my_mod integer function my_func() my_func = 10 end function my_func + + impure integer function impure_my_func() + impure_my_func = 20 + end function impure_my_func + + impure elemental integer function impure_ele_my_func() + impure_ele_my_func = 20 + end function impure_ele_my_func end module my_mod subroutine workshare(aa, bb, cc, dd, ee, ff, n) @@ -61,6 +69,16 @@ subroutine workshare(aa, bb, cc, dd, ee, ff, n) j = j - my_func() !$omp end atomic + !ERROR: User defined IMPURE, non-ELEMENTAL function 'impure_my_func' is not allowed in a WORKSHARE construct + cc = impure_my_func() + !ERROR: User defined IMPURE function 'impure_ele_my_func' is not allowed in a WORKSHARE construct + aa(1) = impure_ele_my_func() + !$omp end workshare + !$omp workshare + j = j + 1 + !ERROR: At most one NOWAIT clause can appear on the END WORKSHARE directive + !$omp end workshare nowait nowait + end subroutine workshare diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index a4cf4631c8470e..836e8a507bd6f2 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -134,6 +134,14 @@ add_header( DEPENDS libc.include.llvm-libc-macros.float_macros ) +add_header( + cfloat128 + HDR + cfloat128.h + DEPENDS + libc.include.llvm-libc-macros.float_macros +) +add_header(cfloat16 HDR cfloat16.h) add_header(fsblkcnt_t HDR fsblkcnt_t.h) add_header(fsfilcnt_t HDR fsfilcnt_t.h) add_header( diff --git a/libc/include/llvm-libc-types/cfloat128.h b/libc/include/llvm-libc-types/cfloat128.h new file mode 100644 index 00000000000000..0cc8ed3041d6f0 --- /dev/null +++ b/libc/include/llvm-libc-types/cfloat128.h @@ -0,0 +1,38 @@ +//===-- 
Definition of cfloat128 type --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_CFLOAT128_H +#define LLVM_LIBC_TYPES_CFLOAT128_H + +#include "../llvm-libc-macros/float-macros.h" // LDBL_MANT_DIG + +// Currently, the complex variant of C23 `_Float128` type is only defined as a +// built-in type in GCC 7 or later, and only for C. For C++, or for clang, +// the complex variant of `__float128` is defined instead, and only on x86-64 +// targets. +// +// TODO: Update the complex variant of C23 `_Float128` type detection again when +// clang supports it. +// https://github.com/llvm/llvm-project/issues/80195 +#if defined(__STDC_IEC_60559_COMPLEX__) && !defined(__clang__) && \ + !defined(__cplusplus) +#define LIBC_TYPES_HAS_CFLOAT128 +typedef _Complex _Float128 cfloat128; +#elif defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__) +// Use _Complex __float128 type. gcc and clang sometimes use __SIZEOF_FLOAT128__ +// to notify the availability of __float128.
clang also uses __FLOAT128__ macro +// to notify the availability of __float128 type: +// https://reviews.llvm.org/D15120 +#define LIBC_TYPES_HAS_CFLOAT128 +typedef _Complex __float128 cfloat128; +#elif (LDBL_MANT_DIG == 113) +#define LIBC_TYPES_HAS_CFLOAT128 +typedef _Complex long double cfloat128; +#endif + +#endif // LLVM_LIBC_TYPES_CFLOAT128_H diff --git a/libc/include/llvm-libc-types/cfloat16.h b/libc/include/llvm-libc-types/cfloat16.h new file mode 100644 index 00000000000000..e7e5631e025074 --- /dev/null +++ b/libc/include/llvm-libc-types/cfloat16.h @@ -0,0 +1,20 @@ +//===-- Definition of cfloat16 type ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_CFLOAT16_H +#define LLVM_LIBC_TYPES_CFLOAT16_H + +#if defined(__FLT16_MANT_DIG__) && \ + (!defined(__GNUC__) || __GNUC__ >= 13 || defined(__clang__)) && \ + !defined(__arm__) && !defined(_M_ARM) && !defined(__riscv) && \ + !defined(_WIN32) +#define LIBC_TYPES_HAS_CFLOAT16 +typedef _Complex _Float16 cfloat16; +#endif + +#endif // LLVM_LIBC_TYPES_CFLOAT16_H diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt index c1981b827042ca..774668be42e56d 100644 --- a/libc/src/__support/CPP/CMakeLists.txt +++ b/libc/src/__support/CPP/CMakeLists.txt @@ -126,6 +126,7 @@ add_header_library( type_traits/is_array.h type_traits/is_base_of.h type_traits/is_class.h + type_traits/is_complex.h type_traits/is_const.h type_traits/is_constant_evaluated.h type_traits/is_convertible.h @@ -165,6 +166,7 @@ add_header_library( libc.include.llvm-libc-macros.stdfix_macros libc.src.__support.macros.attributes libc.src.__support.macros.properties.types + 
libc.src.__support.macros.properties.complex_types ) add_header_library( diff --git a/libc/src/__support/CPP/type_traits.h b/libc/src/__support/CPP/type_traits.h index cef4e5d1f0b139..d50b6612656dbb 100644 --- a/libc/src/__support/CPP/type_traits.h +++ b/libc/src/__support/CPP/type_traits.h @@ -25,7 +25,6 @@ #include "src/__support/CPP/type_traits/is_array.h" #include "src/__support/CPP/type_traits/is_base_of.h" #include "src/__support/CPP/type_traits/is_class.h" -#include "src/__support/CPP/type_traits/is_complex.h" #include "src/__support/CPP/type_traits/is_const.h" #include "src/__support/CPP/type_traits/is_constant_evaluated.h" #include "src/__support/CPP/type_traits/is_convertible.h" diff --git a/libc/src/__support/CPP/type_traits/is_complex.h b/libc/src/__support/CPP/type_traits/is_complex.h index 4f5ee9abdb33a5..23f05c08ccab5a 100644 --- a/libc/src/__support/CPP/type_traits/is_complex.h +++ b/libc/src/__support/CPP/type_traits/is_complex.h @@ -10,6 +10,10 @@ #include "src/__support/CPP/type_traits/is_same.h" #include "src/__support/CPP/type_traits/remove_cv.h" +#include "src/__support/macros/attributes.h" +#include "src/__support/macros/config.h" +// LIBC_TYPES_HAS_CFLOAT16 && LIBC_TYPES_HAS_CFLOAT128 +#include "src/__support/macros/properties/complex_types.h" namespace LIBC_NAMESPACE_DECL { namespace cpp { @@ -25,7 +29,16 @@ template struct is_complex { public: LIBC_INLINE_VAR static constexpr bool value = __is_unqualified_any_of(); + _Complex long double +#ifdef LIBC_TYPES_HAS_CFLOAT16 + , + cfloat16 +#endif +#ifdef LIBC_TYPES_HAS_CFLOAT128 + , + cfloat128 +#endif + >(); }; template LIBC_INLINE_VAR constexpr bool is_complex_v = is_complex::value; diff --git a/libc/src/__support/macros/properties/CMakeLists.txt b/libc/src/__support/macros/properties/CMakeLists.txt index c69f3a85d7287a..80ed63a2fbcf70 100644 --- a/libc/src/__support/macros/properties/CMakeLists.txt +++ b/libc/src/__support/macros/properties/CMakeLists.txt @@ -37,3 +37,13 @@ 
add_header_library( libc.include.llvm-libc-macros.float16_macros libc.include.llvm-libc-types.float128 ) + +add_header_library( + complex_types + HDRS + complex_types.h + DEPENDS + .types + libc.include.llvm-libc-types.cfloat16 + libc.include.llvm-libc-types.cfloat128 +) diff --git a/libc/src/__support/macros/properties/complex_types.h b/libc/src/__support/macros/properties/complex_types.h new file mode 100644 index 00000000000000..3f4a7646649c64 --- /dev/null +++ b/libc/src/__support/macros/properties/complex_types.h @@ -0,0 +1,25 @@ +//===-- Complex Types support -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Complex Types detection and support. + +#ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_CTYPES_H +#define LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_CTYPES_H + +#include "include/llvm-libc-types/cfloat128.h" +#include "include/llvm-libc-types/cfloat16.h" +#include "types.h" + +// -- cfloat16 support -------------------------------------------------------- +// LIBC_TYPES_HAS_CFLOAT16 and 'cfloat16' type is provided by +// "include/llvm-libc-types/cfloat16.h" + +// -- cfloat128 support ------------------------------------------------------- +// LIBC_TYPES_HAS_CFLOAT128 and 'cfloat128' type are provided by +// "include/llvm-libc-types/cfloat128.h" + +#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_CTYPES_H diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h index 5220b1245bf3a5..07e2cd5df18cbb 100644 --- a/libc/test/UnitTest/FPMatcher.h +++ b/libc/test/UnitTest/FPMatcher.h @@ -11,6 +11,7 @@ #include "src/__support/CPP/array.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/CPP/type_traits/is_complex.h" #include 
"src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/fpbits_str.h" @@ -128,6 +129,14 @@ template class CFPMatcher : public Matcher { return matchComplex(); else if (cpp::is_complex_type_same()) return matchComplex(); +#ifdef LIBC_TYPES_HAS_CFLOAT16 + else if (cpp::is_complex_type_same) + return matchComplex(); +#endif +#ifdef LIBC_TYPES_HAS_CFLOAT128 + else if (cpp::is_complex_type_same) + return matchComplex(); +#endif } void explainError() override { @@ -137,6 +146,14 @@ template class CFPMatcher : public Matcher { return explainErrorComplex(); else if (cpp::is_complex_type_same()) return explainErrorComplex(); +#ifdef LIBC_TYPES_HAS_CFLOAT16 + else if (cpp::is_complex_type_same) + return explainErrorComplex(); +#endif +#ifdef LIBC_TYPES_HAS_CFLOAT128 + else if (cpp::is_complex_type_same) + return explainErrorComplex(); +#endif } }; diff --git a/lldb/docs/use/aarch64-linux.md b/lldb/docs/use/aarch64-linux.md index 803f56d16f981e..70432f57857a59 100644 --- a/lldb/docs/use/aarch64-linux.md +++ b/lldb/docs/use/aarch64-linux.md @@ -160,7 +160,7 @@ Kernel does. ### Visibility of an Inactive ZA Register LLDB does not handle registers that can come and go at runtime (SVE changes -size but it does not dissappear). Therefore when `za` is not enabled, LLDB +size but it does not disappear). Therefore when `za` is not enabled, LLDB will return a block of 0s instead. This block will match the expected size of `za`: ``` @@ -183,9 +183,9 @@ If you want to know whether `za` is active or not, refer to bit 2 of the As for SVE, LLDB does not know how the debugee will use `za`, and therefore does not know how it would be best to display it. At any time any given -instrucion could interpret its contents as many kinds and sizes of data. +instruction could interpret its contents as many kinds and sizes of data. -So LLDB will default to showing `za` as one large vector of individual bytes. 
+So LLDB will default to showing `za` as one large vector of individual bytes. You can override this with a format option (see the SVE example above). ### Expression Evaluation @@ -228,4 +228,4 @@ bytes. ### Expression Evaluation `zt0`'s value and whether it is active or not will be saved prior to -expression evaluation and restored afterwards. \ No newline at end of file +expression evaluation and restored afterwards. diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 50115a638b9589..e710f976ccc4c1 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -5066,6 +5066,9 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type, case clang::BuiltinType::SveUint64x3: case clang::BuiltinType::SveUint64x4: case clang::BuiltinType::SveMFloat8: + case clang::BuiltinType::SveMFloat8x2: + case clang::BuiltinType::SveMFloat8x3: + case clang::BuiltinType::SveMFloat8x4: case clang::BuiltinType::SveFloat16: case clang::BuiltinType::SveBFloat16: case clang::BuiltinType::SveBFloat16x2: diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 9b9affd41809cb..d472cde3d50431 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1751,6 +1751,21 @@ TLI_DEFINE_ENUM_INTERNAL(log2l) TLI_DEFINE_STRING_INTERNAL("log2l") TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl) +/// int ilogb(double x); +TLI_DEFINE_ENUM_INTERNAL(ilogb) +TLI_DEFINE_STRING_INTERNAL("ilogb") +TLI_DEFINE_SIG_INTERNAL(Int, Dbl) + +/// int ilogbf(float x); +TLI_DEFINE_ENUM_INTERNAL(ilogbf) +TLI_DEFINE_STRING_INTERNAL("ilogbf") +TLI_DEFINE_SIG_INTERNAL(Int, Flt) + +/// int ilogbl(long double x); +TLI_DEFINE_ENUM_INTERNAL(ilogbl) +TLI_DEFINE_STRING_INTERNAL("ilogbl") +TLI_DEFINE_SIG_INTERNAL(Int, LDbl) + /// double 
logb(double x); TLI_DEFINE_ENUM_INTERNAL(logb) TLI_DEFINE_STRING_INTERNAL("logb") diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index b9d664fe570a82..84bd6cb9f1170e 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -2329,10 +2329,11 @@ class SelectionDAG { Align getEVTAlign(EVT MemoryVT) const; /// Test whether the given value is a constant int or similar node. - SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) const; + bool isConstantIntBuildVectorOrConstantInt(SDValue N, + bool AllowOpaques = true) const; /// Test whether the given value is a constant FP or similar node. - SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) const ; + bool isConstantFPBuildVectorOrConstantFP(SDValue N) const; /// \returns true if \p N is any kind of constant or build_vector of /// constants, int or float. If a vector, it may not necessarily be a splat. diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index c1a47cec82730c..9162d2b2208cca 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -281,7 +281,11 @@ class TargetFrameLowering { /// hasFP - Return true if the specified function should have a dedicated /// frame pointer register. For most targets this is true only if the function /// has variable sized allocas or if frame pointer elimination is disabled. - virtual bool hasFP(const MachineFunction &MF) const = 0; + /// For all targets, this is false if the function has the naked attribute + /// since there is no prologue to set up the frame pointer. 
+ bool hasFP(const MachineFunction &MF) const { + return !MF.getFunction().hasFnAttribute(Attribute::Naked) && hasFPImpl(MF); + } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function @@ -491,6 +495,9 @@ class TargetFrameLowering { /// targets can emit remarks based on the final frame layout. virtual void emitRemarks(const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {}; + +protected: + virtual bool hasFPImpl(const MachineFunction &MF) const = 0; }; } // End llvm namespace diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 5ab31a687ec5e9..61615cb0f7b301 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5567,9 +5567,7 @@ class TargetLowering : public TargetLoweringBase { /// If this function returns true, SelectionDAGBuilder emits a /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. 
- virtual bool useLoadStackGuardNode() const { - return false; - } + virtual bool useLoadStackGuardNode(const Module &M) const { return false; } virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, const SDLoc &DL) const { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index a62257c3da05bc..8ffb92fb4bd0dc 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1171,7 +1171,7 @@ def OMP_Workshare : Directive<"workshare"> { let category = CA_Executable; } def OMP_EndWorkshare : Directive<"end workshare"> { - let allowedClauses = [ + let allowedOnceClauses = [ VersionedClause, ]; let leafConstructs = OMP_Workshare.leafConstructs; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 77cb4370b54664..d0373a7dadfcf9 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1791,20 +1791,24 @@ class integer_of_opcode : GICombineRule < def integer_of_truncate : integer_of_opcode; -def cast_combines: GICombineGroup<[ +def cast_of_cast_combines: GICombineGroup<[ truncate_of_zext, truncate_of_sext, truncate_of_anyext, - select_of_zext, - select_of_anyext, - select_of_truncate, zext_of_zext, zext_of_anyext, sext_of_sext, sext_of_anyext, anyext_of_anyext, anyext_of_zext, - anyext_of_sext, + anyext_of_sext +]>; + +def cast_combines: GICombineGroup<[ + cast_of_cast_combines, + select_of_zext, + select_of_anyext, + select_of_truncate, buildvector_of_truncate, narrow_binop_add, narrow_binop_sub, diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 1785d77bca985c..d9651d2f47c647 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -372,6 +372,8 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_log2); 
TLI.setUnavailable(LibFunc_log2f); TLI.setAvailableWithName(LibFunc_logb, "_logb"); + TLI.setUnavailable(LibFunc_ilogb); + TLI.setUnavailable(LibFunc_ilogbf); if (hasPartialFloat) TLI.setAvailableWithName(LibFunc_logbf, "_logbf"); else @@ -398,6 +400,7 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_log1pl); TLI.setUnavailable(LibFunc_log2l); TLI.setUnavailable(LibFunc_logbl); + TLI.setUnavailable(LibFunc_ilogbl); TLI.setUnavailable(LibFunc_nearbyintl); TLI.setUnavailable(LibFunc_rintl); TLI.setUnavailable(LibFunc_roundl); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 56c1ac88da85cf..5f6b5c957aa671 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2410,7 +2410,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::stackprotector: { LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL); Register GuardVal; - if (TLI->useLoadStackGuardNode()) { + if (TLI->useLoadStackGuardNode(*CI.getModule())) { GuardVal = MRI->createGenericVirtualRegister(PtrTy); getStackGuard(GuardVal, MIRBuilder); } else @@ -3901,7 +3901,7 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD, // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. // Otherwise, emit a volatile load to retrieve the stack guard value. 
- if (TLI->useLoadStackGuardNode()) { + if (TLI->useLoadStackGuardNode(*ParentBB->getBasicBlock()->getModule())) { Guard = MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits())); getStackGuard(Guard, *CurBuilder); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 18439b87a83b7d..ea869371ae107b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1205,13 +1205,13 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); - if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) { SDNodeFlags NewFlags; if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && Flags.hasNoUnsignedWrap()) NewFlags.setNoUnsignedWrap(true); - if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags); @@ -9931,10 +9931,10 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { // fold (rot* (rot* x, c2), c1) // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize) if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { - SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); - SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); - if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) { - EVT ShiftVT = C1->getValueType(0); + bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); + bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); + if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) { + EVT ShiftVT = N1.getValueType(); bool SameSide = (N->getOpcode() == 
NextOp); unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); @@ -16805,8 +16805,8 @@ SDValue DAGCombiner::visitVP_FADD(SDNode *N) { SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); - SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); + bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -16903,10 +16903,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - SDNode *CFP00 = - DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); - SDNode *CFP01 = - DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); + bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { @@ -16926,10 +16924,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N1.getOpcode() == ISD::FMUL) { - SDNode *CFP10 = - DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); - SDNode *CFP11 = - DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); + bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { @@ -16949,8 +16945,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N0.getOpcode() == ISD::FADD) { - SDNode *CFP00 = - DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // 
(fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { @@ -16960,8 +16955,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N1.getOpcode() == ISD::FADD) { - SDNode *CFP10 = - DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { @@ -17374,11 +17368,9 @@ template SDValue DAGCombiner::visitFMA(SDNode *N) { MatchContextClass matcher(DAG, TLI, N); // Constant fold FMA. - if (isa(N0) && - isa(N1) && - isa(N2)) { - return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2); - } + if (SDValue C = + DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2})) + return C; // (-N0 * -N1) + N2 --> (N0 * N1) + N2 TargetLowering::NegatibleCost CostN0 = @@ -17494,9 +17486,8 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) { SDLoc DL(N); // Constant fold FMAD. 
- if (isa(N0) && isa(N1) && - isa(N2)) - return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2})) + return C; return SDValue(); } @@ -18156,8 +18147,9 @@ SDValue DAGCombiner::visitXROUND(SDNode *N) { // fold (lrint|llrint c1fp) -> c1 // fold (lround|llround c1fp) -> c1 - if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0); + if (SDValue C = + DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0})) + return C; return SDValue(); } @@ -18166,10 +18158,10 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (fp_round c1fp) -> c1fp - if (SDValue C = - DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1})) return C; // fold (fp_round (fp_extend x)) -> x @@ -18200,12 +18192,10 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // single-step fp_round we want to fold to. // In other words, double rounding isn't the same as rounding. // Also, this is a value preserving truncation iff both fp_round's are. 
- if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { - SDLoc DL(N); + if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) return DAG.getNode( ISD::FP_ROUND, DL, VT, N0.getOperand(0), DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true)); - } } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) @@ -18219,8 +18209,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Tmp.getNode()); - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - Tmp, N0.getOperand(1)); + return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1)); } if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) @@ -18232,42 +18221,40 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N))) + if (SDValue FoldedVOp = SimplifyVCastOp(N, DL)) return FoldedVOp; // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. - if (N->hasOneUse() && - N->use_begin()->getOpcode() == ISD::FP_ROUND) + if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND) return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) - return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0})) + return C; // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) if (N0.getOpcode() == ISD::FP16_TO_FP && TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); + return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0)); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. 
- if (N0.getOpcode() == ISD::FP_ROUND - && N0.getConstantOperandVal(1) == 1) { + if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, - In, N0.getOperand(1)); - return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); + return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1)); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, In); } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); @@ -18433,10 +18420,11 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) { SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (fabs c1) -> fabs(c1) - if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) - return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0})) + return C; // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) @@ -18445,7 +18433,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); + return DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0)); if (SDValue Cast = foldSignChangeInBitcast(N)) return Cast; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a0159c7ad5c092..54002a068fd7ab 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6962,10 +6962,10 @@ void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, // Canonicalize: // binop(const, nonconst) -> binop(nonconst, const) - SDNode *N1C = isConstantIntBuildVectorOrConstantInt(N1); - SDNode *N2C = isConstantIntBuildVectorOrConstantInt(N2); - SDNode *N1CFP = isConstantFPBuildVectorOrConstantFP(N1); - SDNode *N2CFP = isConstantFPBuildVectorOrConstantFP(N2); + bool N1C = isConstantIntBuildVectorOrConstantInt(N1); + bool N2C = isConstantIntBuildVectorOrConstantInt(N2); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + bool N2CFP = isConstantFPBuildVectorOrConstantFP(N2); if ((N1C && !N2C) || (N1CFP && !N2CFP)) std::swap(N1, N2); @@ -13210,39 +13210,44 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } -// Returns the SDNode if it is a constant integer BuildVector -// or constant integer. -SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const { - if (isa(N)) - return N.getNode(); +// Returns true if it is a constant integer BuildVector or constant integer, +// possibly hidden by a bitcast. +bool SelectionDAG::isConstantIntBuildVectorOrConstantInt( + SDValue N, bool AllowOpaques) const { + N = peekThroughBitcasts(N); + + if (auto *C = dyn_cast(N)) + return AllowOpaques || !C->isOpaque(); + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) - return N.getNode(); + return true; + // Treat a GlobalAddress supporting constant offset folding as a // constant integer. - if (GlobalAddressSDNode *GA = dyn_cast(N)) + if (auto *GA = dyn_cast(N)) if (GA->getOpcode() == ISD::GlobalAddress && TLI->isOffsetFoldingLegal(GA)) - return GA; + return true; + if ((N.getOpcode() == ISD::SPLAT_VECTOR) && isa(N.getOperand(0))) - return N.getNode(); - return nullptr; + return true; + return false; } -// Returns the SDNode if it is a constant float BuildVector -// or constant float. 
-SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const { +// Returns true if it is a constant float BuildVector or constant float. +bool SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const { if (isa(N)) - return N.getNode(); + return true; if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) - return N.getNode(); + return true; if ((N.getOpcode() == ISD::SPLAT_VECTOR) && isa(N.getOperand(0))) - return N.getNode(); + return true; - return nullptr; + return false; } std::optional SelectionDAG::isBoolConstant(SDValue N, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 421cc5018bdeeb..1ca2e4baf0e4dc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3138,7 +3138,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. // Otherwise, emit a volatile load to retrieve the stack guard value. SDValue Chain = DAG.getEntryNode(); - if (TLI.useLoadStackGuardNode()) { + if (TLI.useLoadStackGuardNode(M)) { Guard = getLoadStackGuard(DAG, dl, Chain); } else { const Value *IRGuard = TLI.getSDagStackGuard(M); @@ -7391,7 +7391,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, const Module &M = *MF.getFunction().getParent(); EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Chain = getRoot(); - if (TLI.useLoadStackGuardNode()) { + if (TLI.useLoadStackGuardNode(M)) { Res = getLoadStackGuard(DAG, sdl, Chain); Res = DAG.getPtrExtOrTrunc(Res, sdl, PtrTy); } else { @@ -7411,9 +7411,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Emit code into the DAG to store the stack guard onto the stack. 
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + const Module &M = *MF.getFunction().getParent(); SDValue Src, Chain = getRoot(); - if (TLI.useLoadStackGuardNode()) + if (TLI.useLoadStackGuardNode(M)) Src = getLoadStackGuard(DAG, sdl, Chain); else Src = getValue(I.getArgOperand(0)); // The guard's value. diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index ead6455ddd5278..321190c83b79f3 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -314,9 +314,9 @@ def AArch64PostLegalizerLowering // Post-legalization combines which are primarily optimizations. def AArch64PostLegalizerCombiner : GICombiner<"AArch64PostLegalizerCombinerImpl", - [copy_prop, combines_for_extload, - combine_indexed_load_store, - sext_trunc_sextload, mutate_anyext_to_zext, + [copy_prop, cast_of_cast_combines, buildvector_of_truncate, + integer_of_truncate, mutate_anyext_to_zext, + combines_for_extload, combine_indexed_load_store, sext_trunc_sextload, hoist_logic_op_with_same_opcode_hands, redundant_and, xor_of_and_with_same_reg, extractvecelt_pairwise_add, redundant_or, diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 1b8eac7fac21f7..bbf2f267795457 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -480,9 +480,9 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { getSVEStackSize(MF) || LowerQRegCopyThroughMem); } -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. -bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { +/// hasFPImpl - Return true if the specified function should have a dedicated +/// frame pointer register. 
+bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index c1973124962085..20445e63bcb13e 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -65,7 +65,6 @@ class AArch64FrameLowering : public TargetFrameLowering { /// Can this function use the red zone for local allocations. bool canUseRedZone(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool assignCalleeSavedSpillSlots(MachineFunction &MF, @@ -125,6 +124,9 @@ class AArch64FrameLowering : public TargetFrameLowering { orderFrameObjects(const MachineFunction &MF, SmallVectorImpl &ObjectsToAllocate) const override; +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: /// Returns true if a homogeneous prolog or epilog code can be emitted /// for the size optimization. If so, HOM_Prolog/HOM_Epilog pseudo diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f38cdc91baed2f..9ef8c5560ca0e9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallVectorExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -2111,7 +2112,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::BITCAST, VT, PreferNEON ? 
Legal : Default); setOperationAction(ISD::BITREVERSE, VT, Default); setOperationAction(ISD::BSWAP, VT, Default); - setOperationAction(ISD::BUILD_VECTOR, VT, Default); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Default); setOperationAction(ISD::CTLZ, VT, Default); setOperationAction(ISD::CTPOP, VT, Default); @@ -14396,24 +14397,72 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG, return SDValue(); } -SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE( + SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); + SDLoc DL(Op); + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + auto *BVN = cast(Op); - if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) { - if (auto SeqInfo = cast(Op)->isConstantSequence()) { - SDLoc DL(Op); - EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT); - SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second); - SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps); - return convertFromScalableVector(DAG, Op.getValueType(), Seq); - } + if (auto SeqInfo = BVN->isConstantSequence()) { + SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT); + SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second); + SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps); + return convertFromScalableVector(DAG, VT, Seq); + } + + unsigned NumElems = VT.getVectorNumElements(); + if (!VT.isPow2VectorType() || VT.getFixedSizeInBits() > 128 || + NumElems <= 1 || BVN->isConstant()) + return SDValue(); + + auto IsExtractElt = [](SDValue Op) { + return Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT; + }; - // Revert to common legalisation for all other variants. 
+ // For integer types that are not already in vectors limit to at most four + // elements. This is an arbitrary restriction to avoid many fmovs from GPRs. + if (VT.getScalarType().isInteger() && + NumElems - count_if(Op->op_values(), IsExtractElt) > 4) return SDValue(); + + // Lower (pow2) BUILD_VECTORS that are <= 128-bit to a sequence of ZIP1s. + SDValue ZeroI64 = DAG.getConstant(0, DL, MVT::i64); + SmallVector Intermediates = map_to_vector<16>( + Op->op_values(), [&, Undef = DAG.getUNDEF(ContainerVT)](SDValue Op) { + return Op.isUndef() ? Undef + : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, + ContainerVT, Undef, Op, ZeroI64); + }); + + ElementCount ZipEC = ContainerVT.getVectorElementCount(); + while (Intermediates.size() > 1) { + EVT ZipVT = getPackedSVEVectorVT(ZipEC); + + for (unsigned I = 0; I < Intermediates.size(); I += 2) { + SDValue Op0 = DAG.getBitcast(ZipVT, Intermediates[I + 0]); + SDValue Op1 = DAG.getBitcast(ZipVT, Intermediates[I + 1]); + Intermediates[I / 2] = + Op1.isUndef() ? Op0 + : DAG.getNode(AArch64ISD::ZIP1, DL, ZipVT, Op0, Op1); + } + + Intermediates.resize(Intermediates.size() / 2); + ZipEC = ZipEC.divideCoefficientBy(2); } + assert(Intermediates.size() == 1); + SDValue Vec = DAG.getBitcast(ContainerVT, Intermediates[0]); + return convertFromScalableVector(DAG, VT, Vec); +} + +SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + + if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) + return LowerFixedLengthBuildVectorToSVE(Op, DAG); + // Try to build a simple constant vector. Op = NormalizeBuildVector(Op, DAG); // Thought this might return a non-BUILD_VECTOR (e.g. 
CONCAT_VECTORS), if so, @@ -20712,7 +20761,7 @@ static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG) { if (!Add.hasOneUse()) return SDValue(); - if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(X))) + if (DAG.isConstantIntBuildVectorOrConstantInt(X)) return SDValue(); SDValue M1 = Add.getOperand(0); @@ -26983,9 +27032,9 @@ void AArch64TargetLowering::ReplaceNodeResults( } } -bool AArch64TargetLowering::useLoadStackGuardNode() const { +bool AArch64TargetLowering::useLoadStackGuardNode(const Module &M) const { if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia()) - return TargetLowering::useLoadStackGuardNode(); + return TargetLowering::useLoadStackGuardNode(M); return true; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index cf2ae5fd027c7a..160cd18ca53b32 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -811,7 +811,7 @@ class AArch64TargetLowering : public TargetLowering { TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; - bool useLoadStackGuardNode() const override; + bool useLoadStackGuardNode(const Module &M) const override; TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; @@ -1244,6 +1244,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFixedLengthBuildVectorToSVE(SDValue Op, SelectionDAG &DAG) const; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl &Created) const override; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 7c6b789b9c1b72..ff3c69f7e10c66 100644 --- 
a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -337,10 +337,8 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call, bool AArch64TTIImpl::shouldMaximizeVectorBandwidth( TargetTransformInfo::RegisterKind K) const { assert(K != TargetTransformInfo::RGK_Scalar); - return ((K == TargetTransformInfo::RGK_FixedWidthVector && - ST->isNeonAvailable()) || - (K == TargetTransformInfo::RGK_ScalableVector && - ST->isSVEorStreamingSVEAvailable())); + return (K == TargetTransformInfo::RGK_FixedWidthVector && + ST->isNeonAvailable()); } /// Calculate the cost of materializing a 64-bit value. This helper diff --git a/llvm/lib/Target/AMDGPU/R600FrameLowering.h b/llvm/lib/Target/AMDGPU/R600FrameLowering.h index f171bc4fea781f..c4621174acaba1 100644 --- a/llvm/lib/Target/AMDGPU/R600FrameLowering.h +++ b/llvm/lib/Target/AMDGPU/R600FrameLowering.h @@ -27,9 +27,8 @@ class R600FrameLowering : public AMDGPUFrameLowering { StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; - bool hasFP(const MachineFunction &MF) const override { - return false; - } +protected: + bool hasFPImpl(const MachineFunction &MF) const override { return false; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index ed3386d823d9a6..d7a7dca5da484c 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -2088,7 +2088,7 @@ static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) { // The FP for kernels is always known 0, so we never really need to setup an // explicit register for it. However, DisableFramePointerElim will force us to // use a register for it. 
-bool SIFrameLowering::hasFP(const MachineFunction &MF) const { +bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); // For entry & chain functions we can use an immediate offset in most cases, diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index b5cc2e1d56e1e1..529f76dc2dc43c 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -72,6 +72,9 @@ class SIFrameLowering final : public AMDGPUFrameLowering { const ArrayRef CSI, const TargetRegisterInfo *TRI) const override; +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: void emitEntryFunctionFlatScratchInit(MachineFunction &MF, MachineBasicBlock &MBB, @@ -97,8 +100,6 @@ class SIFrameLowering final : public AMDGPUFrameLowering { MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const; public: - bool hasFP(const MachineFunction &MF) const override; - bool requiresStackPointerReference(const MachineFunction &MF) const; /// If '-amdgpu-spill-cfi-saved-regs' is enabled, emit RA/EXEC spills to diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 5b74022457c261..722a79be915dcb 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -41,9 +41,9 @@ /// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then /// // block /// %exec = S_XOR_B64 %sgpr0, %exec // Update the exec mask -/// S_BRANCH_EXECZ label1 // Use our branch optimization +/// S_CBRANCH_EXECZ label1 // Use our branch optimization /// // instruction again. 
-/// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr // Do the THEN block +/// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr // Do the ELSE block /// label1: /// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARC/ARCFrameLowering.cpp b/llvm/lib/Target/ARC/ARCFrameLowering.cpp index 1227fae13211a8..472f1c13f362e5 100644 --- a/llvm/lib/Target/ARC/ARCFrameLowering.cpp +++ b/llvm/lib/Target/ARC/ARCFrameLowering.cpp @@ -487,7 +487,7 @@ MachineBasicBlock::iterator ARCFrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(I); } -bool ARCFrameLowering::hasFP(const MachineFunction &MF) const { +bool ARCFrameLowering::hasFPImpl(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); bool HasFP = MF.getTarget().Options.DisableFramePointerElim(MF) || MF.getFrameInfo().hasVarSizedObjects() || diff --git a/llvm/lib/Target/ARC/ARCFrameLowering.h b/llvm/lib/Target/ARC/ARCFrameLowering.h index 9951a09842c57f..089326fe32057e 100644 --- a/llvm/lib/Target/ARC/ARCFrameLowering.h +++ b/llvm/lib/Target/ARC/ARCFrameLowering.h @@ -54,8 +54,6 @@ class ARCFrameLowering : public TargetFrameLowering { void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; - bool hasFP(const MachineFunction &MF) const override; - MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; @@ -64,6 +62,9 @@ class ARCFrameLowering : public TargetFrameLowering { llvm::MachineFunction &, const llvm::TargetRegisterInfo *, std::vector &) const override; +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: void adjustStackToMatchRecords(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 
aad305cce03961..3f28ce8ca4b559 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -116,12 +116,9 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_iOS_SaveList; if (PushPopSplit == ARMSubtarget::SplitR7) - return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList + return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList : CSR_ATPCS_SplitPush_SaveList; - if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA) - return CSR_AAPCS_SplitPush_R11_SaveList; - return CSR_AAPCS_SaveList; } diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td index 27f175a7003366..d14424c2decac3 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -301,17 +301,14 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R10)>; -// Sometimes we need to split the push of the callee-saved GPRs into two -// regions, to ensure that the frame chain record is set up correctly. These -// list the callee-saved registers in the order they end up on the stack, which -// depends on whether the frame pointer is r7 or r11. -def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, - LR, R11, - (sequence "D%u", 15, 8))>; -def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11, - R7, R6, R5, R4, - R10, R9, R8, - (sequence "D%u", 15, 8))>; +// When enforcing an AAPCS compliant frame chain, R11 is used as the frame +// pointer even for Thumb targets, where split pushes are necessary. +// This AAPCS alternative makes sure the frame index slots match the push +// order in that case. 
+def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11, + R7, R6, R5, R4, + R10, R9, R8, + (sequence "D%u", 15, 8))>; // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 2706efa83fc3f1..e0703457aa8139 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -199,11 +199,6 @@ SpillArea getSpillArea(Register Reg, // push {r0-r10, r12} GPRCS1 // vpush {r8-d15} DPRCS1 // push {r11, lr} GPRCS2 - // - // SplitR11AAPCSSignRA: - // push {r0-r10, r12} GPRSC1 - // push {r11, lr} GPRCS2 - // vpush {r8-d15} DPRCS1 // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at // the top of the stack frame. @@ -251,8 +246,7 @@ SpillArea getSpillArea(Register Reg, return SpillArea::GPRCS1; case ARM::LR: - if (Variation == ARMSubtarget::SplitR11WindowsSEH || - Variation == ARMSubtarget::SplitR11AAPCSSignRA) + if (Variation == ARMSubtarget::SplitR11WindowsSEH) return SpillArea::GPRCS2; else return SpillArea::GPRCS1; @@ -323,10 +317,10 @@ bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const { return true; } -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. This is true if the function has variable sized allocas -/// or if frame pointer elimination is disabled. -bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { +/// hasFPImpl - Return true if the specified function should have a dedicated +/// frame pointer register. This is true if the function has variable sized +/// allocas or if frame pointer elimination is disabled. 
+bool ARMFrameLowering::hasFPImpl(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -869,9 +863,6 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, // This is a conservative estimation: Assume the frame pointer being r7 and // pc("r15") up to r8 getting spilled before (= 8 registers). int MaxRegBytes = 8 * 4; - if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA) - // Here, r11 can be stored below all of r4-r15. - MaxRegBytes = 11 * 4; if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { // Here, r11 can be stored below all of r4-r15 plus d8-d15. MaxRegBytes = 11 * 4 + 8 * 8; @@ -944,23 +935,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } // Determine spill area sizes, and some important frame indices. - SpillArea FramePtrSpillArea; - bool BeforeFPPush = true; for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); - SpillArea Area = getSpillArea(Reg, PushPopSplit, - AFI->getNumAlignedDPRCS2Regs(), RegInfo); - - if (Reg == FramePtr) { + if (Reg == FramePtr) FramePtrSpillFI = FI; - FramePtrSpillArea = Area; - } if (Reg == ARM::D8) D8SpillFI = FI; - switch (Area) { + switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(), + RegInfo)) { case SpillArea::FPCXT: FPCXTSaveSize += 4; break; @@ -987,7 +972,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Move past FPCXT area. if (FPCXTSaveSize > 0) { LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush); + DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true); } // Allocate the vararg register save area. 
@@ -995,15 +980,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); LastPush = std::prev(MBBI); - DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush); + DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true); } // Move past area 1. if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush); - if (FramePtrSpillArea == SpillArea::GPRCS1) - BeforeFPPush = false; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); } // Determine starting offsets of spill areas. These offsets are all positive @@ -1027,6 +1010,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } else { DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; } + int FramePtrOffsetInPush = 0; if (HasFP) { // Offset from the CFA to the saved frame pointer, will be negative. [[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); @@ -1034,6 +1018,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, << ", FPOffset: " << FPOffset << "\n"); assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset && "Max FP estimation is wrong"); + // Offset from the top of the GPRCS1 area to the saved frame pointer, will + // be negative. + FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize; + LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush + << ", FramePtrSpillOffset=" + << (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes) + << "\n"); AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -1045,9 +1036,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // after DPRCS1. 
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) { GPRCS2Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush); - if (FramePtrSpillArea == SpillArea::GPRCS2) - BeforeFPPush = false; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); } // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our @@ -1060,7 +1049,7 @@ else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, MachineInstr::FrameSetup); - DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); } } @@ -1069,8 +1058,7 @@ // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) { - DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI), - BeforeFPPush); + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI)); LastPush = MBBI++; } } @@ -1089,9 +1077,7 @@ // Move GPRCS2, if using using SplitR11WindowsSEH. if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { GPRCS2Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush); - if (FramePtrSpillArea == SpillArea::GPRCS2) - BeforeFPPush = false; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); } bool NeedsWinCFIStackAlloc = NeedsWinCFI; @@ -1192,51 +1178,28 @@ // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. + // FIXME: The above is not necessarily true when PACBTI is enabled.
+ // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes, + // so FP ends up on area two. MachineBasicBlock::iterator AfterPush; if (HasFP) { - MachineBasicBlock::iterator FPPushInst; - // Offset from SP immediately after the push which saved the FP to the FP - // save slot. - int64_t FPOffsetAfterPush; - switch (FramePtrSpillArea) { - case SpillArea::GPRCS1: - FPPushInst = GPRCS1Push; - FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) + - ArgRegsSaveSize + FPCXTSaveSize + - sizeOfSPAdjustment(*FPPushInst); - LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset " - << FPOffsetAfterPush << " after that push\n"); - break; - case SpillArea::GPRCS2: - FPPushInst = GPRCS2Push; - FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) + - ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size + - sizeOfSPAdjustment(*FPPushInst); - if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) - FPOffsetAfterPush += DPRCSSize + DPRGapSize; - LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset " - << FPOffsetAfterPush << " after that push\n"); - break; - default: - llvm_unreachable("frame pointer in unknown spill area"); - break; + AfterPush = std::next(GPRCS1Push); + unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); + int FPOffset = PushSize + FramePtrOffsetInPush; + if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { + AfterPush = std::next(GPRCS2Push); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, 0, MachineInstr::FrameSetup); + } else { + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, FPOffset, + MachineInstr::FrameSetup); } - AfterPush = std::next(FPPushInst); - if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) - assert(FPOffsetAfterPush == 0); - - // Emit the MOV or ADD to set up the frame pointer register. 
- emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, - FramePtr, ARM::SP, FPOffsetAfterPush, - MachineInstr::FrameSetup); - if (!NeedsWinCFI) { - // Emit DWARF info to find the CFA using the frame pointer from this - // point onward. - if (FPOffsetAfterPush != 0) { + if (FramePtrOffsetInPush + PushSize != 0) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), - -MFI.getObjectOffset(FramePtrSpillFI))); + FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush)); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1749,8 +1712,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && STI.hasV5TOps() && MBB.succ_empty() && !hasPAC && - (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH && - PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) { + PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) { Reg = ARM::PC; // Fold the return instruction into the LDM. DeleteRet = true; @@ -2983,29 +2945,18 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots( const auto &AFI = *MF.getInfo(); if (AFI.shouldSignReturnAddress()) { // The order of register must match the order we push them, because the - // PEI assigns frame indices in that order. 
That order depends on the - // PushPopSplitVariation, there are only two cases which we use with return - // address signing: - switch (STI.getPushPopSplitVariation(MF)) { - case ARMSubtarget::SplitR7: - // LR, R7, R6, R5, R4, , R11, R10, R9, R8, D15-D8 - CSI.insert(find_if(CSI, - [=](const auto &CS) { - Register Reg = CS.getReg(); - return Reg == ARM::R10 || Reg == ARM::R11 || - Reg == ARM::R8 || Reg == ARM::R9 || - ARM::DPRRegClass.contains(Reg); - }), - CalleeSavedInfo(ARM::R12)); - break; - case ARMSubtarget::SplitR11AAPCSSignRA: - // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR - // on the stack. - CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12)); - break; - default: - llvm_unreachable("Unexpected CSR split with return address signing"); - } + // PEI assigns frame indices in that order. When compiling for return + // address sign and authenication, we use split push, therefore the orders + // we want are: + // LR, R7, R6, R5, R4, , R11, R10, R9, R8, D15-D8 + CSI.insert(find_if(CSI, + [=](const auto &CS) { + Register Reg = CS.getReg(); + return Reg == ARM::R10 || Reg == ARM::R11 || + Reg == ARM::R8 || Reg == ARM::R9 || + ARM::DPRRegClass.contains(Reg); + }), + CalleeSavedInfo(ARM::R12)); } return false; diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 3c5bc00cb449f1..ff51f1a7af0229 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -45,7 +45,6 @@ class ARMFrameLowering : public TargetFrameLowering { bool enableCalleeSaveSkip(const MachineFunction &MF) const override; - bool hasFP(const MachineFunction &MF) const override; bool isFPReserved(const MachineFunction &MF) const; bool requiresAAPCSFrameRecord(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const override; @@ -87,6 +86,9 @@ class ARMFrameLowering : public TargetFrameLowering { const SpillSlot * getCalleeSavedSpillSlots(unsigned 
&NumEntries) const override; +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, unsigned StmOpc, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 64c0500191e41e..5d679a1a916dc4 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21305,7 +21305,7 @@ bool ARMTargetLowering::shouldInsertFencesForAtomic( return InsertFencesForAtomic; } -bool ARMTargetLowering::useLoadStackGuardNode() const { +bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const { // ROPI/RWPI are not supported currently. return !Subtarget->isROPI() && !Subtarget->isRWPI(); } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 316f7d3b9bce5d..ef651bc3d84c0f 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -675,7 +675,7 @@ class VectorType; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; - bool useLoadStackGuardNode() const override; + bool useLoadStackGuardNode(const Module &M) const override; void insertSSPDeclarations(Module &M) const override; Value *getSDagStackGuard(const Module &M) const override; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 9adfb1fab5f084..c4a782bc40910a 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -514,12 +514,5 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const { F.needsUnwindTableEntry() && (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF))) return SplitR11WindowsSEH; - - // Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each - // other in the list of callee saved registers in a frame, and branch - // signing is enabled. 
- if (MF.getInfo()->shouldSignReturnAddress() && - getFramePointerReg() == ARM::R11) - return SplitR11AAPCSSignRA; return NoSplit; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 214c5f1b45e556..7917ddc17bdb81 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -105,18 +105,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// vpush {d8-d15} /// push {r11, lr} SplitR11WindowsSEH, - - /// When generating AAPCS-compilant frame chains, R11 is the frame pointer, - /// and must be pushed adjacent to the return address (LR). Normally this - /// isn't a problem, because the only register between them is r12, which is - /// the intra-procedure-call scratch register, so doesn't need to be saved. - /// However, when PACBTI is in use, r12 contains the authentication code, so - /// does need to be saved. This means that we need a separate push for R11 - /// and LR. - /// push {r0-r10, r12} - /// push {r11, lr} - /// vpush {d8-d15} - SplitR11AAPCSSignRA, }; protected: diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index 64dd0338bf60ed..91b0f8c6b2df48 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -232,7 +232,7 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, // // Notice that strictly this is not a frame pointer because it contains SP after // frame allocation instead of having the original SP in function entry. 
-bool AVRFrameLowering::hasFP(const MachineFunction &MF) const { +bool AVRFrameLowering::hasFPImpl(const MachineFunction &MF) const { const AVRMachineFunctionInfo *FuncInfo = MF.getInfo(); return (FuncInfo->getHasSpills() || FuncInfo->getHasAllocas() || diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.h b/llvm/lib/Target/AVR/AVRFrameLowering.h index a550c0efbb8ef7..7baa5e9d62f60b 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.h +++ b/llvm/lib/Target/AVR/AVRFrameLowering.h @@ -21,7 +21,6 @@ class AVRFrameLowering : public TargetFrameLowering { public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - bool hasFP(const MachineFunction &MF) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, @@ -38,6 +37,9 @@ class AVRFrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + +protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/BPF/BPFFrameLowering.cpp b/llvm/lib/Target/BPF/BPFFrameLowering.cpp index 8812cfdd86da43..123b99f254234d 100644 --- a/llvm/lib/Target/BPF/BPFFrameLowering.cpp +++ b/llvm/lib/Target/BPF/BPFFrameLowering.cpp @@ -20,7 +20,9 @@ using namespace llvm; -bool BPFFrameLowering::hasFP(const MachineFunction &MF) const { return true; } +bool BPFFrameLowering::hasFPImpl(const MachineFunction &MF) const { + return true; +} void BPFFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const {} diff --git a/llvm/lib/Target/BPF/BPFFrameLowering.h b/llvm/lib/Target/BPF/BPFFrameLowering.h index a546351ec6cbbf..6beffcbe69dd0b 100644 --- a/llvm/lib/Target/BPF/BPFFrameLowering.h +++ b/llvm/lib/Target/BPF/BPFFrameLowering.h @@ -26,7 +26,6 @@ class BPFFrameLowering 
: public TargetFrameLowering { void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - bool hasFP(const MachineFunction &MF) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; @@ -35,6 +34,9 @@ class BPFFrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator MI) const override { return MBB.erase(MI); } + +protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; } #endif diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp index cedcbff1db24fc..c023b5a0de5ad5 100644 --- a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp +++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp @@ -33,7 +33,7 @@ static Register getFPReg(const CSKYSubtarget &STI) { return CSKY::R8; } // callee saved register to save the value. static Register getBPReg(const CSKYSubtarget &STI) { return CSKY::R7; } -bool CSKYFrameLowering::hasFP(const MachineFunction &MF) const { +bool CSKYFrameLowering::hasFPImpl(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.h b/llvm/lib/Target/CSKY/CSKYFrameLowering.h index 69bf01cf1801e5..0b3b287bb6a55b 100644 --- a/llvm/lib/Target/CSKY/CSKYFrameLowering.h +++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.h @@ -61,7 +61,6 @@ class CSKYFrameLowering : public TargetFrameLowering { MutableArrayRef CSI, const TargetRegisterInfo *TRI) const override; - bool hasFP(const MachineFunction &MF) const override; bool hasBP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const override; @@ -69,6 +68,9 @@ class CSKYFrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, 
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + +protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; } // namespace llvm #endif diff --git a/llvm/lib/Target/DirectX/DirectXFrameLowering.h b/llvm/lib/Target/DirectX/DirectXFrameLowering.h index 76a1450054be81..85823556d55504 100644 --- a/llvm/lib/Target/DirectX/DirectXFrameLowering.h +++ b/llvm/lib/Target/DirectX/DirectXFrameLowering.h @@ -29,7 +29,8 @@ class DirectXFrameLowering : public TargetFrameLowering { void emitPrologue(MachineFunction &, MachineBasicBlock &) const override {} void emitEpilogue(MachineFunction &, MachineBasicBlock &) const override {} - bool hasFP(const MachineFunction &) const override { return false; } +protected: + bool hasFPImpl(const MachineFunction &) const override { return false; } }; } // namespace llvm #endif // LLVM_DIRECTX_DIRECTXFRAMELOWERING_H diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index 7c82f5e9f9a604..48acd9da9587fe 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1144,10 +1144,7 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, } } -bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { - if (MF.getFunction().hasFnAttribute(Attribute::Naked)) - return false; - +bool HexagonFrameLowering::hasFPImpl(const MachineFunction &MF) const { auto &MFI = MF.getFrameInfo(); auto &HRI = *MF.getSubtarget().getRegisterInfo(); bool HasExtraAlign = HRI.hasStackRealignment(MF); diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h index 98e69dcc4b3915..926aadb01f50e5 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -89,7 +89,6 @@ class HexagonFrameLowering : public TargetFrameLowering { StackOffset getFrameIndexReference(const MachineFunction 
&MF, int FI, Register &FrameReg) const override; - bool hasFP(const MachineFunction &MF) const override; const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const override { @@ -114,6 +113,9 @@ class HexagonFrameLowering : public TargetFrameLowering { void insertCFIInstructions(MachineFunction &MF) const; +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: using CSIVect = std::vector; diff --git a/llvm/lib/Target/Lanai/LanaiFrameLowering.h b/llvm/lib/Target/Lanai/LanaiFrameLowering.h index 380d63df7301ef..9bd78d008f77e3 100644 --- a/llvm/lib/Target/Lanai/LanaiFrameLowering.h +++ b/llvm/lib/Target/Lanai/LanaiFrameLowering.h @@ -44,10 +44,11 @@ class LanaiFrameLowering : public TargetFrameLowering { eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; - bool hasFP(const MachineFunction & /*MF*/) const override { return true; } - void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; + +protected: + bool hasFPImpl(const MachineFunction & /*MF*/) const override { return true; } }; } // namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp index 4e504729b23e2d..1a787c63c6241b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -31,7 +31,7 @@ using namespace llvm; // pointer register. This is true if frame pointer elimination is // disabled, if it needs dynamic stack realignment, if the function has // variable sized allocas, or if the frame address is taken. 
-bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { +bool LoongArchFrameLowering::hasFPImpl(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h index bc2ac02c91f814..6cbfcf665f6a93 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h @@ -49,13 +49,15 @@ class LoongArchFrameLowering : public TargetFrameLowering { StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; - bool hasFP(const MachineFunction &MF) const override; bool hasBP(const MachineFunction &MF) const; uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const; bool enableShrinkWrapping(const MachineFunction &MF) const override; +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: void determineFrameLayout(MachineFunction &MF) const; void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.cpp b/llvm/lib/Target/M68k/M68kFrameLowering.cpp index 1445bac0b92e85..4245061f0ae749 100644 --- a/llvm/lib/Target/M68k/M68kFrameLowering.cpp +++ b/llvm/lib/Target/M68k/M68kFrameLowering.cpp @@ -40,7 +40,7 @@ M68kFrameLowering::M68kFrameLowering(const M68kSubtarget &STI, Align Alignment) StackPtr = TRI->getStackRegister(); } -bool M68kFrameLowering::hasFP(const MachineFunction &MF) const { +bool M68kFrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.h b/llvm/lib/Target/M68k/M68kFrameLowering.h index a5349377232eb6..ed2bfb605ff13a 100644 --- a/llvm/lib/Target/M68k/M68kFrameLowering.h +++ 
b/llvm/lib/Target/M68k/M68kFrameLowering.h @@ -121,12 +121,6 @@ class M68kFrameLowering : public TargetFrameLowering { MutableArrayRef CSI, const TargetRegisterInfo *TRI) const override; - /// Return true if the specified function should have a dedicated frame - /// pointer register. This is true if the function has variable sized - /// allocas, if it needs dynamic stack realignment, if frame pointer - /// elimination is disabled, or if the frame address is taken. - bool hasFP(const MachineFunction &MF) const override; - /// Under normal circumstances, when a frame pointer is not required, we /// reserve argument space for call sites in the function immediately on /// entry to the current function. This eliminates the need for add/sub sp @@ -166,6 +160,13 @@ class M68kFrameLowering : public TargetFrameLowering { /// pointer by a constant value. void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int64_t NumBytes, bool InEpilogue) const; + +protected: + /// Return true if the specified function should have a dedicated frame + /// pointer register. This is true if the function has variable sized + /// allocas, if it needs dynamic stack realignment, if frame pointer + /// elimination is disabled, or if the frame address is taken. 
+ bool hasFPImpl(const MachineFunction &MF) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp index d0dc6dd146efdb..045dedfb385385 100644 --- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -30,7 +30,7 @@ MSP430FrameLowering::MSP430FrameLowering(const MSP430Subtarget &STI) Align(2)), STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {} -bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const { +bool MSP430FrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); return (MF.getTarget().Options.DisableFramePointerElim(MF) || diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/llvm/lib/Target/MSP430/MSP430FrameLowering.h index 5227d3e731edb3..daa4eec998ee87 100644 --- a/llvm/lib/Target/MSP430/MSP430FrameLowering.h +++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.h @@ -24,6 +24,7 @@ class MSP430RegisterInfo; class MSP430FrameLowering : public TargetFrameLowering { protected: + bool hasFPImpl(const MachineFunction &MF) const override; public: MSP430FrameLowering(const MSP430Subtarget &STI); @@ -51,7 +52,6 @@ class MSP430FrameLowering : public TargetFrameLowering { MutableArrayRef CSI, const TargetRegisterInfo *TRI) const override; - bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; diff --git a/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/llvm/lib/Target/Mips/MipsFrameLowering.cpp index 99d225f9abfe89..9b3edcd61ae1e2 100644 --- a/llvm/lib/Target/Mips/MipsFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsFrameLowering.cpp @@ -86,11 +86,11 @@ const MipsFrameLowering *MipsFrameLowering::create(const MipsSubtarget &ST) { return llvm::createMipsSEFrameLowering(ST); } 
-// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas, -// if it needs dynamic stack realignment, if frame pointer elimination is -// disabled, or if the frame address is taken. -bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { +// hasFPImpl - Return true if the specified function should have a dedicated +// frame pointer register. This is true if the function has variable sized +// allocas, if it needs dynamic stack realignment, if frame pointer elimination +// is disabled, or if the frame address is taken. +bool MipsFrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); diff --git a/llvm/lib/Target/Mips/MipsFrameLowering.h b/llvm/lib/Target/Mips/MipsFrameLowering.h index 710a3d40c38efc..25adc33fbf5cab 100644 --- a/llvm/lib/Target/Mips/MipsFrameLowering.h +++ b/llvm/lib/Target/Mips/MipsFrameLowering.h @@ -23,6 +23,8 @@ class MipsFrameLowering : public TargetFrameLowering { protected: const MipsSubtarget &STI; + bool hasFPImpl(const MachineFunction &MF) const override; + public: explicit MipsFrameLowering(const MipsSubtarget &sti, Align Alignment) : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) { @@ -30,8 +32,6 @@ class MipsFrameLowering : public TargetFrameLowering { static const MipsFrameLowering *create(const MipsSubtarget &ST); - bool hasFP(const MachineFunction &MF) const override; - bool hasBP(const MachineFunction &MF) const; bool allocateScavengingFrameIndexesNearIncomingSP( diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 9abe0e3186f200..a5f6cab421fb7e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -27,7 +27,9 @@ using namespace llvm; NVPTXFrameLowering::NVPTXFrameLowering() : 
TargetFrameLowering(TargetFrameLowering::StackGrowsUp, Align(8), 0) {} -bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } +bool NVPTXFrameLowering::hasFPImpl(const MachineFunction &MF) const { + return true; +} void NVPTXFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h index a5d49ac3ab2930..f8d1f978327bc0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -22,7 +22,6 @@ class NVPTXFrameLowering : public TargetFrameLowering { public: explicit NVPTXFrameLowering(); - bool hasFP(const MachineFunction &MF) const override; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, @@ -32,6 +31,9 @@ class NVPTXFrameLowering : public TargetFrameLowering { eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override; + +protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; } // End llvm namespace diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 7f942de74bdcc9..93c2d92ef7c1c8 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1028,6 +1028,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S, formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.", ScopeToString(S))); } + break; } case NVPTX::Ordering::SequentiallyConsistent: { switch (S) { @@ -1046,6 +1047,7 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S, report_fatal_error(formatv("Unsupported scope \"{}\" for seq_cst 
fence.", ScopeToString(S))); } + break; } case NVPTX::Ordering::NotAtomic: case NVPTX::Ordering::Relaxed: diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index f7188b856461b7..1083febc5f8520 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -355,9 +355,9 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, return FrameSize; } -// hasFP - Return true if the specified function actually has a dedicated frame -// pointer register. -bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { +// hasFPImpl - Return true if the specified function actually has a dedicated +// frame pointer register. +bool PPCFrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); // FIXME: This is pretty much broken by design: hasFP() might be called really // early, before the stack layout was calculated and thus hasFP() might return diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h index d74c87428326ca..47f249862946f3 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -107,7 +107,6 @@ class PPCFrameLowering: public TargetFrameLowering { void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override; - bool hasFP(const MachineFunction &MF) const override; bool needsFP(const MachineFunction &MF) const; void replaceFPWithRealFP(MachineFunction &MF) const; @@ -176,6 +175,9 @@ class PPCFrameLowering: public TargetFrameLowering { void updateCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const; uint64_t getStackThreshold() const override; + +protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; } // End llvm namespace diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 
7b07f6b6d15157..5d6c7c729a7617 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -17884,10 +17884,10 @@ SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, } // Override to enable LOAD_STACK_GUARD lowering on Linux. -bool PPCTargetLowering::useLoadStackGuardNode() const { - if (!Subtarget.isTargetLinux()) - return TargetLowering::useLoadStackGuardNode(); - return true; +bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const { + if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux()) + return true; + return TargetLowering::useLoadStackGuardNode(M); } // Override to disable global variable loading on Linux and insert AIX canary diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 8907c3c5a81c3c..8c7961e641c354 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1137,7 +1137,7 @@ namespace llvm { getExceptionSelectorRegister(const Constant *PersonalityFn) const override; /// Override to support customized stack guard loading. 
- bool useLoadStackGuardNode() const override; + bool useLoadStackGuardNode(const Module &M) const override; void insertSSPDeclarations(Module &M) const override; Value *getSDagStackGuard(const Module &M) const override; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 48833e8f88066c..bc2a1b295b4333 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/TargetRegistry.h" @@ -3107,9 +3108,16 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case TargetOpcode::LOAD_STACK_GUARD: { - assert(Subtarget.isTargetLinux() && - "Only Linux target is expected to contain LOAD_STACK_GUARD"); - const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; + auto M = MBB.getParent()->getFunction().getParent(); + assert( + (Subtarget.isTargetLinux() || M->getStackProtectorGuard() == "tls") && + "Only Linux target or tls mode are expected to contain " + "LOAD_STACK_GUARD"); + int64_t Offset; + if (M->getStackProtectorGuard() == "tls") + Offset = M->getStackProtectorGuardOffset(); + else + Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; MI.setDesc(get(Subtarget.isPPC64() ? 
PPC::LD : PPC::LWZ)); MachineInstrBuilder(*MI.getParent()->getParent(), MI) diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 5ad09ae7290fc5..5eba36a0bb7d69 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -19,6 +19,7 @@ #include "RISCV.h" #include "RISCVConstantPoolValue.h" #include "RISCVMachineFunctionInfo.h" +#include "RISCVRegisterInfo.h" #include "RISCVTargetMachine.h" #include "TargetInfo/RISCVTargetInfo.h" #include "llvm/ADT/APInt.h" @@ -348,6 +349,13 @@ bool RISCVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (!MO.isReg()) OS << 'i'; return false; + case 'N': // Print the register encoding as an integer (0-31) + if (!MO.isReg()) + return true; + + const RISCVRegisterInfo *TRI = STI->getRegisterInfo(); + OS << TRI->getEncodingValue(MO.getReg()); + return false; } } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index f388376c12c943..b49cbab1876d79 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -309,7 +309,7 @@ static Register getMaxPushPopReg(const MachineFunction &MF, // pointer register. This is true if frame pointer elimination is // disabled, if it needs dynamic stack realignment, if the function has // variable sized allocas, or if the frame address is taken. 
-bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const { +bool RISCVFrameLowering::hasFPImpl(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index d660f3ad67c968..f45fcdb0acd6bc 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -37,8 +37,6 @@ class RISCVFrameLowering : public TargetFrameLowering { void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; - bool hasFP(const MachineFunction &MF) const override; - bool hasBP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const override; @@ -83,6 +81,8 @@ class RISCVFrameLowering : public TargetFrameLowering { protected: const RISCVSubtarget &STI; + bool hasFPImpl(const MachineFunction &MF) const override; + private: void determineFrameLayout(MachineFunction &MF) const; void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 952072c26739f9..60ac58f824ede4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -37,6 +37,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -625,6 +627,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Subtarget.is64Bit() ? 
Legal : Custom); + if (Subtarget.is64Bit()) { + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); + } + setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); if (Subtarget.is64Bit()) @@ -7402,6 +7409,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2), Flags, DL); } + case ISD::INIT_TRAMPOLINE: + return lowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: + return lowerADJUST_TRAMPOLINE(Op, DAG); } } @@ -7417,6 +7428,126 @@ SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain, return CallResult.second; } +SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + if (!Subtarget.is64Bit()) + llvm::report_fatal_error("Trampolines only implemented for RV64"); + + // Create an MCCodeEmitter to encode instructions. + TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering(); + assert(TLO); + MCContext &MCCtx = TLO->getContext(); + + std::unique_ptr CodeEmitter( + createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx)); + + SDValue Root = Op.getOperand(0); + SDValue Trmp = Op.getOperand(1); // trampoline + SDLoc dl(Op); + + const Value *TrmpAddr = cast(Op.getOperand(4))->getValue(); + + // We store in the trampoline buffer the following instructions and data. 
+ // Offset: + // 0: auipc t2, 0 + // 4: ld t0, 24(t2) + // 8: ld t2, 16(t2) + // 12: jalr t0 + // 16: + // 24: + // 32: + + constexpr unsigned StaticChainOffset = 16; + constexpr unsigned FunctionAddressOffset = 24; + + const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo(); + assert(STI); + auto GetEncoding = [&](const MCInst &MC) { + SmallVector CB; + SmallVector Fixups; + CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI); + uint32_t Encoding = support::endian::read32le(CB.data()); + return Encoding; + }; + + SDValue OutChains[6]; + + uint32_t Encodings[] = { + // auipc t2, 0 + // Loads the current PC into t2. + GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)), + // ld t0, 24(t2) + // Loads the function address into t0. Note that we are using offsets + // pc-relative to the first instruction of the trampoline. + GetEncoding( + MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm( + FunctionAddressOffset)), + // ld t2, 16(t2) + // Load the value of the static chain. + GetEncoding( + MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm( + StaticChainOffset)), + // jalr t0 + // Jump to the function. + GetEncoding(MCInstBuilder(RISCV::JALR) + .addReg(RISCV::X0) + .addReg(RISCV::X5) + .addImm(0))}; + + // Store encoded instructions. + for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) { + SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, + DAG.getConstant(Idx * 4, dl, MVT::i64)) + : Trmp; + OutChains[Idx] = DAG.getTruncStore( + Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr, + MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32); + } + + // Now store the variable part of the trampoline. + SDValue FunctionAddress = Op.getOperand(2); + SDValue StaticChain = Op.getOperand(3); + + // Store the given static chain and function pointer in the trampoline buffer. 
+ struct OffsetValuePair { + const unsigned Offset; + const SDValue Value; + SDValue Addr = SDValue(); // Used to cache the address. + } OffsetValues[] = { + {StaticChainOffset, StaticChain}, + {FunctionAddressOffset, FunctionAddress}, + }; + for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) { + SDValue Addr = + DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, + DAG.getConstant(OffsetValue.Offset, dl, MVT::i64)); + OffsetValue.Addr = Addr; + OutChains[Idx + 4] = + DAG.getStore(Root, dl, OffsetValue.Value, Addr, + MachinePointerInfo(TrmpAddr, OffsetValue.Offset)); + } + + SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + + // The end of instructions of trampoline is the same as the static chain + // address that we computed earlier. + SDValue EndOfTrmp = OffsetValues[0].Addr; + + // Call clear cache on the trampoline instructions. + SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken, + Trmp, EndOfTrmp); + + return Chain; +} + +SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + if (!Subtarget.is64Bit()) + llvm::report_fatal_error("Trampolines only implemented for RV64"); + + return Op.getOperand(0); +} + static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags) { return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); @@ -20235,6 +20366,8 @@ RISCVTargetLowering::getConstraintType(StringRef Constraint) const { } else { if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm") return C_RegisterClass; + if (Constraint == "cr" || Constraint == "cf") + return C_RegisterClass; } return TargetLowering::getConstraintType(Constraint); } @@ -20297,6 +20430,22 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } else if (Constraint == "vm") { if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) return std::make_pair(0U, &RISCV::VMV0RegClass); + } else if 
(Constraint == "cr") { + if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) + return std::make_pair(0U, &RISCV::GPRF16CRegClass); + if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) + return std::make_pair(0U, &RISCV::GPRF32CRegClass); + if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) + return std::make_pair(0U, &RISCV::GPRPairCRegClass); + if (!VT.isVector()) + return std::make_pair(0U, &RISCV::GPRCRegClass); + } else if (Constraint == "cf") { + if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) + return std::make_pair(0U, &RISCV::FPR16CRegClass); + if (Subtarget.hasStdExtF() && VT == MVT::f32) + return std::make_pair(0U, &RISCV::FPR32CRegClass); + if (Subtarget.hasStdExtD() && VT == MVT::f64) + return std::make_pair(0U, &RISCV::FPR64CRegClass); } // Clang will correctly decode the usage of register name aliases into their diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 3864d58a129e98..c3749447955330 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -992,6 +992,9 @@ class RISCVTargetLowering : public TargetLowering { SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const; SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + bool isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, const SmallVector &ArgLocs) const; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 33363aa8b71830..250f3c10f309bf 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -338,6 +338,11 @@ def FPR16 : RISCVRegisterClass<[f16, bf16], 16, (add (sequence "F%u_H", 18, 27) // fs2-fs11 )>; +def FPR16C : RISCVRegisterClass<[f16, bf16], 
16, (add + (sequence "F%u_H", 15, 10), + (sequence "F%u_H", 8, 9) +)>; + def FPR32 : RISCVRegisterClass<[f32], 32, (add (sequence "F%u_F", 15, 10), (sequence "F%u_F", 0, 7), @@ -667,6 +672,10 @@ def GPRF32C : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 15), (sequence "X%u_W", 8, 9))>; def GPRF32NoX0 : RISCVRegisterClass<[f32], 32, (sub GPRF32, X0_W)>; +def XLenPairRI : RegInfoByHwMode< + [RV32, RV64], + [RegInfo<64, 64, 32>, RegInfo<128, 128, 64>]>; + // Dummy zero register for use in the register pair containing X0 (as X1 is // not read to or written when the X0 register pair is used). def DUMMY_REG_PAIR_WITH_X0 : RISCVReg<0, "0">; @@ -698,9 +707,8 @@ let RegAltNameIndices = [ABIRegAltName] in { } } -let RegInfos = RegInfoByHwMode<[RV32, RV64], - [RegInfo<64, 64, 32>, RegInfo<128, 128, 64>]>, - DecoderMethod = "DecodeGPRPairRegisterClass" in +let RegInfos = XLenPairRI, + DecoderMethod = "DecodeGPRPairRegisterClass" in { def GPRPair : RISCVRegisterClass<[XLenPairFVT], 64, (add X10_X11, X12_X13, X14_X15, X16_X17, X6_X7, @@ -710,6 +718,11 @@ def GPRPair : RISCVRegisterClass<[XLenPairFVT], 64, (add X0_Pair, X2_X3, X4_X5 )>; +def GPRPairC : RISCVRegisterClass<[XLenPairFVT], 64, (add + X10_X11, X12_X13, X14_X15, X8_X9 +)>; +} // let RegInfos = XLenPairRI, DecoderMethod = "DecodeGPRPairRegisterClass" + // The register class is added for inline assembly for vector mask types. 
def VM : VReg; diff --git a/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h b/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h index b98f8d0928e5b7..c7522554166a7e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h +++ b/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h @@ -33,7 +33,8 @@ class SPIRVFrameLowering : public TargetFrameLowering { void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override {} - bool hasFP(const MachineFunction &MF) const override { return false; } +protected: + bool hasFPImpl(const MachineFunction &MF) const override { return false; } }; } // namespace llvm #endif // LLVM_LIB_TARGET_SPIRV_SPIRVFRAMELOWERING_H diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index 000418be9a9e33..fa38c6cbb6ebbf 100644 --- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -249,10 +249,10 @@ bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo().hasVarSizedObjects(); } -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. -bool SparcFrameLowering::hasFP(const MachineFunction &MF) const { +// hasFPImpl - Return true if the specified function should have a dedicated +// frame pointer register. This is true if the function has variable sized +// allocas or if frame pointer elimination is disabled. 
+bool SparcFrameLowering::hasFPImpl(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.h b/llvm/lib/Target/Sparc/SparcFrameLowering.h index ab0ceb6591c63c..803856811969b2 100644 --- a/llvm/lib/Target/Sparc/SparcFrameLowering.h +++ b/llvm/lib/Target/Sparc/SparcFrameLowering.h @@ -35,7 +35,6 @@ class SparcFrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator I) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; - bool hasFP(const MachineFunction &MF) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; @@ -47,6 +46,9 @@ class SparcFrameLowering : public TargetFrameLowering { /// time). bool targetHandlesStackFrameRounding() const override { return true; } +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: // Remap input registers to output registers for leaf procedure. void remapRegsForLeafProc(MachineFunction &MF) const; diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 42b8248006d1fd..de4986ef1e89e4 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -3548,9 +3548,9 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N, } // Override to enable LOAD_STACK_GUARD lowering on Linux. 
-bool SparcTargetLowering::useLoadStackGuardNode() const { +bool SparcTargetLowering::useLoadStackGuardNode(const Module &M) const { if (!Subtarget->isTargetLinux()) - return TargetLowering::useLoadStackGuardNode(); + return TargetLowering::useLoadStackGuardNode(M); return true; } diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h index 15d09bc9309754..cc672074a4be80 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -119,7 +119,7 @@ namespace llvm { } /// Override to support customized stack guard loading. - bool useLoadStackGuardNode() const override; + bool useLoadStackGuardNode(const Module &M) const override; void insertSSPDeclarations(Module &M) const override; /// getSetCCResultType - Return the ISD::SETCC ValueType diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 8c53b8dffc2fa6..8fbd05eab5f6ee 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -832,7 +832,7 @@ void SystemZELFFrameLowering::inlineStackProbe( } } -bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const { +bool SystemZELFFrameLowering::hasFPImpl(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || MF.getFrameInfo().hasVarSizedObjects()); } @@ -1449,7 +1449,12 @@ void SystemZXPLINKFrameLowering::inlineStackProbe( fullyRecomputeLiveIns({StackExtMBB, NextMBB}); } -bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const { +bool SystemZXPLINKFrameLowering::hasFPImpl(const MachineFunction &MF) const { + // Naked functions have no stack frame pushed, so we don't have a frame + // pointer. 
+ if (MF.getFunction().hasFnAttribute(Attribute::Naked)) + return false; + return (MF.getFrameInfo().hasVarSizedObjects()); } diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index c4367b491f99ef..57fc73b78bbf7c 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -86,7 +86,6 @@ class SystemZELFFrameLowering : public SystemZFrameLowering { void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override; - bool hasFP(const MachineFunction &MF) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; void @@ -113,6 +112,9 @@ class SystemZELFFrameLowering : public SystemZFrameLowering { // Get or create the frame index of where the old frame pointer is stored. int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const override; + +protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; class SystemZXPLINKFrameLowering : public SystemZFrameLowering { @@ -147,8 +149,6 @@ class SystemZXPLINKFrameLowering : public SystemZFrameLowering { void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override; - bool hasFP(const MachineFunction &MF) const override; - void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; @@ -167,6 +167,9 @@ class SystemZXPLINKFrameLowering : public SystemZFrameLowering { // Get or create the frame index of where the old frame pointer is stored. 
int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const override; + +protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 2b065245c16f20..3c06c1fdf2b1bc 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -570,9 +570,7 @@ class SystemZTargetLowering : public TargetLowering { getExceptionSelectorRegister(const Constant *PersonalityFn) const override; /// Override to support customized stack guard loading. - bool useLoadStackGuardNode() const override { - return true; - } + bool useLoadStackGuardNode(const Module &M) const override { return true; } void insertSSPDeclarations(Module &M) const override { } diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp index 195bd4e6c3aee7..10e94c28072fda 100644 --- a/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -415,10 +415,10 @@ void VEFrameLowering::emitEpilogue(MachineFunction &MF, emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true); } -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas -// or if frame pointer elimination is disabled. -bool VEFrameLowering::hasFP(const MachineFunction &MF) const { +// hasFPImpl - Return true if the specified function should have a dedicated +// frame pointer register. This is true if the function has variable sized +// allocas or if frame pointer elimination is disabled. 
+bool VEFrameLowering::hasFPImpl(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h index 36fc8b201b648c..be9cdc01d6f446 100644 --- a/llvm/lib/Target/VE/VEFrameLowering.h +++ b/llvm/lib/Target/VE/VEFrameLowering.h @@ -39,7 +39,6 @@ class VEFrameLowering : public TargetFrameLowering { eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; - bool hasFP(const MachineFunction &MF) const override; bool hasBP(const MachineFunction &MF) const; bool hasGOT(const MachineFunction &MF) const; @@ -69,6 +68,8 @@ class VEFrameLowering : public TargetFrameLowering { protected: const VESubtarget &STI; + bool hasFPImpl(const MachineFunction &MF) const override; + private: // Returns true if MF is a leaf procedure. bool isLeafProc(MachineFunction &MF) const; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 8f3ad167ae41fc..f0334ccb3afcb5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -98,7 +98,7 @@ bool WebAssemblyFrameLowering::hasBP(const MachineFunction &MF) const { /// Return true if the specified function should have a dedicated frame pointer /// register. 
-bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const { +bool WebAssemblyFrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); // When we have var-sized objects, we move the stack pointer by an unknown diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index 528b33e34beeef..710d5173d64dba 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -41,7 +41,6 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering { void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool isSupportedStackID(TargetStackID::Value ID) const override; DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override; @@ -68,6 +67,9 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering { static unsigned getOpcGlobGet(const MachineFunction &MF); static unsigned getOpcGlobSet(const MachineFunction &MF); +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: bool hasBP(const MachineFunction &MF) const; bool needsSPForLocalFrame(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 4f83267c999e4a..a35b04606e595d 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -91,10 +91,10 @@ bool X86FrameLowering::needsFrameIndexResolution( MF.getInfo()->getHasPushSequences(); } -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. 
This is true if the function has variable sized allocas -/// or if frame pointer elimination is disabled. -bool X86FrameLowering::hasFP(const MachineFunction &MF) const { +/// hasFPImpl - Return true if the specified function should have a dedicated +/// frame pointer register. This is true if the function has variable sized +/// allocas or if frame pointer elimination is disabled. +bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); return (MF.getTarget().Options.DisableFramePointerElim(MF) || TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 78217911dacadf..02fe8ee02a7e45 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -105,7 +105,6 @@ class X86FrameLowering : public TargetFrameLowering { void spillFPBP(MachineFunction &MF) const override; - bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; bool needsFrameIndexResolution(const MachineFunction &MF) const override; @@ -201,6 +200,9 @@ class X86FrameLowering : public TargetFrameLowering { /// frame of the top of stack function) as part of it's ABI. bool has128ByteRedZone(const MachineFunction& MF) const; +protected: + bool hasFPImpl(const MachineFunction &MF) const override; + private: bool isWin64Prologue(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0e586032136144..ff2c7964e4186f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2679,7 +2679,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // This has so far only been implemented for 64-bit MachO. 
-bool X86TargetLowering::useLoadStackGuardNode() const { +bool X86TargetLowering::useLoadStackGuardNode(const Module &M) const { return Subtarget.isTargetMachO() && Subtarget.is64Bit(); } @@ -56538,14 +56538,9 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, SDValue Op1 = N->getOperand(1); SDLoc DL(N); - // TODO: Add NoOpaque handling to isConstantIntBuildVectorOrConstantInt. auto IsNonOpaqueConstant = [&](SDValue Op) { - if (SDNode *C = DAG.isConstantIntBuildVectorOrConstantInt(Op)) { - if (auto *Cst = dyn_cast(C)) - return !Cst->isOpaque(); - return true; - } - return false; + return DAG.isConstantIntBuildVectorOrConstantInt(Op, + /*AllowOpaques*/ false); }; // X86 can't encode an immediate LHS of a sub. See if we can push the diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3b1bd0ad9a267e..14ada1721fd40e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1568,7 +1568,7 @@ namespace llvm { /// returns the address of that location. Otherwise, returns nullptr. 
Value *getIRStackGuard(IRBuilderBase &IRB) const override; - bool useLoadStackGuardNode() const override; + bool useLoadStackGuardNode(const Module &M) const override; bool useStackGuardXorFP() const override; void insertSSPDeclarations(Module &M) const override; Value *getSDagStackGuard(const Module &M) const override; diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index b3753692ac2a05..ec18eca82b52d1 100644 --- a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -215,7 +215,7 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti) // Do nothing } -bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const { +bool XCoreFrameLowering::hasFPImpl(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MF.getFrameInfo().hasVarSizedObjects(); } diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.h b/llvm/lib/Target/XCore/XCoreFrameLowering.h index a914d82e198947..b06a6f922cdde0 100644 --- a/llvm/lib/Target/XCore/XCoreFrameLowering.h +++ b/llvm/lib/Target/XCore/XCoreFrameLowering.h @@ -46,8 +46,6 @@ namespace llvm { eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; - bool hasFP(const MachineFunction &MF) const override; - void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; @@ -58,6 +56,9 @@ namespace llvm { static int stackSlotSize() { return 4; } + + protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; } diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp index e24cb7714d3646..f46d386c9186aa 100644 --- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.cpp @@ -27,7 +27,7 @@ XtensaFrameLowering::XtensaFrameLowering(const XtensaSubtarget &STI) Align(4)), 
TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {} -bool XtensaFrameLowering::hasFP(const MachineFunction &MF) const { +bool XtensaFrameLowering::hasFPImpl(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI.hasVarSizedObjects(); diff --git a/llvm/lib/Target/Xtensa/XtensaFrameLowering.h b/llvm/lib/Target/Xtensa/XtensaFrameLowering.h index 9120215af08b52..3f946e1ea730f9 100644 --- a/llvm/lib/Target/Xtensa/XtensaFrameLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaFrameLowering.h @@ -24,8 +24,6 @@ class XtensaFrameLowering : public TargetFrameLowering { public: XtensaFrameLowering(const XtensaSubtarget &STI); - bool hasFP(const MachineFunction &MF) const override; - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. void emitPrologue(MachineFunction &, MachineBasicBlock &) const override; @@ -50,6 +48,9 @@ class XtensaFrameLowering : public TargetFrameLowering { void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; + +protected: + bool hasFPImpl(const MachineFunction &MF) const override; }; } // namespace llvm diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index bd0a337e579e48..7feebbe420ae53 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -57,9 +57,9 @@ static cl::opt MaxBlockPredecessors( "considered during the estimation of dead code")); static cl::opt MinFunctionSize( - "funcspec-min-function-size", cl::init(300), cl::Hidden, cl::desc( - "Don't specialize functions that have less than this number of " - "instructions")); + "funcspec-min-function-size", cl::init(500), cl::Hidden, + cl::desc("Don't specialize functions that have less than this number of " + "instructions")); static cl::opt MaxCodeSizeGrowth( "funcspec-max-codesize-growth", 
cl::init(3), cl::Hidden, cl::desc( @@ -641,12 +641,17 @@ bool FunctionSpecializer::run() { Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues); } + // When specializing literal constants is enabled, always require functions + // to be larger than MinFunctionSize, to prevent excessive specialization. + const bool RequireMinSize = + !ForceSpecialization && + (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline)); + // If the code metrics reveal that we shouldn't duplicate the function, // or if the code size implies that this function is easy to get inlined, // then we shouldn't specialize it. if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || - (!ForceSpecialization && !F.hasFnAttribute(Attribute::NoInline) && - Metrics.NumInsts < MinFunctionSize)) + (RequireMinSize && Metrics.NumInsts < MinFunctionSize)) continue; // TODO: For now only consider recursive functions when running multiple diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 954c4cf19c2077..c8b9f166b16020 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -4822,7 +4822,8 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I, // We can only sink load instructions if there is nothing between the load and // the end of block that could change the value. - if (I->mayReadFromMemory()) { + if (I->mayReadFromMemory() && + !I->hasMetadata(LLVMContext::MD_invariant_load)) { // We don't want to do any sophisticated alias analysis, so we only check // the instructions after I in I's parent block if we try to sink to its // successor block. 
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 7bb4b55fcb7cf2..c97a77d12e3e9d 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1229,6 +1229,9 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_logb: case LibFunc_logbf: case LibFunc_logbl: + case LibFunc_ilogb: + case LibFunc_ilogbf: + case LibFunc_ilogbl: case LibFunc_logf: case LibFunc_logl: case LibFunc_nearbyint: diff --git a/llvm/lib/Transforms/Utils/CtorUtils.cpp b/llvm/lib/Transforms/Utils/CtorUtils.cpp index 507729bc5ebc06..968446c4eee117 100644 --- a/llvm/lib/Transforms/Utils/CtorUtils.cpp +++ b/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -45,9 +45,9 @@ static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemov } // Create the new global and insert it next to the existing list. - GlobalVariable *NGV = - new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), - CA, "", GCL->getThreadLocalMode()); + GlobalVariable *NGV = new GlobalVariable( + CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", + GCL->getThreadLocalMode(), GCL->getAddressSpace()); GCL->getParent()->insertGlobalVariable(GCL->getIterator(), NGV); NGV->takeName(GCL); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 7ded51d9e3abd1..c1b97791331bcf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1555,7 +1555,8 @@ VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, void VPSlotTracker::assignName(const VPValue *V) { assert(!VPValue2Name.contains(V) && "VPValue already has a name!"); auto *UV = V->getUnderlyingValue(); - if (!UV) { + auto *VPI = dyn_cast_or_null(V->getDefiningRecipe()); + if (!UV && !(VPI && !VPI->getName().empty())) { VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str(); NextSlot++; return; @@ -1564,10 +1565,15 @@ void 
VPSlotTracker::assignName(const VPValue *V) { // Use the name of the underlying Value, wrapped in "ir<>", and versioned by // appending ".Number" to the name if there are multiple uses. std::string Name; - raw_string_ostream S(Name); - UV->printAsOperand(S, false); + if (UV) { + raw_string_ostream S(Name); + UV->printAsOperand(S, false); + } else + Name = VPI->getName(); + assert(!Name.empty() && "Name cannot be empty."); - std::string BaseName = (Twine("ir<") + Name + Twine(">")).str(); + StringRef Prefix = UV ? "ir<" : "vp<%"; + std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str(); // First assign the base name for V. const auto &[A, _] = VPValue2Name.insert({V, BaseName}); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fd97dda6dc1b25..59a084401cc9bf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1414,6 +1414,9 @@ class VPInstruction : public VPRecipeWithIRFlags, /// Returns true if this VPInstruction's operands are single scalars and the /// result is also a single scalar. bool isSingleScalar() const; + + /// Returns the symbolic name assigned to the VPInstruction. 
+ StringRef getName() const { return Name; } }; /// A recipe to wrap on original IR instruction not to be modified during diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir index ae04cc77dcaf13..b045deebc56e03 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir @@ -135,20 +135,13 @@ name: test_combine_trunc_build_vector legalized: true body: | bb.1: - ; CHECK-PRE-LABEL: name: test_combine_trunc_build_vector - ; CHECK-PRE: %arg1:_(s64) = COPY $x0 - ; CHECK-PRE-NEXT: %arg2:_(s64) = COPY $x0 - ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64) - ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64) - ; CHECK-PRE-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-PRE-NEXT: $x0 = COPY %small(<2 x s32>) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_build_vector - ; CHECK-POST: %arg1:_(s64) = COPY $x0 - ; CHECK-POST-NEXT: %arg2:_(s64) = COPY $x0 - ; CHECK-POST-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - ; CHECK-POST-NEXT: %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>) - ; CHECK-POST-NEXT: $x0 = COPY %small(<2 x s32>) + ; CHECK-LABEL: name: test_combine_trunc_build_vector + ; CHECK: %arg1:_(s64) = COPY $x0 + ; CHECK-NEXT: %arg2:_(s64) = COPY $x0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64) + ; CHECK-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) + ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>) %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x0 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir index 4a38b5d4c63dd9..9a2b9dd4b2b608 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir @@ -32,20 +32,12 @@ legalized: true body: | bb.1: liveins: $h0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s16 - ; CHECK-PRE: liveins: $h0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) - ; CHECK-PRE-NEXT: $w0 = COPY [[ANYEXT]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s16 - ; CHECK-POST: liveins: $h0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s64) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16 + ; CHECK: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) %0:_(s16) = COPY $h0 %1:_(s64) = G_ANYEXT %0(s16) %2:_(s32) = G_TRUNC %1(s64) @@ -82,20 +74,12 @@ legalized: true body: | bb.1: liveins: $h0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_sext_s32_s16 - ; CHECK-PRE: liveins: $h0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-PRE-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) - ; CHECK-PRE-NEXT: $w0 = COPY [[SEXT]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_sext_s32_s16 - ; CHECK-POST: liveins: $h0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-POST-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SEXT]](s64) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_sext_s32_s16 + ; CHECK: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: 
[[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16) + ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32) %0:_(s16) = COPY $h0 %1:_(s64) = G_SEXT %0(s16) %2:_(s32) = G_TRUNC %1(s64) @@ -107,20 +91,12 @@ legalized: true body: | bb.1: liveins: $h0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_zext_s32_s16 - ; CHECK-PRE: liveins: $h0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) - ; CHECK-PRE-NEXT: $w0 = COPY [[ZEXT]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_zext_s32_s16 - ; CHECK-POST: liveins: $h0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 - ; CHECK-POST-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ZEXT]](s64) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_zext_s32_s16 + ; CHECK: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16) + ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32) %0:_(s16) = COPY $h0 %1:_(s64) = G_ZEXT %0(s16) %2:_(s32) = G_TRUNC %1(s64) @@ -132,19 +108,11 @@ legalized: true body: | bb.1: liveins: $w0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s32 - ; CHECK-PRE: liveins: $w0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-PRE-NEXT: $w0 = COPY [[COPY]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s32 - ; CHECK-POST: liveins: $w0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s64) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s32 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) 
= COPY $w0 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) %0:_(s32) = COPY $w0 %1:_(s64) = G_ANYEXT %0(s32) %2:_(s32) = G_TRUNC %1(s64) @@ -156,20 +124,12 @@ legalized: true body: | bb.1: liveins: $x0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s64 - ; CHECK-PRE: liveins: $x0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-PRE-NEXT: $w0 = COPY [[TRUNC]](s32) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s64 - ; CHECK-POST: liveins: $x0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[COPY]](s64) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s128) - ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s64 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32) %0:_(s64) = COPY $x0 %1:_(s128) = G_ANYEXT %0(s64) %2:_(s32) = G_TRUNC %1(s128) diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll index fc1a0c71d4cdf0..ce7e3101a7a541 100644 --- a/llvm/test/CodeGen/AArch64/add.ll +++ b/llvm/test/CodeGen/AArch64/add.ll @@ -171,11 +171,7 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll 
index f005ca47ad124f..09f00b3845f25f 100644 --- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -530,10 +530,10 @@ define i64 @test_2_selects(i8 zeroext %a) { ; CHECK-LABEL: test_2_selects: ; CHECK: ; %bb.0: ; CHECK-NEXT: add w9, w0, #24 -; CHECK-NEXT: mov w8, #131 +; CHECK-NEXT: mov w8, #131 ; =0x83 ; CHECK-NEXT: and w9, w9, #0xff ; CHECK-NEXT: cmp w9, #81 -; CHECK-NEXT: mov w9, #57 +; CHECK-NEXT: mov w9, #57 ; =0x39 ; CHECK-NEXT: csel x8, x8, xzr, lo ; CHECK-NEXT: csel x9, xzr, x9, eq ; CHECK-NEXT: add x0, x8, x9 diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll index 5385a917619fa0..459daece90deed 100644 --- a/llvm/test/CodeGen/AArch64/andorxor.ll +++ b/llvm/test/CodeGen/AArch64/andorxor.ll @@ -463,11 +463,7 @@ define void @and_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret @@ -514,11 +510,7 @@ define void @or_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret @@ -565,11 +557,7 @@ define void @xor_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov 
v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll index 79cfeedb74bce0..bbdf8b0a13d358 100644 --- a/llvm/test/CodeGen/AArch64/bitcast.ll +++ b/llvm/test/CodeGen/AArch64/bitcast.ll @@ -60,11 +60,7 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){ ; CHECK-GI-LABEL: bitcast_v4i8_i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = add <4 x i8> %a, %b @@ -116,9 +112,7 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){ ; CHECK-GI-LABEL: bitcast_v2i16_i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-NEXT: xtn v0.4h, v1.4s +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = add <2 x i16> %a, %b @@ -418,9 +412,7 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){ ; CHECK-GI-LABEL: bitcast_v2i16_v4i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-NEXT: xtn v0.4h, v1.4s +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: mov b1, v0.b[1] ; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] ; CHECK-GI-NEXT: mov b3, v0.b[2] @@ -455,11 +447,7 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){ ; CHECK-GI-LABEL: bitcast_v4i8_v2i16: ; CHECK-GI: // %bb.0: ; 
CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index d800b2549cf223..0033999b9bd51d 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -33,18 +33,8 @@ define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) { ; ; CHECK-GI-LABEL: concat2: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v2.h[0], v0.h[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov v3.h[0], v1.h[0] -; CHECK-GI-NEXT: mov v2.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v3.h[1], v1.h[1] -; CHECK-GI-NEXT: mov v2.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v3.h[2], v1.h[2] -; CHECK-GI-NEXT: mov v2.h[3], v0.h[3] -; CHECK-GI-NEXT: mov v3.h[3], v1.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v2.8h -; CHECK-GI-NEXT: xtn v1.8b, v3.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: mov v0.s[0], w8 ; CHECK-GI-NEXT: fmov w8, s1 @@ -74,15 +64,9 @@ define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) { ; ; CHECK-GI-LABEL: concat4: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v2.s[0], v0.s[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov v2.s[1], v0.s[1] -; CHECK-GI-NEXT: mov v0.s[0], v1.s[0] -; CHECK-GI-NEXT: xtn v2.4h, v2.4s -; CHECK-GI-NEXT: mov v0.s[1], v1.s[1] -; CHECK-GI-NEXT: xtn v1.4h, v0.4s -; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h +; 
CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: mov v0.s[0], w8 ; CHECK-GI-NEXT: fmov w8, s1 ; CHECK-GI-NEXT: mov v0.s[1], w8 @@ -183,12 +167,11 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) { ; ; CHECK-GI-LABEL: concat_v8s16_v2s16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: ldr h1, [x0, #2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: xtn v0.4h, v0.4s -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: ldrh w9, [x0, #2] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: mov v0.s[0], v1.s[0] ; CHECK-GI-NEXT: ret %a = load <2 x i16>, ptr %ptr %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> @@ -238,34 +221,14 @@ define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, < ; ; CHECK-GI-LABEL: concat_v16s8_v4s8_reg: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v4.h[0], v0.h[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov v5.h[0], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-GI-NEXT: mov v6.h[0], v2.h[0] -; CHECK-GI-NEXT: mov v7.h[0], v3.h[0] -; CHECK-GI-NEXT: mov v4.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v5.h[1], v1.h[1] -; CHECK-GI-NEXT: mov v6.h[1], v2.h[1] -; CHECK-GI-NEXT: mov v7.h[1], v3.h[1] -; CHECK-GI-NEXT: mov v4.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v5.h[2], v1.h[2] -; CHECK-GI-NEXT: mov v6.h[2], v2.h[2] -; CHECK-GI-NEXT: mov v7.h[2], v3.h[2] -; CHECK-GI-NEXT: mov v4.h[3], v0.h[3] -; CHECK-GI-NEXT: mov v5.h[3], v1.h[3] -; CHECK-GI-NEXT: mov v6.h[3], v2.h[3] -; CHECK-GI-NEXT: mov v7.h[3], v3.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v4.8h -; CHECK-GI-NEXT: xtn v1.8b, v5.8h -; CHECK-GI-NEXT: xtn v2.8b, v6.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: mov v0.s[0], w8 
; CHECK-GI-NEXT: fmov w8, s1 -; CHECK-GI-NEXT: xtn v1.8b, v7.8h +; CHECK-GI-NEXT: uzp1 v2.8b, v2.8b, v0.8b ; CHECK-GI-NEXT: mov v0.s[1], w8 +; CHECK-GI-NEXT: uzp1 v1.8b, v3.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s2 ; CHECK-GI-NEXT: mov v0.s[2], w8 ; CHECK-GI-NEXT: fmov w8, s1 @@ -291,29 +254,17 @@ define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> % ; ; CHECK-GI-LABEL: concat_v8s16_v2s16_reg: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v4.s[0], v0.s[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov v5.s[0], v1.s[0] -; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-GI-NEXT: mov v4.s[1], v0.s[1] -; CHECK-GI-NEXT: mov v5.s[1], v1.s[1] -; CHECK-GI-NEXT: mov v1.s[0], v2.s[0] -; CHECK-GI-NEXT: xtn v0.4h, v4.4s -; CHECK-GI-NEXT: xtn v4.4h, v5.4s -; CHECK-GI-NEXT: mov v1.s[1], v2.s[1] -; CHECK-GI-NEXT: mov v2.s[0], v3.s[0] +; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: xtn v1.4h, v1.4s -; CHECK-GI-NEXT: mov v2.s[1], v3.s[1] ; CHECK-GI-NEXT: mov v0.s[0], w8 -; CHECK-GI-NEXT: fmov w8, s4 -; CHECK-GI-NEXT: xtn v2.4h, v2.4s -; CHECK-GI-NEXT: mov v0.s[1], w8 ; CHECK-GI-NEXT: fmov w8, s1 -; CHECK-GI-NEXT: mov v0.s[2], w8 +; CHECK-GI-NEXT: uzp1 v2.4h, v2.4h, v0.4h +; CHECK-GI-NEXT: mov v0.s[1], w8 +; CHECK-GI-NEXT: uzp1 v1.4h, v3.4h, v0.4h ; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov v0.s[2], w8 +; CHECK-GI-NEXT: fmov w8, s1 ; CHECK-GI-NEXT: mov v0.s[3], w8 ; CHECK-GI-NEXT: ret %b = shufflevector <2 x i16> %A, <2 x i16> %B, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll index baab53d8bdbd46..66f26fc9d85973 100644 --- a/llvm/test/CodeGen/AArch64/fcmp.ll +++ b/llvm/test/CodeGen/AArch64/fcmp.ll @@ -922,26 +922,27 @@ define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> 
%b, <3 x i32> %d, <3 x ; CHECK-GI-LABEL: v3f64_i32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4 +; CHECK-GI-NEXT: mov w8, #31 // =0x1f ; CHECK-GI-NEXT: fcmp d2, d5 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0] ; CHECK-GI-NEXT: mov v1.s[0], w8 ; CHECK-GI-NEXT: cset w9, mi -; CHECK-GI-NEXT: mov v2.d[0], x9 +; CHECK-GI-NEXT: mov v2.s[0], w9 ; CHECK-GI-NEXT: mov w9, #-1 // =0xffffffff ; CHECK-GI-NEXT: fcmgt v0.2d, v3.2d, v0.2d ; CHECK-GI-NEXT: mov v1.s[1], w8 ; CHECK-GI-NEXT: mov v3.s[0], w9 +; CHECK-GI-NEXT: xtn v0.2s, v0.2d ; CHECK-GI-NEXT: mov v1.s[2], w8 -; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v2.4s ; CHECK-GI-NEXT: mov v3.s[1], w9 +; CHECK-GI-NEXT: mov v0.d[1], v2.d[0] +; CHECK-GI-NEXT: mov v3.s[2], w9 ; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: neg v1.4s, v1.4s -; CHECK-GI-NEXT: mov v3.s[2], w9 ; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: eor v1.16b, v0.16b, v3.16b ; CHECK-GI-NEXT: and v0.16b, v6.16b, v0.16b diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index c5bde81ba4a5ea..81c1a64f2d434f 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -7937,10 +7937,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; ; CHECK-GI-FP16-LABEL: stofp_v2i8_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-FP16-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-FP16-NEXT: xtn v0.4h, v1.4s +; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: scvtf v0.4h, 
v0.4h diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll index 9e748c9641aa8c..5e7f71c18c27a0 100644 --- a/llvm/test/CodeGen/AArch64/mul.ll +++ b/llvm/test/CodeGen/AArch64/mul.ll @@ -183,11 +183,7 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/AArch64/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..fb559867a2d47b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/naked-fn-with-frame-pointer.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple aarch64 | FileCheck %s -check-prefixes=CHECK-LE +; RUN: llc < %s -mtriple aarch64_be | FileCheck %s -check-prefixes=CHECK-BE + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LE-LABEL: naked: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: bl main +; +; CHECK-BE-LABEL: naked: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: bl main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LE-LABEL: normal: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-LE-NEXT: mov x29, sp +; CHECK-LE-NEXT: .cfi_def_cfa w29, 16 +; CHECK-LE-NEXT: .cfi_offset w30, -8 +; CHECK-LE-NEXT: .cfi_offset w29, -16 +; CHECK-LE-NEXT: bl main +; +; CHECK-BE-LABEL: normal: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-BE-NEXT: mov x29, sp +; CHECK-BE-NEXT: .cfi_def_cfa w29, 16 +; CHECK-BE-NEXT: .cfi_offset w30, -8 +; CHECK-BE-NEXT: .cfi_offset w29, -16 +; CHECK-BE-NEXT: bl main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/AArch64/ptrauth-type-info-vptr-discr.ll b/llvm/test/CodeGen/AArch64/ptrauth-type-info-vptr-discr.ll new file mode 100644 index 00000000000000..fbd777911aecb2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ptrauth-type-info-vptr-discr.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple aarch64-linux-gnu -mattr=+pauth -filetype=asm -o - %s | FileCheck --check-prefix=ELF %s +; RUN: llc -mtriple aarch64-apple-darwin -mattr=+pauth -filetype=asm -o - %s | FileCheck --check-prefix=MACHO %s + +; ELF-LABEL: _ZTI10Disc: +; ELF-NEXT: .xword (_ZTVN10__cxxabiv117__class_type_infoE+16)@AUTH(da,45546,addr) +; ELF-LABEL: _ZTI10NoDisc: +; ELF-NEXT: .xword (_ZTVN10__cxxabiv117__class_type_infoE+16)@AUTH(da,45546) + +; MACHO-LABEL: __ZTI10Disc: +; MACHO-NEXT: .quad (__ZTVN10__cxxabiv117__class_type_infoE+16)@AUTH(da,45546,addr) +; MACHO-LABEL: __ZTI10NoDisc: +; MACHO-NEXT: .quad (__ZTVN10__cxxabiv117__class_type_infoE+16)@AUTH(da,45546) + + +@_ZTI10Disc = constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2, i64 45546, ptr @_ZTI10Disc), ptr @_ZTS10Disc }, align 8 +@_ZTS10Disc = constant [4 x i8] c"Disc", align 1 + +@_ZTI10NoDisc = constant { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), i32 2, i64 45546), ptr @_ZTS10NoDisc }, align 8 +@_ZTS10NoDisc = constant [6 x i8] c"NoDisc", align 1 + +@_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr] diff --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll index 8e7586bd4843c7..c298e6d8a1ff2a 100644 --- a/llvm/test/CodeGen/AArch64/sub.ll +++ b/llvm/test/CodeGen/AArch64/sub.ll @@ -171,11 +171,7 @@ define void @v4i8(ptr 
%p1, ptr %p2) { ; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 ; CHECK-GI-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov v1.h[0], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GI-NEXT: mov v1.h[2], v0.h[2] -; CHECK-GI-NEXT: mov v1.h[3], v0.h[3] -; CHECK-GI-NEXT: xtn v0.8b, v1.8h +; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll index 276f23703df3df..20659cde83ee00 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll @@ -140,98 +140,65 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) { ; ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops: ; SVE2_128_NOMAX: // %bb.0: -; SVE2_128_NOMAX-NEXT: sub sp, sp, #16 -; SVE2_128_NOMAX-NEXT: .cfi_def_cfa_offset 16 ; SVE2_128_NOMAX-NEXT: ldr d0, [x1] -; SVE2_128_NOMAX-NEXT: mov z1.b, z0.b[7] -; SVE2_128_NOMAX-NEXT: mov z2.b, z0.b[6] -; SVE2_128_NOMAX-NEXT: mov z3.b, z0.b[4] -; SVE2_128_NOMAX-NEXT: fmov w8, s1 ; SVE2_128_NOMAX-NEXT: ldr d1, [x0] -; SVE2_128_NOMAX-NEXT: fmov w9, s2 ; SVE2_128_NOMAX-NEXT: mov z2.b, z0.b[3] -; SVE2_128_NOMAX-NEXT: mov z1.b, z1.b[1] -; SVE2_128_NOMAX-NEXT: strb w8, [sp, #15] -; SVE2_128_NOMAX-NEXT: fmov w8, s3 ; SVE2_128_NOMAX-NEXT: mov z3.b, z0.b[2] -; SVE2_128_NOMAX-NEXT: strb w9, [sp, #14] -; SVE2_128_NOMAX-NEXT: mov z0.b, z0.b[1] -; SVE2_128_NOMAX-NEXT: fmov w9, s2 -; SVE2_128_NOMAX-NEXT: strb w8, [sp, #13] -; SVE2_128_NOMAX-NEXT: strb w8, [sp, #12] -; SVE2_128_NOMAX-NEXT: fmov w8, s3 -; SVE2_128_NOMAX-NEXT: strb w9, [sp, #11] -; SVE2_128_NOMAX-NEXT: fmov w9, s0 -; SVE2_128_NOMAX-NEXT: strb w8, [sp, #10] -; SVE2_128_NOMAX-NEXT: fmov w8, s1 -; SVE2_128_NOMAX-NEXT: strb w9, [sp, #9] -; SVE2_128_NOMAX-NEXT: strb 
w8, [sp, #8] -; SVE2_128_NOMAX-NEXT: ldr d0, [sp, #8] -; SVE2_128_NOMAX-NEXT: add sp, sp, #16 +; SVE2_128_NOMAX-NEXT: mov z4.b, z0.b[1] +; SVE2_128_NOMAX-NEXT: mov z1.b, z1.b[1] +; SVE2_128_NOMAX-NEXT: mov z5.b, z0.b[7] +; SVE2_128_NOMAX-NEXT: mov z6.b, z0.b[6] +; SVE2_128_NOMAX-NEXT: mov z0.b, z0.b[4] +; SVE2_128_NOMAX-NEXT: zip1 z2.b, z3.b, z2.b +; SVE2_128_NOMAX-NEXT: zip1 z1.b, z1.b, z4.b +; SVE2_128_NOMAX-NEXT: zip1 z3.b, z6.b, z5.b +; SVE2_128_NOMAX-NEXT: zip1 z0.b, z0.b, z0.b +; SVE2_128_NOMAX-NEXT: zip1 z1.h, z1.h, z2.h +; SVE2_128_NOMAX-NEXT: zip1 z0.h, z0.h, z3.h +; SVE2_128_NOMAX-NEXT: zip1 z0.s, z1.s, z0.s +; SVE2_128_NOMAX-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2_128_NOMAX-NEXT: ret ; ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops: ; SVE2_NOMIN_NOMAX: // %bb.0: -; SVE2_NOMIN_NOMAX-NEXT: sub sp, sp, #16 -; SVE2_NOMIN_NOMAX-NEXT: .cfi_def_cfa_offset 16 ; SVE2_NOMIN_NOMAX-NEXT: ldr d0, [x1] -; SVE2_NOMIN_NOMAX-NEXT: mov z1.b, z0.b[7] -; SVE2_NOMIN_NOMAX-NEXT: mov z2.b, z0.b[6] -; SVE2_NOMIN_NOMAX-NEXT: mov z3.b, z0.b[4] -; SVE2_NOMIN_NOMAX-NEXT: fmov w8, s1 ; SVE2_NOMIN_NOMAX-NEXT: ldr d1, [x0] -; SVE2_NOMIN_NOMAX-NEXT: fmov w9, s2 ; SVE2_NOMIN_NOMAX-NEXT: mov z2.b, z0.b[3] -; SVE2_NOMIN_NOMAX-NEXT: mov z1.b, z1.b[1] -; SVE2_NOMIN_NOMAX-NEXT: strb w8, [sp, #15] -; SVE2_NOMIN_NOMAX-NEXT: fmov w8, s3 ; SVE2_NOMIN_NOMAX-NEXT: mov z3.b, z0.b[2] -; SVE2_NOMIN_NOMAX-NEXT: strb w9, [sp, #14] -; SVE2_NOMIN_NOMAX-NEXT: mov z0.b, z0.b[1] -; SVE2_NOMIN_NOMAX-NEXT: fmov w9, s2 -; SVE2_NOMIN_NOMAX-NEXT: strb w8, [sp, #13] -; SVE2_NOMIN_NOMAX-NEXT: strb w8, [sp, #12] -; SVE2_NOMIN_NOMAX-NEXT: fmov w8, s3 -; SVE2_NOMIN_NOMAX-NEXT: strb w9, [sp, #11] -; SVE2_NOMIN_NOMAX-NEXT: fmov w9, s0 -; SVE2_NOMIN_NOMAX-NEXT: strb w8, [sp, #10] -; SVE2_NOMIN_NOMAX-NEXT: fmov w8, s1 -; SVE2_NOMIN_NOMAX-NEXT: strb w9, [sp, #9] -; SVE2_NOMIN_NOMAX-NEXT: strb w8, [sp, #8] -; SVE2_NOMIN_NOMAX-NEXT: ldr d0, [sp, #8] -; SVE2_NOMIN_NOMAX-NEXT: add sp, sp, #16 +; 
SVE2_NOMIN_NOMAX-NEXT: mov z4.b, z0.b[1] +; SVE2_NOMIN_NOMAX-NEXT: mov z1.b, z1.b[1] +; SVE2_NOMIN_NOMAX-NEXT: mov z5.b, z0.b[7] +; SVE2_NOMIN_NOMAX-NEXT: mov z6.b, z0.b[6] +; SVE2_NOMIN_NOMAX-NEXT: mov z0.b, z0.b[4] +; SVE2_NOMIN_NOMAX-NEXT: zip1 z2.b, z3.b, z2.b +; SVE2_NOMIN_NOMAX-NEXT: zip1 z1.b, z1.b, z4.b +; SVE2_NOMIN_NOMAX-NEXT: zip1 z3.b, z6.b, z5.b +; SVE2_NOMIN_NOMAX-NEXT: zip1 z0.b, z0.b, z0.b +; SVE2_NOMIN_NOMAX-NEXT: zip1 z1.h, z1.h, z2.h +; SVE2_NOMIN_NOMAX-NEXT: zip1 z0.h, z0.h, z3.h +; SVE2_NOMIN_NOMAX-NEXT: zip1 z0.s, z1.s, z0.s +; SVE2_NOMIN_NOMAX-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2_NOMIN_NOMAX-NEXT: ret ; ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops: ; SVE2_MIN_256_NOMAX: // %bb.0: -; SVE2_MIN_256_NOMAX-NEXT: sub sp, sp, #16 -; SVE2_MIN_256_NOMAX-NEXT: .cfi_def_cfa_offset 16 ; SVE2_MIN_256_NOMAX-NEXT: ldr d0, [x1] -; SVE2_MIN_256_NOMAX-NEXT: mov z1.b, z0.b[7] -; SVE2_MIN_256_NOMAX-NEXT: mov z2.b, z0.b[6] -; SVE2_MIN_256_NOMAX-NEXT: mov z3.b, z0.b[4] -; SVE2_MIN_256_NOMAX-NEXT: fmov w8, s1 ; SVE2_MIN_256_NOMAX-NEXT: ldr d1, [x0] -; SVE2_MIN_256_NOMAX-NEXT: fmov w9, s2 ; SVE2_MIN_256_NOMAX-NEXT: mov z2.b, z0.b[3] -; SVE2_MIN_256_NOMAX-NEXT: mov z1.b, z1.b[1] -; SVE2_MIN_256_NOMAX-NEXT: strb w8, [sp, #15] -; SVE2_MIN_256_NOMAX-NEXT: fmov w8, s3 ; SVE2_MIN_256_NOMAX-NEXT: mov z3.b, z0.b[2] -; SVE2_MIN_256_NOMAX-NEXT: strb w9, [sp, #14] -; SVE2_MIN_256_NOMAX-NEXT: mov z0.b, z0.b[1] -; SVE2_MIN_256_NOMAX-NEXT: fmov w9, s2 -; SVE2_MIN_256_NOMAX-NEXT: strb w8, [sp, #13] -; SVE2_MIN_256_NOMAX-NEXT: strb w8, [sp, #12] -; SVE2_MIN_256_NOMAX-NEXT: fmov w8, s3 -; SVE2_MIN_256_NOMAX-NEXT: strb w9, [sp, #11] -; SVE2_MIN_256_NOMAX-NEXT: fmov w9, s0 -; SVE2_MIN_256_NOMAX-NEXT: strb w8, [sp, #10] -; SVE2_MIN_256_NOMAX-NEXT: fmov w8, s1 -; SVE2_MIN_256_NOMAX-NEXT: strb w9, [sp, #9] -; SVE2_MIN_256_NOMAX-NEXT: strb w8, [sp, #8] -; SVE2_MIN_256_NOMAX-NEXT: ldr d0, [sp, #8] -; SVE2_MIN_256_NOMAX-NEXT: add sp, sp, #16 +; 
SVE2_MIN_256_NOMAX-NEXT: mov z4.b, z0.b[1] +; SVE2_MIN_256_NOMAX-NEXT: mov z1.b, z1.b[1] +; SVE2_MIN_256_NOMAX-NEXT: mov z5.b, z0.b[7] +; SVE2_MIN_256_NOMAX-NEXT: mov z6.b, z0.b[6] +; SVE2_MIN_256_NOMAX-NEXT: mov z0.b, z0.b[4] +; SVE2_MIN_256_NOMAX-NEXT: zip1 z2.b, z3.b, z2.b +; SVE2_MIN_256_NOMAX-NEXT: zip1 z1.b, z1.b, z4.b +; SVE2_MIN_256_NOMAX-NEXT: zip1 z3.b, z6.b, z5.b +; SVE2_MIN_256_NOMAX-NEXT: zip1 z0.b, z0.b, z0.b +; SVE2_MIN_256_NOMAX-NEXT: zip1 z1.h, z1.h, z2.h +; SVE2_MIN_256_NOMAX-NEXT: zip1 z0.h, z0.h, z3.h +; SVE2_MIN_256_NOMAX-NEXT: zip1 z0.s, z1.s, z0.s +; SVE2_MIN_256_NOMAX-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2_MIN_256_NOMAX-NEXT: ret %op1 = load <8 x i8>, ptr %a %op2 = load <8 x i8>, ptr %b @@ -263,89 +230,59 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) { ; ; SVE2_128_NOMAX-LABEL: shuffle_index_poison_value: ; SVE2_128_NOMAX: // %bb.0: -; SVE2_128_NOMAX-NEXT: sub sp, sp, #16 -; SVE2_128_NOMAX-NEXT: .cfi_def_cfa_offset 16 ; SVE2_128_NOMAX-NEXT: ldr d0, [x1] -; SVE2_128_NOMAX-NEXT: ldr d3, [x0] -; SVE2_128_NOMAX-NEXT: mov z1.b, z0.b[6] -; SVE2_128_NOMAX-NEXT: mov z2.b, z0.b[4] -; SVE2_128_NOMAX-NEXT: fmov w8, s1 -; SVE2_128_NOMAX-NEXT: mov z1.b, z0.b[3] -; SVE2_128_NOMAX-NEXT: fmov w9, s2 -; SVE2_128_NOMAX-NEXT: mov z2.b, z0.b[2] -; SVE2_128_NOMAX-NEXT: mov z0.b, z0.b[1] -; SVE2_128_NOMAX-NEXT: strb w8, [sp, #14] -; SVE2_128_NOMAX-NEXT: fmov w8, s1 -; SVE2_128_NOMAX-NEXT: mov z1.b, z3.b[1] -; SVE2_128_NOMAX-NEXT: strb w9, [sp, #13] -; SVE2_128_NOMAX-NEXT: strb w9, [sp, #12] -; SVE2_128_NOMAX-NEXT: fmov w9, s2 -; SVE2_128_NOMAX-NEXT: strb w8, [sp, #11] -; SVE2_128_NOMAX-NEXT: fmov w8, s0 -; SVE2_128_NOMAX-NEXT: strb w9, [sp, #10] -; SVE2_128_NOMAX-NEXT: fmov w9, s1 -; SVE2_128_NOMAX-NEXT: strb w8, [sp, #9] -; SVE2_128_NOMAX-NEXT: strb w9, [sp, #8] -; SVE2_128_NOMAX-NEXT: ldr d0, [sp, #8] -; SVE2_128_NOMAX-NEXT: add sp, sp, #16 +; SVE2_128_NOMAX-NEXT: ldr d1, [x0] +; SVE2_128_NOMAX-NEXT: mov z2.b, z0.b[3] +; 
SVE2_128_NOMAX-NEXT: mov z3.b, z0.b[2] +; SVE2_128_NOMAX-NEXT: mov z4.b, z0.b[1] +; SVE2_128_NOMAX-NEXT: mov z1.b, z1.b[1] +; SVE2_128_NOMAX-NEXT: mov z5.b, z0.b[4] +; SVE2_128_NOMAX-NEXT: mov z0.b, z0.b[6] +; SVE2_128_NOMAX-NEXT: zip1 z2.b, z3.b, z2.b +; SVE2_128_NOMAX-NEXT: zip1 z1.b, z1.b, z4.b +; SVE2_128_NOMAX-NEXT: zip1 z3.b, z5.b, z5.b +; SVE2_128_NOMAX-NEXT: zip1 z1.h, z1.h, z2.h +; SVE2_128_NOMAX-NEXT: zip1 z0.h, z3.h, z0.h +; SVE2_128_NOMAX-NEXT: zip1 z0.s, z1.s, z0.s +; SVE2_128_NOMAX-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2_128_NOMAX-NEXT: ret ; ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_poison_value: ; SVE2_NOMIN_NOMAX: // %bb.0: -; SVE2_NOMIN_NOMAX-NEXT: sub sp, sp, #16 -; SVE2_NOMIN_NOMAX-NEXT: .cfi_def_cfa_offset 16 ; SVE2_NOMIN_NOMAX-NEXT: ldr d0, [x1] -; SVE2_NOMIN_NOMAX-NEXT: ldr d3, [x0] -; SVE2_NOMIN_NOMAX-NEXT: mov z1.b, z0.b[6] -; SVE2_NOMIN_NOMAX-NEXT: mov z2.b, z0.b[4] -; SVE2_NOMIN_NOMAX-NEXT: fmov w8, s1 -; SVE2_NOMIN_NOMAX-NEXT: mov z1.b, z0.b[3] -; SVE2_NOMIN_NOMAX-NEXT: fmov w9, s2 -; SVE2_NOMIN_NOMAX-NEXT: mov z2.b, z0.b[2] -; SVE2_NOMIN_NOMAX-NEXT: mov z0.b, z0.b[1] -; SVE2_NOMIN_NOMAX-NEXT: strb w8, [sp, #14] -; SVE2_NOMIN_NOMAX-NEXT: fmov w8, s1 -; SVE2_NOMIN_NOMAX-NEXT: mov z1.b, z3.b[1] -; SVE2_NOMIN_NOMAX-NEXT: strb w9, [sp, #13] -; SVE2_NOMIN_NOMAX-NEXT: strb w9, [sp, #12] -; SVE2_NOMIN_NOMAX-NEXT: fmov w9, s2 -; SVE2_NOMIN_NOMAX-NEXT: strb w8, [sp, #11] -; SVE2_NOMIN_NOMAX-NEXT: fmov w8, s0 -; SVE2_NOMIN_NOMAX-NEXT: strb w9, [sp, #10] -; SVE2_NOMIN_NOMAX-NEXT: fmov w9, s1 -; SVE2_NOMIN_NOMAX-NEXT: strb w8, [sp, #9] -; SVE2_NOMIN_NOMAX-NEXT: strb w9, [sp, #8] -; SVE2_NOMIN_NOMAX-NEXT: ldr d0, [sp, #8] -; SVE2_NOMIN_NOMAX-NEXT: add sp, sp, #16 +; SVE2_NOMIN_NOMAX-NEXT: ldr d1, [x0] +; SVE2_NOMIN_NOMAX-NEXT: mov z2.b, z0.b[3] +; SVE2_NOMIN_NOMAX-NEXT: mov z3.b, z0.b[2] +; SVE2_NOMIN_NOMAX-NEXT: mov z4.b, z0.b[1] +; SVE2_NOMIN_NOMAX-NEXT: mov z1.b, z1.b[1] +; SVE2_NOMIN_NOMAX-NEXT: mov z5.b, z0.b[4] +; 
SVE2_NOMIN_NOMAX-NEXT: mov z0.b, z0.b[6] +; SVE2_NOMIN_NOMAX-NEXT: zip1 z2.b, z3.b, z2.b +; SVE2_NOMIN_NOMAX-NEXT: zip1 z1.b, z1.b, z4.b +; SVE2_NOMIN_NOMAX-NEXT: zip1 z3.b, z5.b, z5.b +; SVE2_NOMIN_NOMAX-NEXT: zip1 z1.h, z1.h, z2.h +; SVE2_NOMIN_NOMAX-NEXT: zip1 z0.h, z3.h, z0.h +; SVE2_NOMIN_NOMAX-NEXT: zip1 z0.s, z1.s, z0.s +; SVE2_NOMIN_NOMAX-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2_NOMIN_NOMAX-NEXT: ret ; ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_poison_value: ; SVE2_MIN_256_NOMAX: // %bb.0: -; SVE2_MIN_256_NOMAX-NEXT: sub sp, sp, #16 -; SVE2_MIN_256_NOMAX-NEXT: .cfi_def_cfa_offset 16 ; SVE2_MIN_256_NOMAX-NEXT: ldr d0, [x1] -; SVE2_MIN_256_NOMAX-NEXT: ldr d3, [x0] -; SVE2_MIN_256_NOMAX-NEXT: mov z1.b, z0.b[6] -; SVE2_MIN_256_NOMAX-NEXT: mov z2.b, z0.b[4] -; SVE2_MIN_256_NOMAX-NEXT: fmov w8, s1 -; SVE2_MIN_256_NOMAX-NEXT: mov z1.b, z0.b[3] -; SVE2_MIN_256_NOMAX-NEXT: fmov w9, s2 -; SVE2_MIN_256_NOMAX-NEXT: mov z2.b, z0.b[2] -; SVE2_MIN_256_NOMAX-NEXT: mov z0.b, z0.b[1] -; SVE2_MIN_256_NOMAX-NEXT: strb w8, [sp, #14] -; SVE2_MIN_256_NOMAX-NEXT: fmov w8, s1 -; SVE2_MIN_256_NOMAX-NEXT: mov z1.b, z3.b[1] -; SVE2_MIN_256_NOMAX-NEXT: strb w9, [sp, #13] -; SVE2_MIN_256_NOMAX-NEXT: strb w9, [sp, #12] -; SVE2_MIN_256_NOMAX-NEXT: fmov w9, s2 -; SVE2_MIN_256_NOMAX-NEXT: strb w8, [sp, #11] -; SVE2_MIN_256_NOMAX-NEXT: fmov w8, s0 -; SVE2_MIN_256_NOMAX-NEXT: strb w9, [sp, #10] -; SVE2_MIN_256_NOMAX-NEXT: fmov w9, s1 -; SVE2_MIN_256_NOMAX-NEXT: strb w8, [sp, #9] -; SVE2_MIN_256_NOMAX-NEXT: strb w9, [sp, #8] -; SVE2_MIN_256_NOMAX-NEXT: ldr d0, [sp, #8] -; SVE2_MIN_256_NOMAX-NEXT: add sp, sp, #16 +; SVE2_MIN_256_NOMAX-NEXT: ldr d1, [x0] +; SVE2_MIN_256_NOMAX-NEXT: mov z2.b, z0.b[3] +; SVE2_MIN_256_NOMAX-NEXT: mov z3.b, z0.b[2] +; SVE2_MIN_256_NOMAX-NEXT: mov z4.b, z0.b[1] +; SVE2_MIN_256_NOMAX-NEXT: mov z1.b, z1.b[1] +; SVE2_MIN_256_NOMAX-NEXT: mov z5.b, z0.b[4] +; SVE2_MIN_256_NOMAX-NEXT: mov z0.b, z0.b[6] +; SVE2_MIN_256_NOMAX-NEXT: zip1 z2.b, z3.b, z2.b +; 
SVE2_MIN_256_NOMAX-NEXT: zip1 z1.b, z1.b, z4.b +; SVE2_MIN_256_NOMAX-NEXT: zip1 z3.b, z5.b, z5.b +; SVE2_MIN_256_NOMAX-NEXT: zip1 z1.h, z1.h, z2.h +; SVE2_MIN_256_NOMAX-NEXT: zip1 z0.h, z3.h, z0.h +; SVE2_MIN_256_NOMAX-NEXT: zip1 z0.s, z1.s, z0.s +; SVE2_MIN_256_NOMAX-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2_MIN_256_NOMAX-NEXT: ret %op1 = load <8 x i8>, ptr %a %op2 = load <8 x i8>, ptr %b @@ -401,34 +338,23 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) { define <8 x i8> @negative_test_shuffle_index_size_op_both_maxhw(ptr %a, ptr %b) "target-features"="+sve2" vscale_range(16,16) { ; CHECK-LABEL: negative_test_shuffle_index_size_op_both_maxhw: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: mov z1.b, z0.b[7] -; CHECK-NEXT: mov z2.b, z0.b[6] -; CHECK-NEXT: mov z3.b, z0.b[4] -; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: fmov w9, s2 ; CHECK-NEXT: mov z2.b, z0.b[3] -; CHECK-NEXT: mov z1.b, z1.b[1] -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: fmov w8, s3 ; CHECK-NEXT: mov z3.b, z0.b[2] -; CHECK-NEXT: strb w9, [sp, #14] -; CHECK-NEXT: mov z0.b, z0.b[1] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: strb w8, [sp, #12] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strb w9, [sp, #11] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w8, [sp, #10] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strb w9, [sp, #9] -; CHECK-NEXT: strb w8, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z4.b, z0.b[1] +; CHECK-NEXT: mov z1.b, z1.b[1] +; CHECK-NEXT: mov z5.b, z0.b[7] +; CHECK-NEXT: mov z6.b, z0.b[6] +; CHECK-NEXT: mov z0.b, z0.b[4] +; CHECK-NEXT: zip1 z2.b, z3.b, z2.b +; CHECK-NEXT: zip1 z1.b, z1.b, z4.b +; CHECK-NEXT: zip1 z3.b, z6.b, z5.b +; CHECK-NEXT: zip1 z0.b, z0.b, z0.b +; CHECK-NEXT: zip1 z1.h, z1.h, z2.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z1.s, z0.s +; 
CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %op1 = load <8 x i8>, ptr %a %op2 = load <8 x i8>, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll index 617b560713c3ab..478072d33d8c9b 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll @@ -184,13 +184,11 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind { define <2 x i16> @vls_sve_and_2xi16(<2 x i16> %b) nounwind { ; CHECK-LABEL: vls_sve_and_2xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fmov s1, wzr ; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: stp wzr, w8, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: zip1 z0.s, z1.s, z0.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: vls_sve_and_2xi16: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll index b9264ad5f77c37..6644be11a02ba7 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll @@ -91,19 +91,12 @@ define void @bitcast_v32i8(ptr %a, ptr %b) { define void @bitcast_v2i16(ptr %a, ptr %b) { ; CHECK-LABEL: bitcast_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: zip1 z0.h, z0.h, z1.h ; CHECK-NEXT: fmov w8, 
s0 ; CHECK-NEXT: str w8, [x1] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: bitcast_v2i16: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll index b8a2e0e0f4bd4c..9729a1d95cd916 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll @@ -222,3 +222,255 @@ define void @build_vector_no_stride_v4f64(ptr %a) { store <4 x double> , ptr %a, align 8 ret void } + +define void @build_vector_non_const_v4i1(i1 %a, i1 %b, i1 %c, i1 %d, ptr %out) { +; CHECK-LABEL: build_vector_non_const_v4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, w0, w1, lsl #1 +; CHECK-NEXT: orr w8, w8, w2, lsl #2 +; CHECK-NEXT: orr w8, w8, w3, lsl #3 +; CHECK-NEXT: strb w8, [x4] +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_non_const_v4i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: orr w8, w0, w1, lsl #1 +; NONEON-NOSVE-NEXT: orr w8, w8, w2, lsl #2 +; NONEON-NOSVE-NEXT: orr w8, w8, w3, lsl #3 +; NONEON-NOSVE-NEXT: strb w8, [x4] +; NONEON-NOSVE-NEXT: ret + %1 = insertelement <4 x i1> undef, i1 %a, i64 0 + %2 = insertelement <4 x i1> %1, i1 %b, i64 1 + %3 = insertelement <4 x i1> %2, i1 %c, i64 2 + %4 = insertelement <4 x i1> %3, i1 %d, i64 3 + store <4 x i1> %4, ptr %out + ret void +} + +define void @build_vector_non_const_v2f64(double %a, double %b, ptr %out) { +; CHECK-LABEL: build_vector_non_const_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: zip1 z0.d, z0.d, z1.d +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_non_const_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret + %1 = insertelement <2 x double> undef, double %a, i64 0 + %2 = insertelement <2 x double> %1, double %b, i64 1 + store <2 x double> %2, ptr %out + ret void +} + +define void @build_vector_non_const_v2f32(float %a, float %b, ptr %out) { +; CHECK-LABEL: build_vector_non_const_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: // kill: def $s1 killed $s1 def $z1 +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_non_const_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret + %1 = insertelement <2 x float> undef, float %a, i64 0 + %2 = insertelement <2 x float> %1, float %b, i64 1 + store <2 x float> %2, ptr %out + ret void +} + +define void @build_vector_non_const_v4f32(float %a, float %b, float %c, float %d, ptr %out) { +; CHECK-LABEL: build_vector_non_const_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: // kill: def $s3 killed $s3 def $z3 +; CHECK-NEXT: // kill: def $s1 killed $s1 def $z1 +; CHECK-NEXT: zip1 z2.s, z2.s, z3.s +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: zip1 z0.d, z0.d, z2.d +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_non_const_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp s2, s3, [sp, #8] +; NONEON-NOSVE-NEXT: stp s0, s1, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; 
NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret + %1 = insertelement <4 x float> undef, float %a, i64 0 + %2 = insertelement <4 x float> %1, float %b, i64 1 + %3 = insertelement <4 x float> %2, float %c, i64 2 + %4 = insertelement <4 x float> %3, float %d, i64 3 + store <4 x float> %4, ptr %out + ret void +} + +define void @build_vector_non_const_v4f64(double %a, double %b, double %c, double %d, ptr %out) { +; CHECK-LABEL: build_vector_non_const_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $z3 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: zip1 z2.d, z2.d, z3.d +; CHECK-NEXT: zip1 z0.d, z0.d, z1.d +; CHECK-NEXT: stp q0, q2, [x0] +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_non_const_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: stp d2, d3, [sp, #16] +; NONEON-NOSVE-NEXT: ldp q1, q0, [sp] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret + %1 = insertelement <4 x double> undef, double %a, i64 0 + %2 = insertelement <4 x double> %1, double %b, i64 1 + %3 = insertelement <4 x double> %2, double %c, i64 2 + %4 = insertelement <4 x double> %3, double %d, i64 3 + store <4 x double> %4, ptr %out + ret void +} + +define void @build_vector_non_const_v8f16(half %a, half %b, half %c, half %d, half %e, half %f, half %g, half %h, ptr %out) { +; CHECK-LABEL: build_vector_non_const_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h6 killed $h6 def $z6 +; CHECK-NEXT: // kill: def $h4 killed $h4 def $z4 +; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: // kill: def $h7 killed $h7 def $z7 +; CHECK-NEXT: // kill: def $h5 killed $h5 def $z5 +; 
CHECK-NEXT: // kill: def $h3 killed $h3 def $z3 +; CHECK-NEXT: // kill: def $h1 killed $h1 def $z1 +; CHECK-NEXT: zip1 z6.h, z6.h, z7.h +; CHECK-NEXT: zip1 z4.h, z4.h, z5.h +; CHECK-NEXT: zip1 z2.h, z2.h, z3.h +; CHECK-NEXT: zip1 z0.h, z0.h, z1.h +; CHECK-NEXT: zip1 z1.s, z4.s, z6.s +; CHECK-NEXT: zip1 z0.s, z0.s, z2.s +; CHECK-NEXT: zip1 z0.d, z0.d, z1.d +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_non_const_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: str h7, [sp, #14] +; NONEON-NOSVE-NEXT: str h6, [sp, #12] +; NONEON-NOSVE-NEXT: str h5, [sp, #10] +; NONEON-NOSVE-NEXT: str h4, [sp, #8] +; NONEON-NOSVE-NEXT: str h3, [sp, #6] +; NONEON-NOSVE-NEXT: str h2, [sp, #4] +; NONEON-NOSVE-NEXT: str h1, [sp, #2] +; NONEON-NOSVE-NEXT: str h0, [sp] +; NONEON-NOSVE-NEXT: ldr q0, [sp] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret + %1 = insertelement <8 x half> undef, half %a, i64 0 + %2 = insertelement <8 x half> %1, half %b, i64 1 + %3 = insertelement <8 x half> %2, half %c, i64 2 + %4 = insertelement <8 x half> %3, half %d, i64 3 + %5 = insertelement <8 x half> %4, half %e, i64 4 + %6 = insertelement <8 x half> %5, half %f, i64 5 + %7 = insertelement <8 x half> %6, half %g, i64 6 + %8 = insertelement <8 x half> %7, half %h, i64 7 + store <8 x half> %8, ptr %out + ret void +} + +define void @build_vector_non_const_v2i32(i32 %a, i32 %b, ptr %out) { +; CHECK-LABEL: build_vector_non_const_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s0, w1 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: zip1 z0.s, z1.s, z0.s +; CHECK-NEXT: str d0, [x2] +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_non_const_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: stp w0, w1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] 
+; NONEON-NOSVE-NEXT: str d0, [x2] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret + %1 = insertelement <2 x i32> undef, i32 %a, i64 0 + %2 = insertelement <2 x i32> %1, i32 %b, i64 1 + store <2 x i32> %2, ptr %out + ret void +} + +define void @build_vector_non_const_v8i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h, ptr %out) { +; CHECK-LABEL: build_vector_non_const_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: strb w7, [sp, #15] +; CHECK-NEXT: ldr x8, [sp, #16] +; CHECK-NEXT: strb w6, [sp, #14] +; CHECK-NEXT: strb w5, [sp, #13] +; CHECK-NEXT: strb w4, [sp, #12] +; CHECK-NEXT: strb w3, [sp, #11] +; CHECK-NEXT: strb w2, [sp, #10] +; CHECK-NEXT: strb w1, [sp, #9] +; CHECK-NEXT: strb w0, [sp, #8] +; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: str d0, [x8] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_non_const_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: strb w7, [sp, #15] +; NONEON-NOSVE-NEXT: ldr x8, [sp, #16] +; NONEON-NOSVE-NEXT: strb w6, [sp, #14] +; NONEON-NOSVE-NEXT: strb w5, [sp, #13] +; NONEON-NOSVE-NEXT: strb w4, [sp, #12] +; NONEON-NOSVE-NEXT: strb w3, [sp, #11] +; NONEON-NOSVE-NEXT: strb w2, [sp, #10] +; NONEON-NOSVE-NEXT: strb w1, [sp, #9] +; NONEON-NOSVE-NEXT: strb w0, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: str d0, [x8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret + %1 = insertelement <8 x i8> undef, i8 %a, i64 0 + %2 = insertelement <8 x i8> %1, i8 %b, i64 1 + %3 = insertelement <8 x i8> %2, i8 %c, i64 2 + %4 = insertelement <8 x i8> %3, i8 %d, i64 3 + %5 = insertelement <8 x i8> %4, i8 %e, i64 4 + %6 = insertelement <8 x i8> %5, i8 %f, i64 5 + %7 = insertelement <8 x i8> %6, i8 %g, i64 6 + %8 = insertelement <8 x i8> %7, i8 %h, i64 7 + store <8 x i8> %8, ptr %out + ret void +} 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll index 4b6285b2732fe5..c1810c678ea522 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll @@ -12,34 +12,22 @@ target triple = "aarch64-unknown-linux-gnu" define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-LABEL: concat_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: mov z2.h, z1.h[3] -; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z2.h, z1.h[3] ; CHECK-NEXT: mov z3.h, z1.h[2] -; CHECK-NEXT: mov z1.h, z1.h[1] -; CHECK-NEXT: mov z4.h, z0.h[3] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w8, [sp, #12] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.h, z0.h[2] -; CHECK-NEXT: mov z0.h, z0.h[1] -; CHECK-NEXT: strb w9, [sp, #8] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strb w9, [sp, #14] -; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: strb w8, [sp, #11] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strb w8, [sp, #10] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strb w8, [sp, #9] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z4.h, z1.h[1] +; CHECK-NEXT: mov z5.h, z0.h[3] +; CHECK-NEXT: mov z6.h, z0.h[2] +; CHECK-NEXT: mov z7.h, z0.h[1] +; CHECK-NEXT: zip1 z2.b, z3.b, z2.b +; CHECK-NEXT: zip1 z1.b, z1.b, z4.b +; CHECK-NEXT: zip1 z3.b, z6.b, z5.b +; CHECK-NEXT: zip1 z0.b, z0.b, z7.b +; CHECK-NEXT: zip1 z1.h, z1.h, z2.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: concat_v8i8: @@ -152,22 +140,14 @@ define void 
@concat_v64i8(ptr %a, ptr %b, ptr %c) { define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-LABEL: concat_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z2.s, z1.s[1] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w9, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z3.s, z0.s[1] +; CHECK-NEXT: zip1 z1.h, z1.h, z2.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: concat_v4i16: @@ -428,18 +408,14 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) { define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-LABEL: concat_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z2.h, z1.h[1] -; CHECK-NEXT: str h1, [sp, #12] -; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: str h0, [sp, #8] -; CHECK-NEXT: str h2, [sp, #14] -; CHECK-NEXT: str h1, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z3.h, z0.h[1] +; CHECK-NEXT: zip1 z1.h, z1.h, z2.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: concat_v4f16: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll index 
50a05cb4b1e277..7d6336a43a4fd1 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll @@ -326,29 +326,29 @@ define <2 x i256> @load_sext_v2i64i256(ptr %ap) { ; CHECK-LABEL: load_sext_v2i64i256: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: fmov x8, d0 ; CHECK-NEXT: mov z1.d, z0.d[1] -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: fmov x10, d1 -; CHECK-NEXT: stp x8, x9, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: asr x8, x10, #63 -; CHECK-NEXT: mov z0.d, x9 -; CHECK-NEXT: stp x10, x8, [sp, #16] -; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: ldp q2, q4, [sp], #32 -; CHECK-NEXT: mov z3.d, z0.d[1] -; CHECK-NEXT: mov z5.d, z1.d[1] -; CHECK-NEXT: mov z6.d, z2.d[1] -; CHECK-NEXT: fmov x2, d0 -; CHECK-NEXT: mov z0.d, z4.d[1] -; CHECK-NEXT: fmov x6, d1 -; CHECK-NEXT: fmov x0, d2 -; CHECK-NEXT: fmov x4, d4 -; CHECK-NEXT: fmov x3, d3 -; CHECK-NEXT: fmov x7, d5 -; CHECK-NEXT: fmov x1, d6 -; CHECK-NEXT: fmov x5, d0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: asr x8, x8, #63 +; CHECK-NEXT: fmov d3, x8 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: asr x9, x9, #63 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: zip1 z0.d, z0.d, z3.d +; CHECK-NEXT: mov z3.d, x9 +; CHECK-NEXT: fmov x2, d2 +; CHECK-NEXT: zip1 z1.d, z1.d, z4.d +; CHECK-NEXT: mov z4.d, z2.d[1] +; CHECK-NEXT: mov z5.d, z0.d[1] +; CHECK-NEXT: mov z6.d, z3.d[1] +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: fmov x6, d3 +; CHECK-NEXT: mov z2.d, z1.d[1] +; CHECK-NEXT: fmov x3, d4 +; CHECK-NEXT: fmov x1, d5 +; CHECK-NEXT: fmov x4, d1 +; CHECK-NEXT: fmov x7, d6 +; CHECK-NEXT: fmov x5, d2 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: load_sext_v2i64i256: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll index 2665696308463f..a728cbe97056db 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll @@ -10,23 +10,15 @@ target triple = "aarch64-unknown-linux-gnu" define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) { ; CHECK-LABEL: extract_subvector_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.b, z0.b[7] ; CHECK-NEXT: mov z2.b, z0.b[6] ; CHECK-NEXT: mov z3.b, z0.b[5] ; CHECK-NEXT: mov z0.b, z0.b[4] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: zip1 z1.h, z2.h, z1.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: extract_subvector_v8i1: @@ -53,23 +45,15 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) { define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) { ; CHECK-LABEL: extract_subvector_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.b, z0.b[7] ; CHECK-NEXT: mov z2.b, z0.b[6] ; CHECK-NEXT: mov z3.b, z0.b[5] ; CHECK-NEXT: mov z0.b, z0.b[4] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: zip1 z1.h, z2.h, z1.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: // 
kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: extract_subvector_v8i8: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll index dad53b31db0b0f..f1771a753826cc 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll @@ -1126,49 +1126,39 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) { define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) { ; SVE-LABEL: test_copysign_v4f16_v4f64: ; SVE: // %bb.0: -; SVE-NEXT: sub sp, sp, #16 -; SVE-NEXT: .cfi_def_cfa_offset 16 -; SVE-NEXT: ldp q1, q0, [x1] -; SVE-NEXT: ldr d4, [x0] -; SVE-NEXT: and z4.h, z4.h, #0x7fff -; SVE-NEXT: mov z2.d, z0.d[1] -; SVE-NEXT: mov z3.d, z1.d[1] -; SVE-NEXT: fcvt h0, d0 +; SVE-NEXT: ldp q0, q1, [x1] +; SVE-NEXT: mov z2.d, z1.d[1] +; SVE-NEXT: mov z3.d, z0.d[1] ; SVE-NEXT: fcvt h1, d1 +; SVE-NEXT: fcvt h0, d0 ; SVE-NEXT: fcvt h2, d2 ; SVE-NEXT: fcvt h3, d3 -; SVE-NEXT: str h0, [sp, #12] -; SVE-NEXT: str h1, [sp, #8] -; SVE-NEXT: str h2, [sp, #14] -; SVE-NEXT: str h3, [sp, #10] -; SVE-NEXT: ldr d0, [sp, #8] +; SVE-NEXT: zip1 z1.h, z1.h, z2.h +; SVE-NEXT: zip1 z0.h, z0.h, z3.h +; SVE-NEXT: zip1 z0.s, z0.s, z1.s +; SVE-NEXT: ldr d1, [x0] +; SVE-NEXT: and z1.h, z1.h, #0x7fff ; SVE-NEXT: and z0.h, z0.h, #0x8000 -; SVE-NEXT: orr z0.d, z4.d, z0.d +; SVE-NEXT: orr z0.d, z1.d, z0.d ; SVE-NEXT: str d0, [x0] -; SVE-NEXT: add sp, sp, #16 ; SVE-NEXT: ret ; ; SVE2-LABEL: test_copysign_v4f16_v4f64: ; SVE2: // %bb.0: -; SVE2-NEXT: sub sp, sp, #16 -; SVE2-NEXT: .cfi_def_cfa_offset 16 -; SVE2-NEXT: ldp q2, q1, [x1] -; SVE2-NEXT: mov z0.h, #32767 // =0x7fff -; SVE2-NEXT: ldr d5, [x0] -; SVE2-NEXT: mov z3.d, z1.d[1] -; SVE2-NEXT: mov z4.d, z2.d[1] +; SVE2-NEXT: ldp q0, q1, [x1] +; SVE2-NEXT: mov z2.d, z1.d[1] +; SVE2-NEXT: mov z3.d, z0.d[1] ; SVE2-NEXT: fcvt h1, d1 +; 
SVE2-NEXT: fcvt h0, d0 ; SVE2-NEXT: fcvt h2, d2 ; SVE2-NEXT: fcvt h3, d3 -; SVE2-NEXT: fcvt h4, d4 -; SVE2-NEXT: str h1, [sp, #12] -; SVE2-NEXT: str h2, [sp, #8] -; SVE2-NEXT: str h3, [sp, #14] -; SVE2-NEXT: str h4, [sp, #10] -; SVE2-NEXT: ldr d1, [sp, #8] -; SVE2-NEXT: bsl z5.d, z5.d, z1.d, z0.d -; SVE2-NEXT: str d5, [x0] -; SVE2-NEXT: add sp, sp, #16 +; SVE2-NEXT: zip1 z1.h, z1.h, z2.h +; SVE2-NEXT: zip1 z0.h, z0.h, z3.h +; SVE2-NEXT: mov z2.h, #32767 // =0x7fff +; SVE2-NEXT: zip1 z0.s, z0.s, z1.s +; SVE2-NEXT: ldr d1, [x0] +; SVE2-NEXT: bsl z1.d, z1.d, z0.d, z2.d +; SVE2-NEXT: str d1, [x0] ; SVE2-NEXT: ret ; ; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll index a206fbc5102953..11fee267660c03 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll @@ -443,9 +443,10 @@ define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) { ; CHECK-NEXT: mov z1.h, z0.h[1] ; CHECK-NEXT: fcvtzu x8, h0 ; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: stp x8, x9, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: zip1 z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i64: @@ -471,19 +472,20 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v4f16_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: mov z2.h, z0.h[3] -; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: fcvtzu x10, h2 -; CHECK-NEXT: fcvtzu x11, h0 -; CHECK-NEXT: stp x8, x9, [sp, #-32]! 
-; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: stp x11, x10, [sp, #16] -; CHECK-NEXT: ldp q1, q0, [sp] +; CHECK-NEXT: mov z1.h, z0.h[3] +; CHECK-NEXT: mov z2.h, z0.h[2] +; CHECK-NEXT: mov z3.h, z0.h[1] +; CHECK-NEXT: fcvtzu x10, h0 +; CHECK-NEXT: fcvtzu x8, h1 +; CHECK-NEXT: fcvtzu x9, h2 +; CHECK-NEXT: fcvtzu x11, h3 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: zip1 z1.d, z2.d, z1.d ; CHECK-NEXT: stp q1, q0, [x1] -; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i64: @@ -521,31 +523,35 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: fcvtzu x12, h0 +; CHECK-NEXT: mov z2.h, z0.h[3] +; CHECK-NEXT: mov z3.h, z0.h[2] +; CHECK-NEXT: mov z4.h, z0.h[1] +; CHECK-NEXT: fcvtzu x10, h0 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: mov z2.h, z1.h[1] -; CHECK-NEXT: fcvtzu x8, h1 -; CHECK-NEXT: mov z3.h, z1.h[3] -; CHECK-NEXT: mov z1.h, z1.h[2] -; CHECK-NEXT: fcvtzu x9, h2 -; CHECK-NEXT: mov z2.h, z0.h[1] -; CHECK-NEXT: fcvtzu x10, h3 -; CHECK-NEXT: mov z3.h, z0.h[3] -; CHECK-NEXT: fcvtzu x11, h1 -; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: stp x8, x9, [sp, #-64]! 
-; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: fcvtzu x8, h2 ; CHECK-NEXT: fcvtzu x9, h3 -; CHECK-NEXT: stp x11, x10, [sp, #16] -; CHECK-NEXT: fcvtzu x10, h0 -; CHECK-NEXT: ldp q2, q3, [sp] -; CHECK-NEXT: stp x12, x8, [sp, #32] -; CHECK-NEXT: stp x10, x9, [sp, #48] -; CHECK-NEXT: ldp q1, q0, [sp, #32] -; CHECK-NEXT: stp q2, q3, [x1, #32] -; CHECK-NEXT: stp q1, q0, [x1] -; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: fcvtzu x11, h4 +; CHECK-NEXT: mov z5.h, z1.h[3] +; CHECK-NEXT: mov z6.h, z1.h[2] +; CHECK-NEXT: mov z2.h, z1.h[1] +; CHECK-NEXT: fcvtzu x14, h1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: fcvtzu x12, h5 +; CHECK-NEXT: fcvtzu x13, h6 +; CHECK-NEXT: fcvtzu x15, h2 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: fmov d4, x13 +; CHECK-NEXT: zip1 z2.d, z2.d, z3.d +; CHECK-NEXT: fmov d3, x14 +; CHECK-NEXT: zip1 z1.d, z4.d, z1.d +; CHECK-NEXT: fmov d4, x15 +; CHECK-NEXT: stp q2, q0, [x1] +; CHECK-NEXT: zip1 z3.d, z3.d, z4.d +; CHECK-NEXT: stp q3, q1, [x1, #32] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i64: @@ -598,57 +604,67 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) { define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v16f16_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: mov z5.d, z1.d +; CHECK-NEXT: mov z2.h, z0.h[3] +; CHECK-NEXT: mov z4.h, z1.h[1] +; CHECK-NEXT: mov z6.h, z1.h[3] +; CHECK-NEXT: fcvtzu x9, h1 +; CHECK-NEXT: fcvtzu x8, h0 +; CHECK-NEXT: mov z7.h, z0.h[1] ; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 -; CHECK-NEXT: mov z4.h, z2.h[1] -; CHECK-NEXT: fcvtzu x8, h2 -; CHECK-NEXT: mov z5.h, z2.h[3] -; CHECK-NEXT: mov z2.h, z2.h[2] -; CHECK-NEXT: fcvtzu x12, h3 -; CHECK-NEXT: fcvtzu x9, h4 -; CHECK-NEXT: mov z4.h, z3.h[1] -; CHECK-NEXT: 
fcvtzu x10, h5 -; CHECK-NEXT: mov z5.h, z3.h[3] -; CHECK-NEXT: fcvtzu x11, h2 -; CHECK-NEXT: mov z2.h, z3.h[2] -; CHECK-NEXT: stp x8, x9, [sp, #-128]! -; CHECK-NEXT: .cfi_def_cfa_offset 128 -; CHECK-NEXT: fcvtzu x8, h4 -; CHECK-NEXT: fcvtzu x9, h5 -; CHECK-NEXT: stp x11, x10, [sp, #16] +; CHECK-NEXT: ext z5.b, z5.b, z1.b, #8 ; CHECK-NEXT: fcvtzu x10, h2 -; CHECK-NEXT: mov z3.h, z1.h[1] -; CHECK-NEXT: mov z4.h, z1.h[3] -; CHECK-NEXT: fcvtzu x11, h1 +; CHECK-NEXT: fcvtzu x11, h4 +; CHECK-NEXT: fcvtzu x12, h6 ; CHECK-NEXT: mov z1.h, z1.h[2] -; CHECK-NEXT: mov z2.h, z0.h[1] -; CHECK-NEXT: stp x12, x8, [sp, #64] -; CHECK-NEXT: fcvtzu x12, h3 -; CHECK-NEXT: fcvtzu x8, h4 -; CHECK-NEXT: stp x10, x9, [sp, #80] -; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: mov z3.h, z0.h[3] -; CHECK-NEXT: fcvtzu x10, h0 ; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: stp x11, x12, [sp, #32] -; CHECK-NEXT: fcvtzu x11, h2 -; CHECK-NEXT: fcvtzu x12, h3 -; CHECK-NEXT: stp x9, x8, [sp, #48] -; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: ldp q0, q1, [sp] -; CHECK-NEXT: ldp q3, q4, [sp, #64] -; CHECK-NEXT: stp x10, x11, [sp, #96] -; CHECK-NEXT: ldp q6, q7, [sp, #32] -; CHECK-NEXT: stp x8, x12, [sp, #112] -; CHECK-NEXT: ldp q5, q2, [sp, #96] -; CHECK-NEXT: stp q0, q1, [x1, #32] -; CHECK-NEXT: stp q6, q7, [x1] -; CHECK-NEXT: stp q3, q4, [x1, #96] -; CHECK-NEXT: stp q5, q2, [x1, #64] -; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: fmov d16, x9 +; CHECK-NEXT: mov z2.h, z3.h[3] +; CHECK-NEXT: mov z4.h, z5.h[3] +; CHECK-NEXT: fcvtzu x14, h3 +; CHECK-NEXT: fcvtzu x13, h1 +; CHECK-NEXT: fcvtzu x15, h5 +; CHECK-NEXT: mov z1.h, z3.h[1] +; CHECK-NEXT: mov z6.h, z5.h[1] +; CHECK-NEXT: mov z5.h, z5.h[2] +; CHECK-NEXT: mov z3.h, z3.h[2] +; CHECK-NEXT: fcvtzu x9, h2 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fcvtzu x10, h4 +; CHECK-NEXT: fmov d4, x11 +; CHECK-NEXT: fcvtzu x11, h7 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzu x12, h0 +; CHECK-NEXT: fmov d0, x13 +; CHECK-NEXT: fcvtzu x13, h1 +; CHECK-NEXT: fmov d1, 
x14 +; CHECK-NEXT: fcvtzu x14, h6 +; CHECK-NEXT: fmov d6, x15 +; CHECK-NEXT: fcvtzu x15, h5 +; CHECK-NEXT: fmov d5, x9 +; CHECK-NEXT: fcvtzu x9, h3 +; CHECK-NEXT: zip1 z4.d, z16.d, z4.d +; CHECK-NEXT: fmov d16, x8 +; CHECK-NEXT: zip1 z0.d, z0.d, z7.d +; CHECK-NEXT: fmov d3, x12 +; CHECK-NEXT: fmov d7, x10 +; CHECK-NEXT: stp q4, q0, [x1, #64] +; CHECK-NEXT: fmov d0, x14 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: zip1 z2.d, z3.d, z2.d +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: zip1 z0.d, z6.d, z0.d +; CHECK-NEXT: zip1 z4.d, z4.d, z5.d +; CHECK-NEXT: zip1 z3.d, z16.d, z3.d +; CHECK-NEXT: fmov d16, x15 +; CHECK-NEXT: stp q3, q2, [x1] +; CHECK-NEXT: fmov d2, x13 +; CHECK-NEXT: zip1 z7.d, z16.d, z7.d +; CHECK-NEXT: zip1 z1.d, z1.d, z2.d +; CHECK-NEXT: stp q0, q7, [x1, #96] +; CHECK-NEXT: stp q1, q4, [x1, #32] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i64: @@ -1216,26 +1232,18 @@ define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) { define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) { ; CHECK-LABEL: fcvtzu_v4f64_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: mov z2.s, z0.s[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov z0.s, z1.s[1] -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: mov z2.s, z1.s[1] +; CHECK-NEXT: mov z3.s, z0.s[1] +; CHECK-NEXT: zip1 z1.h, z1.h, z2.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i16: @@ -1270,40 +1278,29 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) { define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) { ; CHECK-LABEL: fcvtzu_v8f64_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z1.s, z1.s[1] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z2.s[1] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.s, z3.s[1] -; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s +; CHECK-NEXT: mov z4.s, z0.s[1] +; CHECK-NEXT: mov z5.s, z1.s[1] +; CHECK-NEXT: mov z6.s, z3.s[1] +; CHECK-NEXT: mov z7.s, z2.s[1] +; CHECK-NEXT: zip1 z0.h, z0.h, z4.h +; CHECK-NEXT: zip1 z1.h, z1.h, z5.h +; CHECK-NEXT: zip1 z3.h, z3.h, z6.h +; CHECK-NEXT: zip1 z2.h, z2.h, z7.h +; CHECK-NEXT: zip1 z0.s, z1.s, z0.s +; CHECK-NEXT: zip1 z1.s, z2.s, z3.s +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: 
fcvtzu_v8f64_v8i16: @@ -1360,73 +1357,50 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) { define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v16f64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q5, q6, [x0, #96] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldp q4, q5, [x0, #96] -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: ldp q0, q4, [x0, #32] +; CHECK-NEXT: ldp q2, q7, [x0, #64] +; CHECK-NEXT: ldp q1, q3, [x0] +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d -; CHECK-NEXT: ldp q6, q7, [x0, #64] ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s +; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s +; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d +; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s ; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z16.s, z1.s[1] -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: mov z0.s, z2.s[1] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z3.s[1] -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: movprfx z3, z7 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: uzp1 z1.s, z4.s, z4.s -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov 
w8, s0 -; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s -; CHECK-NEXT: mov z3.s, z5.s[1] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: strh w8, [sp, #28] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z1.s, z1.s[1] -; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: strh w8, [sp, #20] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z2.s[1] -; CHECK-NEXT: strh w8, [sp, #16] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #30] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #26] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #22] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w8, [sp, #18] -; CHECK-NEXT: ldp q1, q0, [sp] -; CHECK-NEXT: stp q1, q0, [x1] -; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: mov z17.s, z6.s[1] +; CHECK-NEXT: mov z16.s, z4.s[1] +; CHECK-NEXT: mov z18.s, z5.s[1] +; CHECK-NEXT: mov z21.s, z0.s[1] +; CHECK-NEXT: mov z19.s, z7.s[1] +; CHECK-NEXT: mov z20.s, z2.s[1] +; CHECK-NEXT: mov z22.s, z3.s[1] +; CHECK-NEXT: mov z23.s, z1.s[1] +; CHECK-NEXT: zip1 z6.h, z6.h, z17.h +; CHECK-NEXT: zip1 z4.h, z4.h, z16.h +; CHECK-NEXT: zip1 z5.h, z5.h, z18.h +; CHECK-NEXT: zip1 z0.h, z0.h, z21.h +; CHECK-NEXT: zip1 z7.h, z7.h, z19.h +; CHECK-NEXT: zip1 z2.h, z2.h, z20.h +; CHECK-NEXT: zip1 z3.h, z3.h, z22.h +; CHECK-NEXT: zip1 z1.h, z1.h, z23.h +; CHECK-NEXT: zip1 z5.s, z5.s, z6.s +; CHECK-NEXT: zip1 z0.s, z0.s, z4.s +; CHECK-NEXT: zip1 z2.s, z2.s, z7.s +; CHECK-NEXT: zip1 z1.s, z1.s, z3.s +; CHECK-NEXT: zip1 z2.d, z2.d, z5.d +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: stp q0, q2, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzu_v16f64_v16i16: @@ -2187,9 +2161,10 @@ define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) { ; CHECK-NEXT: mov z1.h, z0.h[1] ; CHECK-NEXT: fcvtzs x8, h0 ; CHECK-NEXT: fcvtzs x9, h1 -; 
CHECK-NEXT: stp x8, x9, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: zip1 z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i64: @@ -2215,19 +2190,20 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v4f16_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: mov z2.h, z0.h[3] -; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fcvtzs x10, h2 -; CHECK-NEXT: fcvtzs x11, h0 -; CHECK-NEXT: stp x8, x9, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: stp x11, x10, [sp, #16] -; CHECK-NEXT: ldp q1, q0, [sp] +; CHECK-NEXT: mov z1.h, z0.h[3] +; CHECK-NEXT: mov z2.h, z0.h[2] +; CHECK-NEXT: mov z3.h, z0.h[1] +; CHECK-NEXT: fcvtzs x10, h0 +; CHECK-NEXT: fcvtzs x8, h1 +; CHECK-NEXT: fcvtzs x9, h2 +; CHECK-NEXT: fcvtzs x11, h3 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: zip1 z1.d, z2.d, z1.d ; CHECK-NEXT: stp q1, q0, [x1] -; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i64: @@ -2265,31 +2241,35 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: fcvtzs x12, h0 +; CHECK-NEXT: mov z2.h, z0.h[3] +; CHECK-NEXT: mov z3.h, z0.h[2] +; CHECK-NEXT: mov z4.h, z0.h[1] +; CHECK-NEXT: fcvtzs x10, h0 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: mov z2.h, z1.h[1] -; CHECK-NEXT: fcvtzs x8, h1 -; CHECK-NEXT: mov z3.h, z1.h[3] -; CHECK-NEXT: mov z1.h, z1.h[2] -; CHECK-NEXT: fcvtzs x9, h2 -; CHECK-NEXT: mov z2.h, z0.h[1] -; CHECK-NEXT: fcvtzs x10, h3 -; CHECK-NEXT: mov z3.h, z0.h[3] -; CHECK-NEXT: fcvtzs x11, h1 -; 
CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: stp x8, x9, [sp, #-64]! -; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: fcvtzs x8, h2 ; CHECK-NEXT: fcvtzs x9, h3 -; CHECK-NEXT: stp x11, x10, [sp, #16] -; CHECK-NEXT: fcvtzs x10, h0 -; CHECK-NEXT: ldp q2, q3, [sp] -; CHECK-NEXT: stp x12, x8, [sp, #32] -; CHECK-NEXT: stp x10, x9, [sp, #48] -; CHECK-NEXT: ldp q1, q0, [sp, #32] -; CHECK-NEXT: stp q2, q3, [x1, #32] -; CHECK-NEXT: stp q1, q0, [x1] -; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: fcvtzs x11, h4 +; CHECK-NEXT: mov z5.h, z1.h[3] +; CHECK-NEXT: mov z6.h, z1.h[2] +; CHECK-NEXT: mov z2.h, z1.h[1] +; CHECK-NEXT: fcvtzs x14, h1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: fcvtzs x12, h5 +; CHECK-NEXT: fcvtzs x13, h6 +; CHECK-NEXT: fcvtzs x15, h2 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: fmov d4, x13 +; CHECK-NEXT: zip1 z2.d, z2.d, z3.d +; CHECK-NEXT: fmov d3, x14 +; CHECK-NEXT: zip1 z1.d, z4.d, z1.d +; CHECK-NEXT: fmov d4, x15 +; CHECK-NEXT: stp q2, q0, [x1] +; CHECK-NEXT: zip1 z3.d, z3.d, z4.d +; CHECK-NEXT: stp q3, q1, [x1, #32] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i64: @@ -2342,57 +2322,67 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) { define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v16f16_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: mov z5.d, z1.d +; CHECK-NEXT: mov z2.h, z0.h[3] +; CHECK-NEXT: mov z4.h, z1.h[1] +; CHECK-NEXT: mov z6.h, z1.h[3] +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: mov z7.h, z0.h[1] ; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 -; CHECK-NEXT: mov z4.h, z2.h[1] -; CHECK-NEXT: fcvtzs x8, h2 -; CHECK-NEXT: mov z5.h, z2.h[3] -; CHECK-NEXT: mov z2.h, z2.h[2] -; CHECK-NEXT: fcvtzs x12, h3 -; 
CHECK-NEXT: fcvtzs x9, h4 -; CHECK-NEXT: mov z4.h, z3.h[1] -; CHECK-NEXT: fcvtzs x10, h5 -; CHECK-NEXT: mov z5.h, z3.h[3] -; CHECK-NEXT: fcvtzs x11, h2 -; CHECK-NEXT: mov z2.h, z3.h[2] -; CHECK-NEXT: stp x8, x9, [sp, #-128]! -; CHECK-NEXT: .cfi_def_cfa_offset 128 -; CHECK-NEXT: fcvtzs x8, h4 -; CHECK-NEXT: fcvtzs x9, h5 -; CHECK-NEXT: stp x11, x10, [sp, #16] +; CHECK-NEXT: ext z5.b, z5.b, z1.b, #8 ; CHECK-NEXT: fcvtzs x10, h2 -; CHECK-NEXT: mov z3.h, z1.h[1] -; CHECK-NEXT: mov z4.h, z1.h[3] -; CHECK-NEXT: fcvtzs x11, h1 +; CHECK-NEXT: fcvtzs x11, h4 +; CHECK-NEXT: fcvtzs x12, h6 ; CHECK-NEXT: mov z1.h, z1.h[2] -; CHECK-NEXT: mov z2.h, z0.h[1] -; CHECK-NEXT: stp x12, x8, [sp, #64] -; CHECK-NEXT: fcvtzs x12, h3 -; CHECK-NEXT: fcvtzs x8, h4 -; CHECK-NEXT: stp x10, x9, [sp, #80] -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: mov z3.h, z0.h[3] -; CHECK-NEXT: fcvtzs x10, h0 ; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: stp x11, x12, [sp, #32] -; CHECK-NEXT: fcvtzs x11, h2 -; CHECK-NEXT: fcvtzs x12, h3 -; CHECK-NEXT: stp x9, x8, [sp, #48] -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: ldp q0, q1, [sp] -; CHECK-NEXT: ldp q3, q4, [sp, #64] -; CHECK-NEXT: stp x10, x11, [sp, #96] -; CHECK-NEXT: ldp q6, q7, [sp, #32] -; CHECK-NEXT: stp x8, x12, [sp, #112] -; CHECK-NEXT: ldp q5, q2, [sp, #96] -; CHECK-NEXT: stp q0, q1, [x1, #32] -; CHECK-NEXT: stp q6, q7, [x1] -; CHECK-NEXT: stp q3, q4, [x1, #96] -; CHECK-NEXT: stp q5, q2, [x1, #64] -; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: fmov d16, x9 +; CHECK-NEXT: mov z2.h, z3.h[3] +; CHECK-NEXT: mov z4.h, z5.h[3] +; CHECK-NEXT: fcvtzs x14, h3 +; CHECK-NEXT: fcvtzs x13, h1 +; CHECK-NEXT: fcvtzs x15, h5 +; CHECK-NEXT: mov z1.h, z3.h[1] +; CHECK-NEXT: mov z6.h, z5.h[1] +; CHECK-NEXT: mov z5.h, z5.h[2] +; CHECK-NEXT: mov z3.h, z3.h[2] +; CHECK-NEXT: fcvtzs x9, h2 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fcvtzs x10, h4 +; CHECK-NEXT: fmov d4, x11 +; CHECK-NEXT: fcvtzs x11, h7 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzs x12, h0 +; 
CHECK-NEXT: fmov d0, x13 +; CHECK-NEXT: fcvtzs x13, h1 +; CHECK-NEXT: fmov d1, x14 +; CHECK-NEXT: fcvtzs x14, h6 +; CHECK-NEXT: fmov d6, x15 +; CHECK-NEXT: fcvtzs x15, h5 +; CHECK-NEXT: fmov d5, x9 +; CHECK-NEXT: fcvtzs x9, h3 +; CHECK-NEXT: zip1 z4.d, z16.d, z4.d +; CHECK-NEXT: fmov d16, x8 +; CHECK-NEXT: zip1 z0.d, z0.d, z7.d +; CHECK-NEXT: fmov d3, x12 +; CHECK-NEXT: fmov d7, x10 +; CHECK-NEXT: stp q4, q0, [x1, #64] +; CHECK-NEXT: fmov d0, x14 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: zip1 z2.d, z3.d, z2.d +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: zip1 z0.d, z6.d, z0.d +; CHECK-NEXT: zip1 z4.d, z4.d, z5.d +; CHECK-NEXT: zip1 z3.d, z16.d, z3.d +; CHECK-NEXT: fmov d16, x15 +; CHECK-NEXT: stp q3, q2, [x1] +; CHECK-NEXT: fmov d2, x13 +; CHECK-NEXT: zip1 z7.d, z16.d, z7.d +; CHECK-NEXT: zip1 z1.d, z1.d, z2.d +; CHECK-NEXT: stp q0, q7, [x1, #96] +; CHECK-NEXT: stp q1, q4, [x1, #32] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i64: @@ -2962,26 +2952,18 @@ define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) { define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) { ; CHECK-LABEL: fcvtzs_v4f64_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: mov z2.s, z0.s[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov z0.s, z1.s[1] -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: mov z2.s, z1.s[1] +; CHECK-NEXT: mov z3.s, z0.s[1] +; CHECK-NEXT: zip1 z1.h, z1.h, 
z2.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i16: @@ -3016,40 +2998,29 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) { define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) { ; CHECK-LABEL: fcvtzs_v8f64_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z1.s, z1.s[1] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z2.s[1] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.s, z3.s[1] -; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s +; CHECK-NEXT: mov z4.s, z0.s[1] +; CHECK-NEXT: mov z5.s, z1.s[1] +; CHECK-NEXT: mov z6.s, z3.s[1] +; CHECK-NEXT: mov z7.s, z2.s[1] +; CHECK-NEXT: zip1 z0.h, z0.h, z4.h +; CHECK-NEXT: zip1 z1.h, z1.h, z5.h +; CHECK-NEXT: zip1 z3.h, z3.h, z6.h +; CHECK-NEXT: zip1 z2.h, z2.h, z7.h +; CHECK-NEXT: zip1 z0.s, z1.s, z0.s +; CHECK-NEXT: zip1 z1.s, z2.s, z3.s +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: // kill: 
def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i16: @@ -3106,73 +3077,50 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) { define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v16f64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q5, q6, [x0, #96] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldp q4, q5, [x0, #96] -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: ldp q0, q4, [x0, #32] +; CHECK-NEXT: ldp q2, q7, [x0, #64] +; CHECK-NEXT: ldp q1, q3, [x0] +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d -; CHECK-NEXT: ldp q6, q7, [x0, #64] ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s +; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s +; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d +; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s ; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z16.s, z1.s[1] -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: mov z0.s, z2.s[1] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z3.s[1] -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: movprfx z3, z7 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: uzp1 
z1.s, z4.s, z4.s -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s -; CHECK-NEXT: mov z3.s, z5.s[1] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: strh w8, [sp, #28] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z1.s, z1.s[1] -; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: strh w8, [sp, #20] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z2.s[1] -; CHECK-NEXT: strh w8, [sp, #16] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #30] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #26] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #22] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w8, [sp, #18] -; CHECK-NEXT: ldp q1, q0, [sp] -; CHECK-NEXT: stp q1, q0, [x1] -; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: mov z17.s, z6.s[1] +; CHECK-NEXT: mov z16.s, z4.s[1] +; CHECK-NEXT: mov z18.s, z5.s[1] +; CHECK-NEXT: mov z21.s, z0.s[1] +; CHECK-NEXT: mov z19.s, z7.s[1] +; CHECK-NEXT: mov z20.s, z2.s[1] +; CHECK-NEXT: mov z22.s, z3.s[1] +; CHECK-NEXT: mov z23.s, z1.s[1] +; CHECK-NEXT: zip1 z6.h, z6.h, z17.h +; CHECK-NEXT: zip1 z4.h, z4.h, z16.h +; CHECK-NEXT: zip1 z5.h, z5.h, z18.h +; CHECK-NEXT: zip1 z0.h, z0.h, z21.h +; CHECK-NEXT: zip1 z7.h, z7.h, z19.h +; CHECK-NEXT: zip1 z2.h, z2.h, z20.h +; CHECK-NEXT: zip1 z3.h, z3.h, z22.h +; CHECK-NEXT: zip1 z1.h, z1.h, z23.h +; CHECK-NEXT: zip1 z5.s, z5.s, z6.s +; CHECK-NEXT: zip1 z0.s, z0.s, z4.s +; CHECK-NEXT: zip1 z2.s, z2.s, z7.s +; CHECK-NEXT: zip1 z1.s, z1.s, z3.s +; CHECK-NEXT: zip1 z2.d, z2.d, z5.d +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: stp q0, q2, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fcvtzs_v16f64_v16i16: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll index 035c76b569298a..ad5f91a5f39a49 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll @@ -8,25 +8,18 @@ target triple = "aarch64-unknown-linux-gnu" define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask) { ; CHECK-LABEL: select_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: mov z3.s, z2.s[1] -; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: zip1 z2.h, z2.h, z3.h ; CHECK-NEXT: lsl z2.h, z2.h, #15 ; CHECK-NEXT: asr z2.h, z2.h, #15 ; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: select_v2f16: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll index d77473ed8f08e5..275d13ebfd9491 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll @@ -506,14 +506,10 @@ define <4 x i64> @insertelement_v4i64(ptr %a) { define <2 x half> @insertelement_v2f16(<2 x half> %op1) { ; CHECK-LABEL: insertelement_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: fmov h1, #5.00000000 ; CHECK-NEXT: // kill: def $d0 killed $d0 
def $z0 -; CHECK-NEXT: str h0, [sp, #8] -; CHECK-NEXT: str h1, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: zip1 z0.h, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: insertelement_v2f16: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index 0c712a15d4de2f..e595686cb4975d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -1140,18 +1140,14 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) { define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) { ; CHECK-LABEL: ucvtf_v2i64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d[1] ; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: ucvtf h0, x8 -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: ucvtf h1, x8 -; CHECK-NEXT: str h0, [sp, #8] -; CHECK-NEXT: str h1, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ucvtf h1, x9 +; CHECK-NEXT: zip1 z0.h, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f16: @@ -2598,18 +2594,14 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) { define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) { ; CHECK-LABEL: scvtf_v2i64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d[1] ; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: scvtf h0, x8 -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: scvtf h1, x8 -; CHECK-NEXT: str h0, [sp, #8] -; CHECK-NEXT: str h1, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] -; 
CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: scvtf h1, x9 +; CHECK-NEXT: zip1 z0.h, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f16: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll index 270f05a806b82d..613543310f2c31 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll @@ -10,25 +10,20 @@ declare void @def(ptr) define void @alloc_v4i8(ptr %st_ptr) nounwind { ; CHECK-LABEL: alloc_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: add x0, sp, #28 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: add x20, sp, #28 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x20, sp, #12 ; CHECK-NEXT: bl def ; CHECK-NEXT: ptrue p0.b, vl2 ; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: mov z2.b, z0.b[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: stp w8, w9, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: zip1 z0.s, z0.s, z2.s ; CHECK-NEXT: st1b { z0.s }, p0, [x19] -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: alloc_v4i8: @@ -62,32 +57,28 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind { define void @alloc_v6i8(ptr %st_ptr) nounwind { ; CHECK-LABEL: alloc_v6i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub 
sp, sp, #48 -; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: add x0, sp, #24 +; CHECK-NEXT: add x0, sp, #8 ; CHECK-NEXT: bl def -; CHECK-NEXT: ldr d0, [sp, #24] +; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: add x8, sp, #4 ; CHECK-NEXT: ptrue p1.s, vl2 ; CHECK-NEXT: mov z1.b, z0.b[3] -; CHECK-NEXT: mov z2.b, z0.b[5] -; CHECK-NEXT: mov z0.b, z0.b[1] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: add x8, sp, #20 -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: st1b { z0.h }, p0, [x8] -; CHECK-NEXT: ld1h { z0.s }, p1/z, [x8] -; CHECK-NEXT: strb w9, [x19, #2] +; CHECK-NEXT: mov z2.b, z0.b[1] +; CHECK-NEXT: mov z0.b, z0.b[5] +; CHECK-NEXT: zip1 z1.h, z2.h, z1.h +; CHECK-NEXT: zip1 z1.s, z1.s, z0.s +; CHECK-NEXT: st1b { z1.h }, p0, [x8] +; CHECK-NEXT: ld1h { z1.s }, p1/z, [x8] ; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strb w8, [x19, #2] +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: strh w8, [x19] -; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: alloc_v6i8: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll index 5f4b9dd1592cf2..9055b2efba3282 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll @@ -1466,23 +1466,18 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) { define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) { ; 
CHECK-LABEL: masked_load_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: str wzr, [sp, #12] +; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: mov z2.s, z0.s[1] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: zip1 z0.h, z0.h, z2.h +; CHECK-NEXT: zip1 z1.h, z1.h, z1.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s ; CHECK-NEXT: lsl z0.h, z0.h, #15 ; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: masked_load_v2f16: @@ -2318,33 +2313,21 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) { ; CHECK-LABEL: masked_load_v8f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: mov z1.b, z0.b[3] ; CHECK-NEXT: mov z2.b, z0.b[2] +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: mov z3.b, z0.b[1] ; CHECK-NEXT: mov z4.b, z0.b[7] -; CHECK-NEXT: strh w8, [sp, #-16]! 
-; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z1.b, z0.b[6] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.b, z0.b[5] -; CHECK-NEXT: mov z0.b, z0.b[4] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w9, [sp, #4] -; CHECK-NEXT: fmov w9, s4 -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w9, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: mov x8, #4 // =0x4 -; CHECK-NEXT: ldp d0, d1, [sp] +; CHECK-NEXT: mov z5.b, z0.b[6] +; CHECK-NEXT: mov z6.b, z0.b[5] +; CHECK-NEXT: mov z7.b, z0.b[4] +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: zip1 z1.h, z2.h, z1.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z2.h, z5.h, z4.h +; CHECK-NEXT: zip1 z3.h, z7.h, z6.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: zip1 z1.s, z3.s, z2.s ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: lsl z0.s, z0.s, #31 @@ -2357,7 +2340,6 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) { ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: masked_load_v8f32: @@ -2684,23 +2666,21 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) { define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) { ; CHECK-LABEL: masked_load_zext_v3i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strh w3, [sp, #12] +; CHECK-NEXT: fmov s0, w2 +; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: strh w2, [sp, #10] -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: strh w1, [sp, #8] -; CHECK-NEXT: ldr d1, [sp, 
#8] -; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: zip1 z0.h, z1.h, z0.h +; CHECK-NEXT: fmov s1, w3 +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: lsl z0.h, z0.h, #15 ; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: masked_load_zext_v3i32: @@ -2759,23 +2739,21 @@ define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) { define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) { ; CHECK-LABEL: masked_load_sext_v3i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strh w3, [sp, #12] +; CHECK-NEXT: fmov s0, w2 +; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: strh w2, [sp, #10] -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: strh w1, [sp, #8] -; CHECK-NEXT: ldr d1, [sp, #8] -; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: zip1 z0.h, z1.h, z0.h +; CHECK-NEXT: fmov s1, w3 +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: lsl z0.h, z0.h, #15 ; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: masked_load_sext_v3i32: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll index 0c3411e5f55148..265480b571970f 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll @@ -589,23 +589,18 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) { define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) { ; CHECK-LABEL: masked_store_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: str wzr, [sp, #12] +; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: mov z2.s, z0.s[1] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: zip1 z0.h, z0.h, z2.h +; CHECK-NEXT: zip1 z1.h, z1.h, z1.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s ; CHECK-NEXT: lsl z0.h, z0.h, #15 ; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: masked_store_v2f16: @@ -1014,48 +1009,33 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) { define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) { ; CHECK-LABEL: masked_store_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.b, z0.b[7] ; CHECK-NEXT: mov z2.b, z0.b[6] +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: mov z3.b, z0.b[5] ; CHECK-NEXT: mov z4.b, z0.b[4] +; CHECK-NEXT: mov z5.b, z0.b[3] +; CHECK-NEXT: mov z6.b, z0.b[2] +; CHECK-NEXT: mov z7.b, z0.b[1] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.b, z0.b[3] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z0.b[2] -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: fmov w9, s4 -; CHECK-NEXT: mov z4.b, z0.b[1] -; CHECK-NEXT: strh w8, 
[sp, #10] -; CHECK-NEXT: mov x8, #4 // =0x4 -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: ldr d1, [sp, #8] +; CHECK-NEXT: zip1 z1.h, z2.h, z1.h +; CHECK-NEXT: zip1 z2.h, z4.h, z3.h +; CHECK-NEXT: zip1 z3.h, z6.h, z5.h +; CHECK-NEXT: zip1 z0.h, z0.h, z7.h +; CHECK-NEXT: zip1 z1.s, z2.s, z1.s +; CHECK-NEXT: zip1 z0.s, z0.s, z3.s ; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: lsl z1.s, z1.s, #31 +; CHECK-NEXT: lsl z0.s, z0.s, #31 ; CHECK-NEXT: asr z1.s, z1.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0 ; CHECK-NEXT: mov z1.s, #0 // =0x0 -; CHECK-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: ldr d0, [sp] -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: lsl z0.s, z0.s, #31 -; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] ; CHECK-NEXT: st1w { z1.s }, p0, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: masked_store_v8f32: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll index b91f813c5141bb..8b296d9fbc215d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll @@ -9,65 +9,44 @@ target triple = "aarch64-unknown-linux-gnu" define void @zip1_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: zip1_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1, #16] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: 
mov z2.b, z0.b[15] -; CHECK-NEXT: mov z3.b, z0.b[14] -; CHECK-NEXT: mov z4.b, z0.b[13] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: mov z3.b, z0.b[11] -; CHECK-NEXT: mov z2.b, z0.b[12] -; CHECK-NEXT: strb w8, [sp, #14] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z0.b[10] -; CHECK-NEXT: strb w9, [sp, #12] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.b, z0.b[9] -; CHECK-NEXT: strb w8, [sp, #10] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z0.b[8] -; CHECK-NEXT: strb w9, [sp, #8] +; CHECK-NEXT: mov z4.b, z0.b[14] +; CHECK-NEXT: mov z6.b, z0.b[13] +; CHECK-NEXT: mov z3.b, z1.b[15] +; CHECK-NEXT: mov z5.b, z1.b[14] +; CHECK-NEXT: mov z7.b, z1.b[13] +; CHECK-NEXT: mov z16.b, z0.b[12] +; CHECK-NEXT: mov z17.b, z1.b[12] +; CHECK-NEXT: mov z18.b, z0.b[11] +; CHECK-NEXT: mov z19.b, z1.b[11] +; CHECK-NEXT: mov z20.b, z0.b[10] +; CHECK-NEXT: mov z21.b, z1.b[10] +; CHECK-NEXT: mov z22.b, z0.b[9] +; CHECK-NEXT: mov z23.b, z1.b[9] +; CHECK-NEXT: mov z24.b, z0.b[8] +; CHECK-NEXT: mov z25.b, z1.b[8] +; CHECK-NEXT: zip1 z2.b, z2.b, z3.b +; CHECK-NEXT: zip1 z3.b, z4.b, z5.b +; CHECK-NEXT: zip1 z4.b, z6.b, z7.b +; CHECK-NEXT: zip1 z5.b, z16.b, z17.b +; CHECK-NEXT: zip1 z6.b, z18.b, z19.b +; CHECK-NEXT: zip1 z7.b, z20.b, z21.b +; CHECK-NEXT: zip1 z16.b, z22.b, z23.b ; CHECK-NEXT: zip1 z0.b, z0.b, z1.b -; CHECK-NEXT: strb w8, [sp, #6] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z1.b[15] -; CHECK-NEXT: strb w8, [sp, #4] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.b, z1.b[14] -; CHECK-NEXT: strb w8, [sp, #2] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z1.b[13] -; CHECK-NEXT: strb w8, [sp] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z1.b[12] -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.b, z1.b[11] -; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z1.b[10] -; CHECK-NEXT: strb w8, [sp, #11] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, 
z1.b[9] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: strb w8, [sp, #9] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.b, z1.b[8] -; CHECK-NEXT: strb w9, [sp, #5] -; CHECK-NEXT: strb w8, [sp, #7] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: strb w8, [sp, #3] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strb w8, [sp, #1] -; CHECK-NEXT: ldr q1, [sp] +; CHECK-NEXT: zip1 z17.b, z24.b, z25.b +; CHECK-NEXT: zip1 z2.h, z3.h, z2.h +; CHECK-NEXT: zip1 z3.h, z5.h, z4.h +; CHECK-NEXT: zip1 z4.h, z7.h, z6.h ; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: zip1 z5.h, z17.h, z16.h +; CHECK-NEXT: zip1 z2.s, z3.s, z2.s +; CHECK-NEXT: zip1 z3.s, z5.s, z4.s +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d ; CHECK-NEXT: str q1, [x0, #16] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip1_v32i8: @@ -159,123 +138,97 @@ define void @zip1_v32i8(ptr %a, ptr %b) { define void @zip_v32i16(ptr %a, ptr %b) { ; CHECK-LABEL: zip_v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: stp d15, d14, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: ldp q1, q3, [x1] -; CHECK-NEXT: ldp q0, q4, [x0] -; CHECK-NEXT: ldp q2, q5, [x0, #32] -; CHECK-NEXT: mov z16.h, z3.h[7] -; CHECK-NEXT: mov z18.h, z3.h[6] -; CHECK-NEXT: mov z17.h, z4.h[7] -; CHECK-NEXT: ldp q6, q7, [x1, #32] -; CHECK-NEXT: mov z19.h, z4.h[6] -; CHECK-NEXT: fmov w8, s16 +; CHECK-NEXT: .cfi_offset b8, -8 +; CHECK-NEXT: .cfi_offset b9, -16 +; CHECK-NEXT: .cfi_offset b10, -24 +; CHECK-NEXT: .cfi_offset b11, -32 +; CHECK-NEXT: .cfi_offset b12, -40 +; CHECK-NEXT: .cfi_offset b13, -48 +; CHECK-NEXT: .cfi_offset b14, -56 +; CHECK-NEXT: .cfi_offset b15, -64 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q2, q3, [x1] +; CHECK-NEXT: mov z5.h, z1.h[7] +; CHECK-NEXT: mov z7.h, z1.h[6] +; CHECK-NEXT: mov z17.h, z1.h[5] +; CHECK-NEXT: mov z4.h, z3.h[7] +; CHECK-NEXT: mov z6.h, z3.h[6] ; CHECK-NEXT: mov z16.h, z3.h[5] -; CHECK-NEXT: fmov w9, s17 -; CHECK-NEXT: mov z17.h, z4.h[5] -; CHECK-NEXT: mov z20.h, z7.h[6] -; CHECK-NEXT: strh w8, [sp, #30] -; CHECK-NEXT: fmov w8, s18 +; CHECK-NEXT: mov z20.h, z2.h[7] +; CHECK-NEXT: mov z21.h, z0.h[7] ; CHECK-NEXT: mov z18.h, z3.h[4] -; CHECK-NEXT: strh w9, [sp, #28] -; CHECK-NEXT: fmov w9, s19 -; CHECK-NEXT: mov z19.h, z5.h[7] -; CHECK-NEXT: zip1 z3.h, z4.h, z3.h -; CHECK-NEXT: strh w8, [sp, #26] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: mov z16.h, z4.h[4] -; CHECK-NEXT: strh w9, [sp, #24] -; CHECK-NEXT: zip1 z4.h, z5.h, z7.h -; CHECK-NEXT: strh w8, [sp, #22] -; CHECK-NEXT: fmov w8, s17 -; CHECK-NEXT: mov z17.h, z1.h[7] -; CHECK-NEXT: add z3.h, z3.h, z4.h -; CHECK-NEXT: strh w8, [sp, #20] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z0.h[7] -; CHECK-NEXT: strh w8, [sp, #18] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: mov z16.h, z1.h[6] -; CHECK-NEXT: 
strh w8, [sp, #16] -; CHECK-NEXT: fmov w8, s17 -; CHECK-NEXT: mov z17.h, z0.h[6] -; CHECK-NEXT: strh w8, [sp, #62] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z1.h[5] -; CHECK-NEXT: strh w8, [sp, #60] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: mov z16.h, z0.h[5] -; CHECK-NEXT: strh w8, [sp, #58] -; CHECK-NEXT: fmov w8, s17 -; CHECK-NEXT: mov z17.h, z1.h[4] -; CHECK-NEXT: strh w8, [sp, #56] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z0.h[4] -; CHECK-NEXT: zip1 z0.h, z0.h, z1.h -; CHECK-NEXT: zip1 z1.h, z2.h, z6.h -; CHECK-NEXT: strh w8, [sp, #54] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: ldr q16, [sp, #16] -; CHECK-NEXT: add z0.h, z0.h, z1.h -; CHECK-NEXT: strh w8, [sp, #52] -; CHECK-NEXT: fmov w8, s17 -; CHECK-NEXT: strh w8, [sp, #50] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z7.h[7] -; CHECK-NEXT: strh w8, [sp, #48] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z5.h[6] -; CHECK-NEXT: ldr q17, [sp, #48] -; CHECK-NEXT: strh w8, [sp, #46] -; CHECK-NEXT: fmov w8, s19 -; CHECK-NEXT: mov z19.h, z7.h[5] -; CHECK-NEXT: strh w8, [sp, #44] -; CHECK-NEXT: fmov w8, s20 -; CHECK-NEXT: mov z20.h, z5.h[5] -; CHECK-NEXT: strh w8, [sp, #42] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z7.h[4] -; CHECK-NEXT: strh w8, [sp, #40] -; CHECK-NEXT: fmov w8, s19 -; CHECK-NEXT: mov z19.h, z5.h[4] -; CHECK-NEXT: strh w8, [sp, #38] -; CHECK-NEXT: fmov w8, s20 -; CHECK-NEXT: mov z20.h, z6.h[7] -; CHECK-NEXT: strh w8, [sp, #36] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z2.h[7] -; CHECK-NEXT: strh w8, [sp, #34] -; CHECK-NEXT: fmov w8, s19 -; CHECK-NEXT: mov z19.h, z6.h[6] -; CHECK-NEXT: strh w8, [sp, #32] -; CHECK-NEXT: fmov w8, s20 -; CHECK-NEXT: mov z20.h, z2.h[6] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z6.h[5] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s19 -; CHECK-NEXT: mov z19.h, z2.h[5] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s20 -; CHECK-NEXT: mov 
z20.h, z6.h[4] -; CHECK-NEXT: fmov w9, s19 -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: mov z18.h, z2.h[4] -; CHECK-NEXT: strh w9, [sp, #4] -; CHECK-NEXT: ldr q2, [sp, #32] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s20 -; CHECK-NEXT: fmov w9, s18 -; CHECK-NEXT: add z2.h, z16.h, z2.h -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: ldr q4, [sp] -; CHECK-NEXT: stp q3, q2, [x0, #32] -; CHECK-NEXT: add z1.h, z17.h, z4.h -; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: mov z19.h, z1.h[4] +; CHECK-NEXT: mov z22.h, z2.h[6] +; CHECK-NEXT: mov z23.h, z0.h[6] +; CHECK-NEXT: zip1 z24.h, z5.h, z4.h +; CHECK-NEXT: zip1 z25.h, z7.h, z6.h +; CHECK-NEXT: zip1 z17.h, z17.h, z16.h +; CHECK-NEXT: ldp q4, q6, [x0, #32] +; CHECK-NEXT: zip1 z16.h, z21.h, z20.h +; CHECK-NEXT: ldp q5, q7, [x1, #32] +; CHECK-NEXT: zip1 z18.h, z19.h, z18.h +; CHECK-NEXT: zip1 z19.s, z25.s, z24.s +; CHECK-NEXT: zip1 z22.h, z23.h, z22.h +; CHECK-NEXT: mov z23.h, z2.h[5] +; CHECK-NEXT: mov z21.h, z6.h[7] +; CHECK-NEXT: mov z24.h, z0.h[5] +; CHECK-NEXT: mov z25.h, z2.h[4] +; CHECK-NEXT: mov z20.h, z7.h[7] +; CHECK-NEXT: mov z26.h, z0.h[4] +; CHECK-NEXT: mov z27.h, z6.h[6] +; CHECK-NEXT: mov z28.h, z7.h[5] +; CHECK-NEXT: mov z29.h, z6.h[5] +; CHECK-NEXT: mov z30.h, z7.h[4] +; CHECK-NEXT: mov z31.h, z6.h[4] +; CHECK-NEXT: mov z8.h, z5.h[7] +; CHECK-NEXT: mov z9.h, z4.h[7] +; CHECK-NEXT: zip1 z20.h, z21.h, z20.h +; CHECK-NEXT: mov z21.h, z7.h[6] +; CHECK-NEXT: mov z10.h, z5.h[6] +; CHECK-NEXT: mov z11.h, z4.h[6] +; CHECK-NEXT: mov z12.h, z5.h[5] +; CHECK-NEXT: mov z13.h, z4.h[5] +; CHECK-NEXT: mov z14.h, z5.h[4] +; CHECK-NEXT: mov z15.h, z4.h[4] +; CHECK-NEXT: zip1 z23.h, z24.h, z23.h +; CHECK-NEXT: zip1 z21.h, z27.h, z21.h +; CHECK-NEXT: zip1 z27.h, z29.h, z28.h +; CHECK-NEXT: zip1 z28.h, z31.h, z30.h +; CHECK-NEXT: zip1 z24.h, z26.h, z25.h +; CHECK-NEXT: zip1 z25.h, z9.h, z8.h +; CHECK-NEXT: zip1 z26.h, z11.h, 
z10.h +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: zip1 z29.h, z13.h, z12.h +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: zip1 z30.h, z15.h, z14.h +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: zip1 z17.s, z18.s, z17.s +; CHECK-NEXT: zip1 z18.s, z21.s, z20.s +; CHECK-NEXT: zip1 z20.s, z28.s, z27.s +; CHECK-NEXT: zip1 z16.s, z22.s, z16.s +; CHECK-NEXT: zip1 z21.s, z24.s, z23.s +; CHECK-NEXT: zip1 z1.h, z1.h, z3.h +; CHECK-NEXT: zip1 z3.s, z26.s, z25.s +; CHECK-NEXT: zip1 z22.s, z30.s, z29.s +; CHECK-NEXT: zip1 z6.h, z6.h, z7.h +; CHECK-NEXT: zip1 z7.d, z17.d, z19.d +; CHECK-NEXT: zip1 z17.d, z20.d, z18.d +; CHECK-NEXT: zip1 z0.h, z0.h, z2.h +; CHECK-NEXT: zip1 z2.h, z4.h, z5.h +; CHECK-NEXT: zip1 z4.d, z21.d, z16.d +; CHECK-NEXT: zip1 z3.d, z22.d, z3.d +; CHECK-NEXT: add z1.h, z1.h, z6.h +; CHECK-NEXT: add z5.h, z7.h, z17.h +; CHECK-NEXT: add z0.h, z0.h, z2.h +; CHECK-NEXT: add z2.h, z4.h, z3.h +; CHECK-NEXT: stp q1, q5, [x0, #32] +; CHECK-NEXT: stp q0, q2, [x0] +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip_v32i16: @@ -436,41 +389,28 @@ define void @zip_v32i16(ptr %a, ptr %b) { define void @zip1_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: zip1_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1, #16] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: mov z2.h, z0.h[7] -; CHECK-NEXT: mov z3.h, z0.h[6] -; CHECK-NEXT: mov z4.h, z0.h[5] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.h, z0.h[4] -; CHECK-NEXT: fmov w9, s3 +; CHECK-NEXT: mov z4.h, z0.h[6] +; CHECK-NEXT: mov z6.h, z0.h[5] ; CHECK-NEXT: mov z3.h, z1.h[7] +; CHECK-NEXT: mov z5.h, z1.h[6] +; CHECK-NEXT: mov z7.h, z1.h[5] +; CHECK-NEXT: mov z16.h, z0.h[4] +; CHECK-NEXT: mov z17.h, z1.h[4] ; CHECK-NEXT: zip1 z0.h, z0.h, z1.h -; CHECK-NEXT: 
strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.h, z1.h[6] -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.h, z1.h[5] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.h, z1.h[4] -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: fmov w9, s4 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w9, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: ldr q1, [sp] +; CHECK-NEXT: zip1 z2.h, z2.h, z3.h +; CHECK-NEXT: zip1 z3.h, z4.h, z5.h +; CHECK-NEXT: zip1 z4.h, z6.h, z7.h +; CHECK-NEXT: zip1 z5.h, z16.h, z17.h ; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: zip1 z2.s, z3.s, z2.s +; CHECK-NEXT: zip1 z3.s, z5.s, z4.s +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d ; CHECK-NEXT: str q1, [x0, #16] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip1_v16i16: @@ -530,8 +470,6 @@ define void @zip1_v16i16(ptr %a, ptr %b) { define void @zip1_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: zip1_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1, #16] @@ -539,18 +477,13 @@ define void @zip1_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mov z2.s, z0.s[3] ; CHECK-NEXT: mov z4.s, z0.s[2] ; CHECK-NEXT: mov z3.s, z1.s[3] +; CHECK-NEXT: mov z5.s, z1.s[2] ; CHECK-NEXT: zip1 z0.s, z0.s, z1.s -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z1.s[2] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: stp w8, w9, [sp, #8] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: stp w8, w9, [sp] -; CHECK-NEXT: ldr q1, [sp] +; CHECK-NEXT: zip1 z2.s, z2.s, z3.s +; CHECK-NEXT: zip1 z3.s, z4.s, z5.s ; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d ; CHECK-NEXT: str q1, [x0, #16] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip1_v8i32: @@ -636,25 +569,18 @@ 
define void @zip_v4f64(ptr %a, ptr %b) { define void @zip_v4i32(ptr %a, ptr %b) { ; CHECK-LABEL: zip_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: mov z2.s, z0.s[3] ; CHECK-NEXT: mov z3.s, z1.s[3] ; CHECK-NEXT: mov z4.s, z0.s[2] +; CHECK-NEXT: mov z5.s, z1.s[2] ; CHECK-NEXT: zip1 z0.s, z1.s, z0.s -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z1.s[2] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: stp w9, w8, [sp, #8] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: stp w9, w8, [sp] -; CHECK-NEXT: ldr q1, [sp] +; CHECK-NEXT: zip1 z2.s, z3.s, z2.s +; CHECK-NEXT: zip1 z3.s, z5.s, z4.s +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip_v4i32: @@ -1209,65 +1135,44 @@ define void @trn_v8i32_undef(ptr %a) { define void @zip2_v32i8(ptr %a, ptr %b) #0{ ; CHECK-LABEL: zip2_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: ldr q1, [x1, #16] ; CHECK-NEXT: mov z2.b, z0.b[15] -; CHECK-NEXT: mov z3.b, z0.b[14] -; CHECK-NEXT: mov z4.b, z0.b[13] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: mov z3.b, z0.b[11] -; CHECK-NEXT: mov z2.b, z0.b[12] -; CHECK-NEXT: strb w8, [sp, #14] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z0.b[10] -; CHECK-NEXT: strb w9, [sp, #12] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.b, z0.b[9] -; CHECK-NEXT: strb w8, [sp, #10] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z0.b[8] -; CHECK-NEXT: strb w9, [sp, #8] +; CHECK-NEXT: mov z4.b, z0.b[14] +; CHECK-NEXT: mov z6.b, z0.b[13] +; CHECK-NEXT: mov z3.b, z1.b[15] +; CHECK-NEXT: mov z5.b, z1.b[14] +; CHECK-NEXT: mov z7.b, z1.b[13] +; CHECK-NEXT: mov z16.b, z0.b[12] +; 
CHECK-NEXT: mov z17.b, z1.b[12] +; CHECK-NEXT: mov z18.b, z0.b[11] +; CHECK-NEXT: mov z19.b, z1.b[11] +; CHECK-NEXT: mov z20.b, z0.b[10] +; CHECK-NEXT: mov z21.b, z1.b[10] +; CHECK-NEXT: mov z22.b, z0.b[9] +; CHECK-NEXT: mov z23.b, z1.b[9] +; CHECK-NEXT: mov z24.b, z0.b[8] +; CHECK-NEXT: mov z25.b, z1.b[8] +; CHECK-NEXT: zip1 z2.b, z2.b, z3.b +; CHECK-NEXT: zip1 z3.b, z4.b, z5.b +; CHECK-NEXT: zip1 z4.b, z6.b, z7.b +; CHECK-NEXT: zip1 z5.b, z16.b, z17.b +; CHECK-NEXT: zip1 z6.b, z18.b, z19.b +; CHECK-NEXT: zip1 z7.b, z20.b, z21.b +; CHECK-NEXT: zip1 z16.b, z22.b, z23.b ; CHECK-NEXT: zip1 z0.b, z0.b, z1.b -; CHECK-NEXT: strb w8, [sp, #6] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z1.b[15] -; CHECK-NEXT: strb w8, [sp, #4] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.b, z1.b[14] -; CHECK-NEXT: strb w8, [sp, #2] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z1.b[13] -; CHECK-NEXT: strb w8, [sp] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z1.b[12] -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.b, z1.b[11] -; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z1.b[10] -; CHECK-NEXT: strb w8, [sp, #11] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z1.b[9] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: strb w8, [sp, #9] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.b, z1.b[8] -; CHECK-NEXT: strb w9, [sp, #5] -; CHECK-NEXT: strb w8, [sp, #7] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: strb w8, [sp, #3] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strb w8, [sp, #1] -; CHECK-NEXT: ldr q1, [sp] +; CHECK-NEXT: zip1 z17.b, z24.b, z25.b +; CHECK-NEXT: zip1 z2.h, z3.h, z2.h +; CHECK-NEXT: zip1 z3.h, z5.h, z4.h +; CHECK-NEXT: zip1 z4.h, z7.h, z6.h ; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: zip1 z5.h, z17.h, z16.h +; CHECK-NEXT: zip1 z2.s, z3.s, z2.s +; CHECK-NEXT: zip1 z3.s, z5.s, z4.s +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d ; CHECK-NEXT: str q1, [x0, #16] -; CHECK-NEXT: add sp, sp, #16 ; 
CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip2_v32i8: @@ -1359,41 +1264,28 @@ define void @zip2_v32i8(ptr %a, ptr %b) #0{ define void @zip2_v16i16(ptr %a, ptr %b) #0{ ; CHECK-LABEL: zip2_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: ldr q1, [x1, #16] ; CHECK-NEXT: mov z2.h, z0.h[7] -; CHECK-NEXT: mov z3.h, z0.h[6] -; CHECK-NEXT: mov z4.h, z0.h[5] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.h, z0.h[4] -; CHECK-NEXT: fmov w9, s3 +; CHECK-NEXT: mov z4.h, z0.h[6] +; CHECK-NEXT: mov z6.h, z0.h[5] ; CHECK-NEXT: mov z3.h, z1.h[7] +; CHECK-NEXT: mov z5.h, z1.h[6] +; CHECK-NEXT: mov z7.h, z1.h[5] +; CHECK-NEXT: mov z16.h, z0.h[4] +; CHECK-NEXT: mov z17.h, z1.h[4] ; CHECK-NEXT: zip1 z0.h, z0.h, z1.h -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.h, z1.h[6] -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.h, z1.h[5] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.h, z1.h[4] -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: fmov w9, s4 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w9, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: ldr q1, [sp] +; CHECK-NEXT: zip1 z2.h, z2.h, z3.h +; CHECK-NEXT: zip1 z3.h, z4.h, z5.h +; CHECK-NEXT: zip1 z4.h, z6.h, z7.h +; CHECK-NEXT: zip1 z5.h, z16.h, z17.h ; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: zip1 z2.s, z3.s, z2.s +; CHECK-NEXT: zip1 z3.s, z5.s, z4.s +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d ; CHECK-NEXT: str q1, [x0, #16] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip2_v16i16: @@ -1453,8 +1345,6 @@ define void @zip2_v16i16(ptr %a, ptr %b) #0{ define void @zip2_v8i32(ptr %a, ptr %b) #0{ ; CHECK-LABEL: zip2_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 
-; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: ldr q1, [x1] @@ -1462,18 +1352,13 @@ define void @zip2_v8i32(ptr %a, ptr %b) #0{ ; CHECK-NEXT: mov z2.s, z0.s[3] ; CHECK-NEXT: mov z4.s, z0.s[2] ; CHECK-NEXT: mov z3.s, z1.s[3] +; CHECK-NEXT: mov z5.s, z1.s[2] ; CHECK-NEXT: zip1 z0.s, z0.s, z1.s -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.s, z1.s[2] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: stp w8, w9, [sp, #8] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: stp w8, w9, [sp] -; CHECK-NEXT: ldr q1, [sp] +; CHECK-NEXT: zip1 z2.s, z2.s, z3.s +; CHECK-NEXT: zip1 z3.s, z4.s, z5.s ; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d ; CHECK-NEXT: str q1, [x0, #16] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip2_v8i32: @@ -1547,197 +1432,139 @@ define void @zip2_v8i32_undef(ptr %a) #0{ define void @uzp_v32i8(ptr %a, ptr %b) #0{ ; CHECK-LABEL: uzp_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: ldp q0, q1, [x1] -; CHECK-NEXT: mov z4.b, z3.b[14] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z6.b, z3.b[10] -; CHECK-NEXT: mov z5.b, z3.b[12] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z7.b, z3.b[8] -; CHECK-NEXT: mov z17.b, z3.b[9] -; CHECK-NEXT: mov z18.b, z3.b[7] -; CHECK-NEXT: mov z16.b, z3.b[11] -; CHECK-NEXT: strb w8, [sp, #40] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z3.b[6] -; CHECK-NEXT: strb w9, [sp, #32] -; CHECK-NEXT: fmov w9, s5 -; CHECK-NEXT: mov z5.b, z3.b[4] -; CHECK-NEXT: strb w8, [sp, #47] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z3.b[2] -; CHECK-NEXT: strb w9, [sp, #46] -; CHECK-NEXT: fmov w9, s7 -; CHECK-NEXT: mov z7.b, z2.b[14] -; CHECK-NEXT: strb w8, [sp, #45] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z2.b[12] -; CHECK-NEXT: strb w9, [sp, #44] -; CHECK-NEXT: fmov w9, s16 -; CHECK-NEXT: mov z16.b, z2.b[11] 
-; CHECK-NEXT: strb w8, [sp, #43] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.b, z2.b[10] -; CHECK-NEXT: strb w9, [sp, #61] -; CHECK-NEXT: fmov w9, s16 -; CHECK-NEXT: strb w8, [sp, #42] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z2.b[8] -; CHECK-NEXT: strb w9, [sp, #53] -; CHECK-NEXT: strb w8, [sp, #41] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z2.b[6] -; CHECK-NEXT: strb w8, [sp, #39] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z2.b[4] -; CHECK-NEXT: strb w8, [sp, #38] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.b, z2.b[2] -; CHECK-NEXT: strb w8, [sp, #37] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z1.b[10] -; CHECK-NEXT: strb w8, [sp, #36] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z1.b[8] -; CHECK-NEXT: strb w8, [sp, #35] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z1.b[14] -; CHECK-NEXT: strb w8, [sp, #34] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.b, z1.b[12] -; CHECK-NEXT: strb w8, [sp, #33] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strb w8, [sp, #8] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strb w8, [sp] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z1.b[6] -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.b, z1.b[4] -; CHECK-NEXT: strb w8, [sp, #14] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z1.b[2] -; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z0.b[14] -; CHECK-NEXT: strb w8, [sp, #12] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z0.b[12] -; CHECK-NEXT: strb w8, [sp, #11] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.b, z0.b[10] -; CHECK-NEXT: strb w8, [sp, #10] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z0.b[8] -; CHECK-NEXT: strb w8, [sp, #9] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z0.b[6] -; CHECK-NEXT: strb w8, [sp, #7] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.b, z0.b[4] -; CHECK-NEXT: strb w8, [sp, #6] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.b, 
z0.b[2] -; CHECK-NEXT: strb w8, [sp, #5] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z3.b[15] -; CHECK-NEXT: strb w8, [sp, #4] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z3.b[13] -; CHECK-NEXT: strb w8, [sp, #3] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: ldr q4, [sp, #32] -; CHECK-NEXT: strb w8, [sp, #2] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: strb w8, [sp, #1] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z3.b[5] -; CHECK-NEXT: mov z3.b, z3.b[3] -; CHECK-NEXT: ldr q5, [sp] -; CHECK-NEXT: strb w8, [sp, #63] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z2.b[13] -; CHECK-NEXT: strb w8, [sp, #62] -; CHECK-NEXT: fmov w8, s17 -; CHECK-NEXT: strb w8, [sp, #60] -; CHECK-NEXT: fmov w8, s18 -; CHECK-NEXT: strb w8, [sp, #59] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z2.b[9] -; CHECK-NEXT: strb w8, [sp, #58] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z2.b[5] -; CHECK-NEXT: strb w8, [sp, #57] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z2.b[3] +; CHECK-NEXT: stp d13, d12, [sp, #-48]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset b8, -8 +; CHECK-NEXT: .cfi_offset b9, -16 +; CHECK-NEXT: .cfi_offset b10, -24 +; CHECK-NEXT: .cfi_offset b11, -32 +; CHECK-NEXT: .cfi_offset b12, -40 +; CHECK-NEXT: .cfi_offset b13, -48 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.b, z1.b[14] +; CHECK-NEXT: mov z3.b, z1.b[12] +; CHECK-NEXT: mov z4.b, z1.b[10] +; CHECK-NEXT: mov z5.b, z1.b[8] +; CHECK-NEXT: mov z6.b, z1.b[6] +; CHECK-NEXT: mov z7.b, z1.b[4] +; CHECK-NEXT: mov z16.b, z1.b[2] +; CHECK-NEXT: mov z18.b, z0.b[14] +; CHECK-NEXT: mov z19.b, z0.b[12] +; CHECK-NEXT: zip1 z3.b, z3.b, z2.b +; CHECK-NEXT: ldp q2, q17, [x1] +; CHECK-NEXT: mov z20.b, z0.b[10] +; CHECK-NEXT: zip1 z4.b, z5.b, z4.b +; CHECK-NEXT: zip1 z5.b, z7.b, z6.b +; CHECK-NEXT: zip1 z6.b, z1.b, z16.b +; CHECK-NEXT: mov z7.b, z0.b[8] +; CHECK-NEXT: mov z16.b, z0.b[6] +; CHECK-NEXT: mov z21.b, z0.b[4] +; CHECK-NEXT: mov z22.b, z0.b[2] +; CHECK-NEXT: mov z23.b, z17.b[14] +; CHECK-NEXT: mov z24.b, z17.b[12] +; CHECK-NEXT: mov z25.b, z17.b[10] +; CHECK-NEXT: mov z26.b, z17.b[8] +; CHECK-NEXT: mov z27.b, z17.b[6] +; CHECK-NEXT: mov z28.b, z17.b[4] +; CHECK-NEXT: mov z29.b, z17.b[2] +; CHECK-NEXT: zip1 z18.b, z19.b, z18.b +; CHECK-NEXT: zip1 z7.b, z7.b, z20.b +; CHECK-NEXT: zip1 z16.b, z21.b, z16.b +; CHECK-NEXT: zip1 z19.b, z0.b, z22.b +; CHECK-NEXT: zip1 z20.b, z24.b, z23.b +; CHECK-NEXT: zip1 z21.b, z26.b, z25.b +; CHECK-NEXT: zip1 z22.b, z28.b, z27.b +; CHECK-NEXT: mov z24.b, z2.b[14] +; CHECK-NEXT: mov z25.b, z2.b[12] +; CHECK-NEXT: mov z26.b, z2.b[10] +; CHECK-NEXT: mov z27.b, z2.b[8] +; CHECK-NEXT: zip1 z23.b, z17.b, z29.b +; CHECK-NEXT: zip1 z3.h, z4.h, z3.h +; CHECK-NEXT: zip1 z4.h, z6.h, z5.h +; CHECK-NEXT: zip1 z5.h, z7.h, z18.h +; CHECK-NEXT: zip1 z6.h, z19.h, z16.h +; CHECK-NEXT: zip1 z7.h, z21.h, z20.h +; CHECK-NEXT: 
zip1 z18.b, z25.b, z24.b +; CHECK-NEXT: zip1 z19.b, z27.b, z26.b +; CHECK-NEXT: mov z20.b, z2.b[6] +; CHECK-NEXT: mov z21.b, z2.b[4] +; CHECK-NEXT: mov z29.b, z17.b[3] +; CHECK-NEXT: mov z30.b, z17.b[1] +; CHECK-NEXT: mov z31.b, z2.b[15] +; CHECK-NEXT: mov z8.b, z2.b[13] +; CHECK-NEXT: zip1 z16.h, z23.h, z22.h +; CHECK-NEXT: mov z22.b, z2.b[2] +; CHECK-NEXT: mov z23.b, z17.b[15] +; CHECK-NEXT: mov z24.b, z17.b[13] +; CHECK-NEXT: mov z25.b, z17.b[11] +; CHECK-NEXT: mov z26.b, z17.b[9] +; CHECK-NEXT: mov z27.b, z17.b[7] +; CHECK-NEXT: mov z28.b, z17.b[5] +; CHECK-NEXT: zip1 z17.h, z19.h, z18.h +; CHECK-NEXT: zip1 z21.b, z21.b, z20.b +; CHECK-NEXT: zip1 z19.b, z30.b, z29.b +; CHECK-NEXT: zip1 z20.b, z8.b, z31.b +; CHECK-NEXT: mov z29.b, z1.b[15] +; CHECK-NEXT: mov z30.b, z1.b[13] +; CHECK-NEXT: mov z31.b, z1.b[11] +; CHECK-NEXT: mov z8.b, z1.b[9] +; CHECK-NEXT: zip1 z22.b, z2.b, z22.b +; CHECK-NEXT: zip1 z23.b, z24.b, z23.b +; CHECK-NEXT: zip1 z24.b, z26.b, z25.b +; CHECK-NEXT: zip1 z18.b, z28.b, z27.b +; CHECK-NEXT: mov z25.b, z2.b[11] +; CHECK-NEXT: mov z26.b, z2.b[9] +; CHECK-NEXT: mov z27.b, z2.b[7] +; CHECK-NEXT: mov z28.b, z2.b[5] +; CHECK-NEXT: mov z9.b, z1.b[7] +; CHECK-NEXT: mov z10.b, z1.b[5] +; CHECK-NEXT: mov z1.b, z1.b[3] +; CHECK-NEXT: mov z11.b, z0.b[11] +; CHECK-NEXT: mov z12.b, z0.b[9] +; CHECK-NEXT: zip1 z29.b, z30.b, z29.b +; CHECK-NEXT: mov z30.b, z0.b[3] +; CHECK-NEXT: mov z13.b, z0.b[1] +; CHECK-NEXT: zip1 z31.b, z8.b, z31.b +; CHECK-NEXT: mov z8.b, z2.b[3] ; CHECK-NEXT: mov z2.b, z2.b[1] -; CHECK-NEXT: strb w8, [sp, #54] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z1.b[15] -; CHECK-NEXT: strb w8, [sp, #52] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z1.b[13] -; CHECK-NEXT: strb w8, [sp, #50] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z1.b[11] -; CHECK-NEXT: strb w8, [sp, #49] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.b, z1.b[9] -; CHECK-NEXT: strb w8, [sp, #48] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, 
z1.b[7] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.b, z0.b[15] -; CHECK-NEXT: strb w8, [sp, #31] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z1.b[5] -; CHECK-NEXT: strb w9, [sp, #28] -; CHECK-NEXT: strb w8, [sp, #30] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.b, z1.b[3] -; CHECK-NEXT: mov z1.b, z1.b[1] -; CHECK-NEXT: strb w8, [sp, #29] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z0.b[11] -; CHECK-NEXT: strb w8, [sp, #27] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z0.b[13] -; CHECK-NEXT: strb w8, [sp, #26] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: strb w8, [sp, #25] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z1.b, z0.b[9] -; CHECK-NEXT: strb w8, [sp, #24] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.b, z0.b[7] -; CHECK-NEXT: strb w8, [sp, #23] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z3.b, z0.b[5] -; CHECK-NEXT: strb w8, [sp, #22] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.b, z0.b[3] -; CHECK-NEXT: mov z0.b, z0.b[1] -; CHECK-NEXT: strb w8, [sp, #21] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strb w8, [sp, #20] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strb w8, [sp, #19] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strb w8, [sp, #18] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: strb w8, [sp, #17] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: ldr q0, [sp, #48] -; CHECK-NEXT: add z0.b, z4.b, z0.b -; CHECK-NEXT: strb w8, [sp, #16] -; CHECK-NEXT: ldr q1, [sp, #16] -; CHECK-NEXT: add z1.b, z5.b, z1.b +; CHECK-NEXT: zip1 z9.b, z10.b, z9.b +; CHECK-NEXT: zip1 z10.b, z12.b, z11.b +; CHECK-NEXT: zip1 z1.b, z0.b, z1.b +; CHECK-NEXT: zip1 z30.b, z13.b, z30.b +; CHECK-NEXT: mov z11.b, z0.b[13] +; CHECK-NEXT: mov z0.b, z0.b[5] +; CHECK-NEXT: zip1 z25.b, z26.b, z25.b +; CHECK-NEXT: zip1 z26.b, z28.b, z27.b +; CHECK-NEXT: zip1 z2.b, z2.b, z8.b +; CHECK-NEXT: zip1 z21.h, z22.h, z21.h +; CHECK-NEXT: zip1 z22.h, z24.h, z23.h +; CHECK-NEXT: zip1 z23.h, z31.h, z29.h +; CHECK-NEXT: zip1 z1.h, z1.h, z9.h +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 
16-byte Folded Reload +; CHECK-NEXT: zip1 z24.h, z10.h, z11.h +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: zip1 z0.h, z30.h, z0.h +; CHECK-NEXT: zip1 z18.h, z19.h, z18.h +; CHECK-NEXT: zip1 z19.h, z25.h, z20.h +; CHECK-NEXT: zip1 z2.h, z2.h, z26.h +; CHECK-NEXT: zip1 z3.s, z4.s, z3.s +; CHECK-NEXT: zip1 z4.s, z6.s, z5.s +; CHECK-NEXT: zip1 z5.s, z16.s, z7.s +; CHECK-NEXT: zip1 z1.s, z1.s, z23.s +; CHECK-NEXT: zip1 z6.s, z21.s, z17.s +; CHECK-NEXT: zip1 z0.s, z0.s, z24.s +; CHECK-NEXT: zip1 z7.s, z18.s, z22.s +; CHECK-NEXT: zip1 z2.s, z2.s, z19.s +; CHECK-NEXT: zip1 z3.d, z4.d, z3.d +; CHECK-NEXT: zip1 z0.d, z0.d, z1.d +; CHECK-NEXT: zip1 z1.d, z6.d, z5.d +; CHECK-NEXT: zip1 z2.d, z2.d, z7.d +; CHECK-NEXT: add z0.b, z3.b, z0.b +; CHECK-NEXT: add z1.b, z1.b, z2.b ; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ldp d13, d12, [sp], #48 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: uzp_v32i8: @@ -1922,110 +1749,71 @@ define void @uzp_v4i16(ptr %a, ptr %b) #0{ define void @uzp_v16i16(ptr %a, ptr %b) #0{ ; CHECK-LABEL: uzp_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: ldp q0, q1, [x1] -; CHECK-NEXT: mov z4.h, z3.h[6] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z6.h, z3.h[2] -; CHECK-NEXT: mov z5.h, z3.h[4] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z7.h, z2.h[6] -; CHECK-NEXT: mov z17.h, z2.h[7] -; CHECK-NEXT: mov z16.h, z3.h[1] -; CHECK-NEXT: strh w8, [sp, #40] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.h, z2.h[4] -; CHECK-NEXT: strh w9, [sp, #32] -; CHECK-NEXT: fmov w9, s5 -; CHECK-NEXT: mov z5.h, z2.h[2] -; CHECK-NEXT: strh w8, [sp, #46] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.h, z1.h[2] -; CHECK-NEXT: strh w9, [sp, #44] -; CHECK-NEXT: fmov w9, s7 -; CHECK-NEXT: mov z7.h, z0.h[6] -; CHECK-NEXT: strh w8, [sp, #42] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.h, z1.h[6] 
-; CHECK-NEXT: strh w9, [sp, #38] -; CHECK-NEXT: fmov w9, s16 -; CHECK-NEXT: strh w8, [sp, #36] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.h, z1.h[4] -; CHECK-NEXT: strh w9, [sp, #56] -; CHECK-NEXT: strh w8, [sp, #34] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.h, z0.h[4] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.h, z0.h[2] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.h, z3.h[7] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.h, z3.h[5] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.h, z3.h[3] -; CHECK-NEXT: ldr q3, [sp, #32] -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.h, z2.h[5] -; CHECK-NEXT: ldr q4, [sp] -; CHECK-NEXT: strh w8, [sp, #62] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: mov z7.h, z1.h[7] -; CHECK-NEXT: strh w8, [sp, #60] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.h, z2.h[3] -; CHECK-NEXT: mov z2.h, z2.h[1] -; CHECK-NEXT: strh w8, [sp, #58] -; CHECK-NEXT: fmov w8, s17 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.h, z0.h[7] -; CHECK-NEXT: strh w8, [sp, #54] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.h, z1.h[5] -; CHECK-NEXT: strh w9, [sp, #48] -; CHECK-NEXT: strh w8, [sp, #52] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.h, z1.h[3] +; CHECK-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: ldp q1, q6, [x0] +; CHECK-NEXT: ldp q0, q2, [x1] +; CHECK-NEXT: mov z3.h, z6.h[6] +; CHECK-NEXT: mov z4.h, z6.h[4] +; CHECK-NEXT: mov z5.h, z6.h[2] +; CHECK-NEXT: mov z7.h, z1.h[6] +; CHECK-NEXT: mov z16.h, z1.h[4] +; CHECK-NEXT: mov z17.h, z1.h[2] +; CHECK-NEXT: mov z18.h, z2.h[6] +; CHECK-NEXT: mov z19.h, z2.h[4] +; CHECK-NEXT: mov z20.h, z2.h[2] +; CHECK-NEXT: mov z21.h, z0.h[6] +; CHECK-NEXT: mov z22.h, z0.h[4] +; CHECK-NEXT: zip1 z3.h, z4.h, z3.h +; CHECK-NEXT: zip1 z4.h, z6.h, z5.h +; CHECK-NEXT: zip1 z5.h, z16.h, z7.h +; CHECK-NEXT: zip1 z7.h, z1.h, z17.h +; CHECK-NEXT: zip1 z16.h, z19.h, z18.h +; CHECK-NEXT: zip1 z18.h, z2.h, z20.h +; CHECK-NEXT: mov z19.h, z0.h[2] +; CHECK-NEXT: zip1 z17.h, z22.h, z21.h +; CHECK-NEXT: mov z20.h, z6.h[7] +; CHECK-NEXT: mov z21.h, z6.h[5] +; CHECK-NEXT: mov z22.h, z6.h[3] +; CHECK-NEXT: mov z6.h, z6.h[1] +; CHECK-NEXT: mov z23.h, z1.h[7] +; CHECK-NEXT: mov z24.h, z1.h[5] +; CHECK-NEXT: mov z25.h, z1.h[3] ; CHECK-NEXT: mov z1.h, z1.h[1] -; CHECK-NEXT: strh w8, [sp, #50] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: strh w8, [sp, #30] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: mov z6.h, z0.h[5] -; CHECK-NEXT: strh w8, [sp, #28] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.h, z0.h[3] -; CHECK-NEXT: mov z0.h, z0.h[1] -; CHECK-NEXT: strh w8, [sp, #26] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w8, [sp, #22] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: strh w8, [sp, #20] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: strh w8, [sp, #18] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: ldr q0, [sp, #48] -; CHECK-NEXT: add z0.h, z3.h, z0.h -; CHECK-NEXT: strh w8, [sp, #16] -; CHECK-NEXT: ldr q1, [sp, #16] -; CHECK-NEXT: add z1.h, z4.h, z1.h -; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: mov z26.h, z2.h[7] +; CHECK-NEXT: mov z27.h, z2.h[5] +; 
CHECK-NEXT: mov z28.h, z2.h[3] +; CHECK-NEXT: mov z2.h, z2.h[1] +; CHECK-NEXT: mov z29.h, z0.h[7] +; CHECK-NEXT: mov z30.h, z0.h[5] +; CHECK-NEXT: mov z31.h, z0.h[3] +; CHECK-NEXT: mov z8.h, z0.h[1] +; CHECK-NEXT: zip1 z0.h, z0.h, z19.h +; CHECK-NEXT: zip1 z19.h, z21.h, z20.h +; CHECK-NEXT: zip1 z6.h, z6.h, z22.h +; CHECK-NEXT: zip1 z20.h, z24.h, z23.h +; CHECK-NEXT: zip1 z1.h, z1.h, z25.h +; CHECK-NEXT: zip1 z21.h, z27.h, z26.h +; CHECK-NEXT: zip1 z2.h, z2.h, z28.h +; CHECK-NEXT: zip1 z22.h, z30.h, z29.h +; CHECK-NEXT: zip1 z23.h, z8.h, z31.h +; CHECK-NEXT: zip1 z3.s, z4.s, z3.s +; CHECK-NEXT: zip1 z4.s, z7.s, z5.s +; CHECK-NEXT: zip1 z5.s, z18.s, z16.s +; CHECK-NEXT: zip1 z6.s, z6.s, z19.s +; CHECK-NEXT: zip1 z1.s, z1.s, z20.s +; CHECK-NEXT: zip1 z0.s, z0.s, z17.s +; CHECK-NEXT: zip1 z2.s, z2.s, z21.s +; CHECK-NEXT: zip1 z7.s, z23.s, z22.s +; CHECK-NEXT: zip1 z3.d, z4.d, z3.d +; CHECK-NEXT: zip1 z1.d, z1.d, z6.d +; CHECK-NEXT: zip1 z0.d, z0.d, z5.d +; CHECK-NEXT: zip1 z2.d, z7.d, z2.d +; CHECK-NEXT: add z1.h, z3.h, z1.h +; CHECK-NEXT: add z0.h, z0.h, z2.h +; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: uzp_v16i16: @@ -2116,32 +1904,28 @@ define void @uzp_v16i16(ptr %a, ptr %b) #0{ define void @uzp_v8f32(ptr %a, ptr %b) #0{ ; CHECK-LABEL: uzp_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: ldp q2, q0, [x0] +; CHECK-NEXT: ldp q6, q0, [x0] ; CHECK-NEXT: adrp x8, .LCPI21_0 -; CHECK-NEXT: ldp q4, q1, [x1] +; CHECK-NEXT: ldp q1, q2, [x1] ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: mov z3.s, z0.s[2] -; CHECK-NEXT: mov z5.s, z1.s[2] -; CHECK-NEXT: stp s0, s3, [sp, #24] -; CHECK-NEXT: mov z3.s, z4.s[2] -; CHECK-NEXT: stp s5, s2, [sp, #12] -; CHECK-NEXT: mov z5.s, z0.s[3] -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: stp s3, s1, [sp, #4] -; CHECK-NEXT: mov z1.s, z2.s[1] -; CHECK-NEXT: str s5, [sp, #44] +; CHECK-NEXT: mov z4.s, 
z0.s[3] +; CHECK-NEXT: mov z5.s, z0.s[1] +; CHECK-NEXT: mov z7.s, z2.s[2] +; CHECK-NEXT: mov z16.s, z1.s[2] +; CHECK-NEXT: zip1 z0.s, z0.s, z3.s +; CHECK-NEXT: zip1 z3.s, z5.s, z4.s +; CHECK-NEXT: mov z4.s, z6.s[1] +; CHECK-NEXT: zip1 z2.s, z2.s, z7.s ; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI21_0] -; CHECK-NEXT: str s0, [sp, #40] -; CHECK-NEXT: ldp q3, q2, [sp] -; CHECK-NEXT: tbl z0.s, { z4.s }, z5.s -; CHECK-NEXT: str s1, [sp, #32] -; CHECK-NEXT: ldr q1, [sp, #32] -; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: zip1 z7.s, z0.s, z16.s +; CHECK-NEXT: tbl z1.s, { z1.s }, z5.s +; CHECK-NEXT: zip1 z0.d, z6.d, z0.d +; CHECK-NEXT: zip1 z3.d, z4.d, z3.d +; CHECK-NEXT: zip1 z2.d, z7.d, z2.d ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z3.s -; CHECK-NEXT: stp q1, q0, [x0] -; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: uzp_v8f32: @@ -2231,60 +2015,38 @@ define void @uzp_v4i64(ptr %a, ptr %b) #0{ define void @uzp_v8i16(ptr %a, ptr %b) #0{ ; CHECK-LABEL: uzp_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: mov z2.h, z1.h[6] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z4.h, z1.h[2] -; CHECK-NEXT: mov z6.h, z0.h[4] -; CHECK-NEXT: mov z3.h, z1.h[4] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: mov z5.h, z0.h[6] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.h, z0.h[2] -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: mov z3.h, z1.h[7] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.h, z1.h[5] -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: fmov w9, s5 -; CHECK-NEXT: mov z5.h, z1.h[3] -; CHECK-NEXT: mov z1.h, z1.h[1] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: strh w9, [sp, #6] -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: strh w8, [sp, #4] -; 
CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z2.h, z0.h[7] -; CHECK-NEXT: strh w9, [sp, #24] -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w8, [sp, #30] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: mov z4.h, z0.h[5] -; CHECK-NEXT: strh w8, [sp, #28] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: mov z5.h, z0.h[3] -; CHECK-NEXT: mov z0.h, z0.h[1] -; CHECK-NEXT: strh w8, [sp, #26] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: strh w8, [sp, #22] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: strh w8, [sp, #20] -; CHECK-NEXT: fmov w8, s5 -; CHECK-NEXT: strh w8, [sp, #18] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w8, [sp, #16] -; CHECK-NEXT: ldp q3, q0, [sp] -; CHECK-NEXT: add z0.h, z3.h, z0.h +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: mov z2.h, z0.h[6] +; CHECK-NEXT: mov z3.h, z0.h[4] +; CHECK-NEXT: mov z4.h, z0.h[2] +; CHECK-NEXT: mov z5.h, z1.h[6] +; CHECK-NEXT: mov z6.h, z1.h[4] +; CHECK-NEXT: mov z7.h, z1.h[2] +; CHECK-NEXT: mov z16.h, z0.h[7] +; CHECK-NEXT: mov z17.h, z0.h[5] +; CHECK-NEXT: mov z18.h, z0.h[3] +; CHECK-NEXT: mov z19.h, z0.h[1] +; CHECK-NEXT: mov z20.h, z1.h[7] +; CHECK-NEXT: mov z21.h, z1.h[5] +; CHECK-NEXT: mov z22.h, z1.h[3] +; CHECK-NEXT: mov z23.h, z1.h[1] +; CHECK-NEXT: zip1 z2.h, z3.h, z2.h +; CHECK-NEXT: zip1 z0.h, z0.h, z4.h +; CHECK-NEXT: zip1 z3.h, z6.h, z5.h +; CHECK-NEXT: zip1 z1.h, z1.h, z7.h +; CHECK-NEXT: zip1 z4.h, z17.h, z16.h +; CHECK-NEXT: zip1 z5.h, z19.h, z18.h +; CHECK-NEXT: zip1 z6.h, z21.h, z20.h +; CHECK-NEXT: zip1 z7.h, z23.h, z22.h +; CHECK-NEXT: zip1 z0.s, z0.s, z2.s +; CHECK-NEXT: zip1 z1.s, z1.s, z3.s +; CHECK-NEXT: zip1 z2.s, z5.s, z4.s +; CHECK-NEXT: zip1 z3.s, z7.s, z6.s +; CHECK-NEXT: zip1 z0.d, z1.d, z0.d +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d +; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: uzp_v8i16: @@ -2341,31 +2103,21 @@ define void @uzp_v8i16(ptr %a, ptr %b) #0{ define void 
@uzp_v8i32_undef(ptr %a) #0{ ; CHECK-LABEL: uzp_v8i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: mov z2.s, z0.s[2] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov z3.s, z1.s[2] -; CHECK-NEXT: mov z4.s, z0.s[3] -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov z2.s, z1.s[3] -; CHECK-NEXT: stp w8, w9, [sp, #8] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: mov z1.s, z1.s[1] -; CHECK-NEXT: stp w8, w9, [sp] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: stp w9, w8, [sp, #24] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: stp w9, w8, [sp, #16] -; CHECK-NEXT: ldp q0, q1, [sp] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.s, z1.s[2] +; CHECK-NEXT: mov z3.s, z0.s[2] +; CHECK-NEXT: mov z4.s, z1.s[3] +; CHECK-NEXT: mov z5.s, z1.s[1] +; CHECK-NEXT: mov z6.s, z0.s[3] +; CHECK-NEXT: mov z7.s, z0.s[1] +; CHECK-NEXT: zip1 z1.s, z1.s, z2.s +; CHECK-NEXT: zip1 z0.s, z0.s, z3.s +; CHECK-NEXT: zip1 z2.s, z5.s, z4.s +; CHECK-NEXT: zip1 z3.s, z7.s, z6.s +; CHECK-NEXT: zip1 z0.d, z0.d, z1.d +; CHECK-NEXT: zip1 z1.d, z3.d, z2.d ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: stp q0, q0, [x0] -; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: uzp_v8i32_undef: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll index 88c83a214c7394..c942f1eca8ebaf 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll @@ -10,22 +10,14 @@ target triple = "aarch64-unknown-linux-gnu" define <4 x i1> @reshuffle_v4i1_nxv4i1( %a) { ; CHECK-LABEL: reshuffle_v4i1_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: mov z0.s, p0/z, 
#1 // =0x1 ; CHECK-NEXT: mov z1.s, z0.s[3] -; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: mov z2.s, z0.s[2] ; CHECK-NEXT: mov z3.s, z0.s[1] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: zip1 z1.h, z2.h, z1.h +; CHECK-NEXT: zip1 z0.h, z0.h, z3.h +; CHECK-NEXT: zip1 z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 %el1 = extractelement %a, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/AMDGPU/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..5ff2d82c1464f2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/naked-fn-with-frame-pointer.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple amdgcn | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: naked$local: +; CHECK-NEXT: .type naked$local,@function +; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, main@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s17, s17, main@rel32@hi+12 +; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: normal$local: +; CHECK-NEXT: .type normal$local,@function +; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s16, s33 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 
; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[18:19] +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: v_writelane_b32 v40, s16, 2 +; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, main@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s17, s17, main@rel32@hi+12 +; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/ARM/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/ARM/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..2bdc7d3e29b981 --- /dev/null +++ b/llvm/test/CodeGen/ARM/naked-fn-with-frame-pointer.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple arm | FileCheck %s -check-prefixes=CHECK-ALE +; RUN: llc < %s -mtriple armeb | FileCheck %s -check-prefixes=CHECK-ABE +; RUN: llc < %s -mtriple thumb | FileCheck %s -check-prefixes=CHECK-TLE +; RUN: llc < %s -mtriple thumbeb | FileCheck %s -check-prefixes=CHECK-TBE + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-ALE-LABEL: naked: +; CHECK-ALE: @ %bb.0: +; CHECK-ALE-NEXT: bl main +; +; CHECK-ABE-LABEL: naked: +; CHECK-ABE: @ %bb.0: +; CHECK-ABE-NEXT: bl main +; +; CHECK-TLE-LABEL: naked: +; CHECK-TLE: @ %bb.0: +; CHECK-TLE-NEXT: bl main +; +; CHECK-TBE-LABEL: naked: +; CHECK-TBE: @ %bb.0: +; CHECK-TBE-NEXT: bl main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-ALE-LABEL: normal: +; CHECK-ALE: @ %bb.0: +; CHECK-ALE-NEXT: push {r11, lr} +; CHECK-ALE-NEXT: mov r11, sp +; CHECK-ALE-NEXT: bl main +; +; CHECK-ABE-LABEL: normal: +; CHECK-ABE: @ %bb.0: +; CHECK-ABE-NEXT: push {r11, lr} +; CHECK-ABE-NEXT: mov r11, sp +; CHECK-ABE-NEXT: bl main +; +; CHECK-TLE-LABEL: normal: +; CHECK-TLE: @ 
%bb.0: +; CHECK-TLE-NEXT: push {r7, lr} +; CHECK-TLE-NEXT: add r7, sp, #0 +; CHECK-TLE-NEXT: bl main +; +; CHECK-TBE-LABEL: normal: +; CHECK-TBE: @ %bb.0: +; CHECK-TBE-NEXT: push {r7, lr} +; CHECK-TBE-NEXT: add r7, sp, #0 +; CHECK-TBE-NEXT: bl main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/AVR/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/AVR/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..18ea60906bd0cd --- /dev/null +++ b/llvm/test/CodeGen/AVR/naked-fn-with-frame-pointer.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple avr | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: ; %bb.0: +; CHECK-NEXT: rcall main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: ; %bb.0: +; CHECK-NEXT: rcall main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/BPF/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/BPF/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..4e4436296f3b56 --- /dev/null +++ b/llvm/test/CodeGen/BPF/naked-fn-with-frame-pointer.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple bpfel | FileCheck %s -check-prefixes=CHECK-LE +; RUN: llc < %s -mtriple bpfeb | FileCheck %s -check-prefixes=CHECK-BE + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LE-LABEL: naked: +; CHECK-LE: .Lnaked$local: +; CHECK-LE-NEXT: .type .Lnaked$local,@function +; CHECK-LE-NEXT: .cfi_startproc +; CHECK-LE-NEXT: # %bb.0: +; CHECK-LE-NEXT: call main +; +; CHECK-BE-LABEL: naked: +; CHECK-BE: .Lnaked$local: +; CHECK-BE-NEXT: .type .Lnaked$local,@function 
+; CHECK-BE-NEXT: .cfi_startproc +; CHECK-BE-NEXT: # %bb.0: +; CHECK-BE-NEXT: call main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LE-LABEL: normal: +; CHECK-LE: .Lnormal$local: +; CHECK-LE-NEXT: .type .Lnormal$local,@function +; CHECK-LE-NEXT: .cfi_startproc +; CHECK-LE-NEXT: # %bb.0: +; CHECK-LE-NEXT: call main +; +; CHECK-BE-LABEL: normal: +; CHECK-BE: .Lnormal$local: +; CHECK-BE-NEXT: .type .Lnormal$local,@function +; CHECK-BE-NEXT: .cfi_startproc +; CHECK-BE-NEXT: # %bb.0: +; CHECK-BE-NEXT: call main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/CSKY/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/CSKY/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..e897127eb31cdd --- /dev/null +++ b/llvm/test/CodeGen/CSKY/naked-fn-with-frame-pointer.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple csky | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: # %bb.0: +; CHECK-NEXT: lrw a0, [.LCPI0_0] +; CHECK-NEXT: jsr16 a0 +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: # %bb.0: +; CHECK-NEXT: subi16 sp, sp, 8 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: st32.w lr, (sp, 4) # 4-byte Folded Spill +; CHECK-NEXT: st32.w l4, (sp, 0) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset l4, -8 +; CHECK-NEXT: mov16 l4, sp +; CHECK-NEXT: .cfi_def_cfa_register l4 +; CHECK-NEXT: subi16 sp, sp, 4 +; CHECK-NEXT: lrw a0, [.LCPI1_0] +; CHECK-NEXT: jsr16 a0 +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: 
.p2align 2, 0x0 +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/Hexagon/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/Hexagon/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..c53f2d4df9b62c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/naked-fn-with-frame-pointer.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple hexagon | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: call main +; CHECK-NEXT: } + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: .cfi_def_cfa r30, 8 +; CHECK-NEXT: .cfi_offset r31, -4 +; CHECK-NEXT: .cfi_offset r30, -8 +; CHECK-NEXT: { +; CHECK-NEXT: call main +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/Lanai/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/Lanai/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..4e148764e478b7 --- /dev/null +++ b/llvm/test/CodeGen/Lanai/naked-fn-with-frame-pointer.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple lanai | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: .Lnaked$local: +; CHECK-NEXT: .type .Lnaked$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ! 
%bb.0: +; CHECK-NEXT: add %pc, 0x10, %rca +; CHECK-NEXT: st %rca, [--%sp] +; CHECK-NEXT: bt main +; CHECK-NEXT: nop + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: .Lnormal$local: +; CHECK-NEXT: .type .Lnormal$local,@function +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: st %fp, [--%sp] +; CHECK-NEXT: add %sp, 0x8, %fp +; CHECK-NEXT: sub %sp, 0x8, %sp +; CHECK-NEXT: add %pc, 0x10, %rca +; CHECK-NEXT: st %rca, [--%sp] +; CHECK-NEXT: bt main +; CHECK-NEXT: nop + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/LoongArch/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/LoongArch/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..9bb449101683d6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/naked-fn-with-frame-pointer.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple loongarch32 -mattr +d | FileCheck %s -check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple loongarch64 -mattr +d | FileCheck %s -check-prefixes=CHECK-64 + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-32-LABEL: naked: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: bl main +; +; CHECK-64-LABEL: naked: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: bl main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-32-LABEL: normal: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: addi.w $sp, $sp, -16 +; CHECK-32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; CHECK-32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; CHECK-32-NEXT: .cfi_offset 1, -4 +; CHECK-32-NEXT: .cfi_offset 22, -8 +; CHECK-32-NEXT: addi.w $fp, $sp, 16 +; CHECK-32-NEXT: .cfi_def_cfa 22, 0 +; CHECK-32-NEXT: bl main +; +; CHECK-64-LABEL: normal: +; CHECK-64: # %bb.0: +; 
CHECK-64-NEXT: addi.d $sp, $sp, -16 +; CHECK-64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill +; CHECK-64-NEXT: .cfi_offset 1, -8 +; CHECK-64-NEXT: .cfi_offset 22, -16 +; CHECK-64-NEXT: addi.d $fp, $sp, 16 +; CHECK-64-NEXT: .cfi_def_cfa 22, 0 +; CHECK-64-NEXT: bl main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/M68k/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/M68k/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..807c52c39b6e6a --- /dev/null +++ b/llvm/test/CodeGen/M68k/naked-fn-with-frame-pointer.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple m68k | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: .cfi_startproc +; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: jsr main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: .cfi_startproc +; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: link.w %a6, #0 +; CHECK-NEXT: .cfi_def_cfa_offset -8 +; CHECK-NEXT: .cfi_offset %a6, -8 +; CHECK-NEXT: .cfi_def_cfa_register %a6 +; CHECK-NEXT: jsr main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/MSP430/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/MSP430/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..2fdb01005bb280 --- /dev/null +++ b/llvm/test/CodeGen/MSP430/naked-fn-with-frame-pointer.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple msp430 | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: 
.cfi_startproc +; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: call #main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: .cfi_startproc +; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: push r4 +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: .cfi_offset r4, -4 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: .cfi_def_cfa_register r4 +; CHECK-NEXT: call #main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/Mips/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/Mips/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..a3820da8b221c9 --- /dev/null +++ b/llvm/test/CodeGen/Mips/naked-fn-with-frame-pointer.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple mips | FileCheck %s -check-prefixes=CHECK-32-BE +; RUN: llc < %s -mtriple mipsel | FileCheck %s -check-prefixes=CHECK-32-LE +; RUN: llc < %s -mtriple mips64 | FileCheck %s -check-prefixes=CHECK-64-BE +; RUN: llc < %s -mtriple mips64el | FileCheck %s -check-prefixes=CHECK-64-LE + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-32-BE-LABEL: naked: +; CHECK-32-BE: # %bb.0: +; CHECK-32-BE-NEXT: jal main +; CHECK-32-BE-NEXT: nop +; +; CHECK-32-LE-LABEL: naked: +; CHECK-32-LE: # %bb.0: +; CHECK-32-LE-NEXT: jal main +; CHECK-32-LE-NEXT: nop +; +; CHECK-64-BE-LABEL: naked: +; CHECK-64-BE: # %bb.0: +; CHECK-64-BE-NEXT: jal main +; CHECK-64-BE-NEXT: nop +; +; CHECK-64-LE-LABEL: naked: +; CHECK-64-LE: # %bb.0: +; CHECK-64-LE-NEXT: jal main +; CHECK-64-LE-NEXT: nop + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-32-BE-LABEL: normal: +; CHECK-32-BE: # %bb.0: +; CHECK-32-BE-NEXT: addiu $sp, $sp, -24 +; CHECK-32-BE-NEXT: .cfi_def_cfa_offset 24 +; CHECK-32-BE-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-32-BE-NEXT: sw 
$fp, 16($sp) # 4-byte Folded Spill +; CHECK-32-BE-NEXT: .cfi_offset 31, -4 +; CHECK-32-BE-NEXT: .cfi_offset 30, -8 +; CHECK-32-BE-NEXT: move $fp, $sp +; CHECK-32-BE-NEXT: .cfi_def_cfa_register 30 +; CHECK-32-BE-NEXT: jal main +; CHECK-32-BE-NEXT: nop +; +; CHECK-32-LE-LABEL: normal: +; CHECK-32-LE: # %bb.0: +; CHECK-32-LE-NEXT: addiu $sp, $sp, -24 +; CHECK-32-LE-NEXT: .cfi_def_cfa_offset 24 +; CHECK-32-LE-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; CHECK-32-LE-NEXT: sw $fp, 16($sp) # 4-byte Folded Spill +; CHECK-32-LE-NEXT: .cfi_offset 31, -4 +; CHECK-32-LE-NEXT: .cfi_offset 30, -8 +; CHECK-32-LE-NEXT: move $fp, $sp +; CHECK-32-LE-NEXT: .cfi_def_cfa_register 30 +; CHECK-32-LE-NEXT: jal main +; CHECK-32-LE-NEXT: nop +; +; CHECK-64-BE-LABEL: normal: +; CHECK-64-BE: # %bb.0: +; CHECK-64-BE-NEXT: daddiu $sp, $sp, -16 +; CHECK-64-BE-NEXT: .cfi_def_cfa_offset 16 +; CHECK-64-BE-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; CHECK-64-BE-NEXT: sd $fp, 0($sp) # 8-byte Folded Spill +; CHECK-64-BE-NEXT: .cfi_offset 31, -8 +; CHECK-64-BE-NEXT: .cfi_offset 30, -16 +; CHECK-64-BE-NEXT: move $fp, $sp +; CHECK-64-BE-NEXT: .cfi_def_cfa_register 30 +; CHECK-64-BE-NEXT: jal main +; CHECK-64-BE-NEXT: nop +; +; CHECK-64-LE-LABEL: normal: +; CHECK-64-LE: # %bb.0: +; CHECK-64-LE-NEXT: daddiu $sp, $sp, -16 +; CHECK-64-LE-NEXT: .cfi_def_cfa_offset 16 +; CHECK-64-LE-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; CHECK-64-LE-NEXT: sd $fp, 0($sp) # 8-byte Folded Spill +; CHECK-64-LE-NEXT: .cfi_offset 31, -8 +; CHECK-64-LE-NEXT: .cfi_offset 30, -16 +; CHECK-64-LE-NEXT: move $fp, $sp +; CHECK-64-LE-NEXT: .cfi_def_cfa_register 30 +; CHECK-64-LE-NEXT: jal main +; CHECK-64-LE-NEXT: nop + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..a1f0577c2218bd --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll @@ -0,0 +1,73 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple nvptx | FileCheck %s -check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple nvptx64 | FileCheck %s -check-prefixes=CHECK-64 + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-32-LABEL: naked( +; CHECK-32: { +; CHECK-32-EMPTY: +; CHECK-32-EMPTY: +; CHECK-32-NEXT: // %bb.0: +; CHECK-32-NEXT: { // callseq 0, 0 +; CHECK-32-NEXT: call.uni +; CHECK-32-NEXT: main, +; CHECK-32-NEXT: ( +; CHECK-32-NEXT: ); +; CHECK-32-NEXT: } // callseq 0 +; CHECK-32-NEXT: // begin inline asm +; CHECK-32-NEXT: exit; +; CHECK-32-NEXT: // end inline asm +; +; CHECK-64-LABEL: naked( +; CHECK-64: { +; CHECK-64-EMPTY: +; CHECK-64-EMPTY: +; CHECK-64-NEXT: // %bb.0: +; CHECK-64-NEXT: { // callseq 0, 0 +; CHECK-64-NEXT: call.uni +; CHECK-64-NEXT: main, +; CHECK-64-NEXT: ( +; CHECK-64-NEXT: ); +; CHECK-64-NEXT: } // callseq 0 +; CHECK-64-NEXT: // begin inline asm +; CHECK-64-NEXT: exit; +; CHECK-64-NEXT: // end inline asm + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-32-LABEL: normal( +; CHECK-32: { +; CHECK-32-EMPTY: +; CHECK-32-EMPTY: +; CHECK-32-NEXT: // %bb.0: +; CHECK-32-NEXT: { // callseq 1, 0 +; CHECK-32-NEXT: call.uni +; CHECK-32-NEXT: main, +; CHECK-32-NEXT: ( +; CHECK-32-NEXT: ); +; CHECK-32-NEXT: } // callseq 1 +; CHECK-32-NEXT: // begin inline asm +; CHECK-32-NEXT: exit; +; CHECK-32-NEXT: // end inline asm +; +; CHECK-64-LABEL: normal( +; CHECK-64: { +; CHECK-64-EMPTY: +; CHECK-64-EMPTY: +; CHECK-64-NEXT: // %bb.0: +; CHECK-64-NEXT: { // callseq 1, 0 +; CHECK-64-NEXT: call.uni +; CHECK-64-NEXT: main, +; CHECK-64-NEXT: ( +; CHECK-64-NEXT: ); +; CHECK-64-NEXT: } // callseq 1 +; CHECK-64-NEXT: // begin inline asm +; CHECK-64-NEXT: exit; +; CHECK-64-NEXT: // end inline asm + call void @main() + unreachable +} diff --git 
a/llvm/test/CodeGen/PowerPC/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/PowerPC/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..59b1044084c645 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/naked-fn-with-frame-pointer.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple powerpc | FileCheck %s -check-prefixes=CHECK-32-BE +; RUN: llc < %s -mtriple powerpcle | FileCheck %s -check-prefixes=CHECK-32-LE +; RUN: llc < %s -mtriple powerpc64 | FileCheck %s -check-prefixes=CHECK-64-BE +; RUN: llc < %s -mtriple powerpc64le | FileCheck %s -check-prefixes=CHECK-64-LE + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-32-BE-LABEL: naked: +; CHECK-32-BE: # %bb.0: +; CHECK-32-BE-NEXT: bl main +; +; CHECK-32-LE-LABEL: naked: +; CHECK-32-LE: # %bb.0: +; CHECK-32-LE-NEXT: bl main +; +; CHECK-64-BE-LABEL: naked: +; CHECK-64-BE: # %bb.0: +; CHECK-64-BE-NEXT: bl main +; CHECK-64-BE-NEXT: nop +; +; CHECK-64-LE-LABEL: naked: +; CHECK-64-LE: # %bb.0: +; CHECK-64-LE-NEXT: bl main +; CHECK-64-LE-NEXT: nop + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-32-BE-LABEL: normal: +; CHECK-32-BE: # %bb.0: +; CHECK-32-BE-NEXT: mflr 0 +; CHECK-32-BE-NEXT: stwu 1, -16(1) +; CHECK-32-BE-NEXT: stw 31, 12(1) +; CHECK-32-BE-NEXT: stw 0, 20(1) +; CHECK-32-BE-NEXT: .cfi_def_cfa_offset 16 +; CHECK-32-BE-NEXT: .cfi_offset r31, -4 +; CHECK-32-BE-NEXT: .cfi_offset lr, 4 +; CHECK-32-BE-NEXT: mr 31, 1 +; CHECK-32-BE-NEXT: .cfi_def_cfa_register r31 +; CHECK-32-BE-NEXT: bl main +; +; CHECK-32-LE-LABEL: normal: +; CHECK-32-LE: # %bb.0: +; CHECK-32-LE-NEXT: mflr 0 +; CHECK-32-LE-NEXT: stwu 1, -16(1) +; CHECK-32-LE-NEXT: stw 31, 12(1) +; CHECK-32-LE-NEXT: stw 0, 20(1) +; CHECK-32-LE-NEXT: .cfi_def_cfa_offset 16 +; CHECK-32-LE-NEXT: .cfi_offset r31, -4 +; CHECK-32-LE-NEXT: .cfi_offset 
lr, 4 +; CHECK-32-LE-NEXT: mr 31, 1 +; CHECK-32-LE-NEXT: .cfi_def_cfa_register r31 +; CHECK-32-LE-NEXT: bl main +; +; CHECK-64-BE-LABEL: normal: +; CHECK-64-BE: # %bb.0: +; CHECK-64-BE-NEXT: mflr 0 +; CHECK-64-BE-NEXT: std 31, -8(1) +; CHECK-64-BE-NEXT: stdu 1, -128(1) +; CHECK-64-BE-NEXT: std 0, 144(1) +; CHECK-64-BE-NEXT: .cfi_def_cfa_offset 128 +; CHECK-64-BE-NEXT: .cfi_offset r31, -8 +; CHECK-64-BE-NEXT: .cfi_offset lr, 16 +; CHECK-64-BE-NEXT: mr 31, 1 +; CHECK-64-BE-NEXT: .cfi_def_cfa_register r31 +; CHECK-64-BE-NEXT: bl main +; CHECK-64-BE-NEXT: nop +; +; CHECK-64-LE-LABEL: normal: +; CHECK-64-LE: # %bb.0: +; CHECK-64-LE-NEXT: mflr 0 +; CHECK-64-LE-NEXT: std 31, -8(1) +; CHECK-64-LE-NEXT: stdu 1, -48(1) +; CHECK-64-LE-NEXT: std 0, 64(1) +; CHECK-64-LE-NEXT: .cfi_def_cfa_offset 48 +; CHECK-64-LE-NEXT: .cfi_offset r31, -8 +; CHECK-64-LE-NEXT: .cfi_offset lr, 16 +; CHECK-64-LE-NEXT: mr 31, 1 +; CHECK-64-LE-NEXT: .cfi_def_cfa_register r31 +; CHECK-64-LE-NEXT: bl main +; CHECK-64-LE-NEXT: nop + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/PowerPC/stack-guard-global.ll b/llvm/test/CodeGen/PowerPC/stack-guard-global.ll new file mode 100644 index 00000000000000..022a62a4b0918d --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/stack-guard-global.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=powerpc64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=BE64 +; RUN: llc -mtriple=powerpc64le -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=LE64 +; RUN: llc -mtriple=ppc32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=LE32 + +define void @foo(i64 %t) sspstrong nounwind { +; BE64-LABEL: foo: +; BE64: # %bb.0: +; BE64-NEXT: mflr 0 +; BE64-NEXT: std 31, -8(1) +; BE64-NEXT: stdu 1, -144(1) +; BE64-NEXT: mr 31, 1 +; BE64-NEXT: std 0, 160(1) +; BE64-NEXT: std 30, 128(31) # 8-byte Folded Spill +; BE64-NEXT: 
addis 30, 2, __stack_chk_guard@toc@ha +; BE64-NEXT: sldi 3, 3, 2 +; BE64-NEXT: ld 4, __stack_chk_guard@toc@l(30) +; BE64-NEXT: addi 3, 3, 15 +; BE64-NEXT: rldicr 3, 3, 0, 59 +; BE64-NEXT: neg 3, 3 +; BE64-NEXT: std 4, 120(31) +; BE64-NEXT: addi 4, 31, 144 +; BE64-NEXT: stdux 4, 1, 3 +; BE64-NEXT: addi 3, 1, 112 +; BE64-NEXT: bl baz +; BE64-NEXT: nop +; BE64-NEXT: ld 3, __stack_chk_guard@toc@l(30) +; BE64-NEXT: ld 4, 120(31) +; BE64-NEXT: cmpld 3, 4 +; BE64-NEXT: bne 0, .LBB0_2 +; BE64-NEXT: # %bb.1: +; BE64-NEXT: ld 30, 128(31) # 8-byte Folded Reload +; BE64-NEXT: ld 1, 0(1) +; BE64-NEXT: ld 0, 16(1) +; BE64-NEXT: ld 31, -8(1) +; BE64-NEXT: mtlr 0 +; BE64-NEXT: blr +; BE64-NEXT: .LBB0_2: +; BE64-NEXT: bl __stack_chk_fail +; BE64-NEXT: nop +; +; LE64-LABEL: foo: +; LE64: # %bb.0: +; LE64-NEXT: mflr 0 +; LE64-NEXT: std 31, -8(1) +; LE64-NEXT: stdu 1, -64(1) +; LE64-NEXT: mr 31, 1 +; LE64-NEXT: sldi 3, 3, 2 +; LE64-NEXT: std 0, 80(1) +; LE64-NEXT: std 30, 48(31) # 8-byte Folded Spill +; LE64-NEXT: addis 30, 2, __stack_chk_guard@toc@ha +; LE64-NEXT: addi 3, 3, 15 +; LE64-NEXT: ld 4, __stack_chk_guard@toc@l(30) +; LE64-NEXT: rldicr 3, 3, 0, 59 +; LE64-NEXT: neg 3, 3 +; LE64-NEXT: std 4, 40(31) +; LE64-NEXT: addi 4, 31, 64 +; LE64-NEXT: stdux 4, 1, 3 +; LE64-NEXT: addi 3, 1, 32 +; LE64-NEXT: bl baz +; LE64-NEXT: nop +; LE64-NEXT: ld 3, __stack_chk_guard@toc@l(30) +; LE64-NEXT: ld 4, 40(31) +; LE64-NEXT: cmpld 3, 4 +; LE64-NEXT: bne 0, .LBB0_2 +; LE64-NEXT: # %bb.1: +; LE64-NEXT: ld 30, 48(31) # 8-byte Folded Reload +; LE64-NEXT: ld 1, 0(1) +; LE64-NEXT: ld 0, 16(1) +; LE64-NEXT: ld 31, -8(1) +; LE64-NEXT: mtlr 0 +; LE64-NEXT: blr +; LE64-NEXT: .LBB0_2: +; LE64-NEXT: bl __stack_chk_fail +; LE64-NEXT: nop +; +; LE32-LABEL: foo: +; LE32: # %bb.0: +; LE32-NEXT: mflr 0 +; LE32-NEXT: stwu 1, -32(1) +; LE32-NEXT: stw 31, 28(1) +; LE32-NEXT: mr 31, 1 +; LE32-NEXT: stw 0, 36(1) +; LE32-NEXT: slwi 4, 4, 2 +; LE32-NEXT: stw 30, 24(31) # 4-byte Folded Spill +; LE32-NEXT: lis 30, 
__stack_chk_guard@ha +; LE32-NEXT: lwz 3, __stack_chk_guard@l(30) +; LE32-NEXT: addi 4, 4, 15 +; LE32-NEXT: rlwinm 4, 4, 0, 0, 27 +; LE32-NEXT: neg 4, 4 +; LE32-NEXT: stw 3, 20(31) +; LE32-NEXT: addi 3, 31, 32 +; LE32-NEXT: stwux 3, 1, 4 +; LE32-NEXT: addi 3, 1, 16 +; LE32-NEXT: bl baz +; LE32-NEXT: lwz 3, __stack_chk_guard@l(30) +; LE32-NEXT: lwz 4, 20(31) +; LE32-NEXT: cmplw 3, 4 +; LE32-NEXT: bne 0, .LBB0_2 +; LE32-NEXT: # %bb.1: +; LE32-NEXT: lwz 30, 24(31) # 4-byte Folded Reload +; LE32-NEXT: lwz 31, 0(1) +; LE32-NEXT: lwz 0, -4(31) +; LE32-NEXT: mr 1, 31 +; LE32-NEXT: mr 31, 0 +; LE32-NEXT: lwz 0, 4(1) +; LE32-NEXT: mtlr 0 +; LE32-NEXT: blr +; LE32-NEXT: .LBB0_2: +; LE32-NEXT: bl __stack_chk_fail + %vla = alloca i32, i64 %t, align 4 + call void @baz(ptr %vla) + ret void +} + +declare void @baz(ptr) + +!llvm.module.flags = !{!1} +!1 = !{i32 2, !"stack-protector-guard", !"global"} diff --git a/llvm/test/CodeGen/PowerPC/stack-guard-tls.ll b/llvm/test/CodeGen/PowerPC/stack-guard-tls.ll new file mode 100644 index 00000000000000..de0becc037309f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/stack-guard-tls.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=powerpc64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=BE64 +; RUN: llc -mtriple=powerpc64le -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=LE64 +; RUN: llc -mtriple=ppc32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=LE32 + +define void @foo(i64 %t) sspstrong nounwind { +; BE64-LABEL: foo: +; BE64: # %bb.0: +; BE64-NEXT: mflr 0 +; BE64-NEXT: std 31, -8(1) +; BE64-NEXT: stdu 1, -144(1) +; BE64-NEXT: ld 4, 500(13) +; BE64-NEXT: sldi 3, 3, 2 +; BE64-NEXT: mr 31, 1 +; BE64-NEXT: addi 3, 3, 15 +; BE64-NEXT: rldicr 3, 3, 0, 59 +; BE64-NEXT: std 0, 160(1) +; BE64-NEXT: neg 3, 3 +; BE64-NEXT: std 4, 128(31) +; BE64-NEXT: addi 4, 31, 144 +; BE64-NEXT: stdux 4, 1, 
3 +; BE64-NEXT: addi 3, 1, 112 +; BE64-NEXT: bl baz +; BE64-NEXT: nop +; BE64-NEXT: ld 3, 128(31) +; BE64-NEXT: ld 4, 500(13) +; BE64-NEXT: cmpld 4, 3 +; BE64-NEXT: bne 0, .LBB0_2 +; BE64-NEXT: # %bb.1: +; BE64-NEXT: ld 1, 0(1) +; BE64-NEXT: ld 0, 16(1) +; BE64-NEXT: ld 31, -8(1) +; BE64-NEXT: mtlr 0 +; BE64-NEXT: blr +; BE64-NEXT: .LBB0_2: +; BE64-NEXT: bl __stack_chk_fail +; BE64-NEXT: nop +; +; LE64-LABEL: foo: +; LE64: # %bb.0: +; LE64-NEXT: mflr 0 +; LE64-NEXT: std 31, -8(1) +; LE64-NEXT: stdu 1, -64(1) +; LE64-NEXT: sldi 3, 3, 2 +; LE64-NEXT: ld 4, 500(13) +; LE64-NEXT: std 0, 80(1) +; LE64-NEXT: addi 3, 3, 15 +; LE64-NEXT: mr 31, 1 +; LE64-NEXT: std 4, 48(31) +; LE64-NEXT: addi 4, 31, 64 +; LE64-NEXT: rldicr 3, 3, 0, 59 +; LE64-NEXT: neg 3, 3 +; LE64-NEXT: stdux 4, 1, 3 +; LE64-NEXT: addi 3, 1, 32 +; LE64-NEXT: bl baz +; LE64-NEXT: nop +; LE64-NEXT: ld 3, 48(31) +; LE64-NEXT: ld 4, 500(13) +; LE64-NEXT: cmpld 4, 3 +; LE64-NEXT: bne 0, .LBB0_2 +; LE64-NEXT: # %bb.1: +; LE64-NEXT: ld 1, 0(1) +; LE64-NEXT: ld 0, 16(1) +; LE64-NEXT: ld 31, -8(1) +; LE64-NEXT: mtlr 0 +; LE64-NEXT: blr +; LE64-NEXT: .LBB0_2: +; LE64-NEXT: bl __stack_chk_fail +; LE64-NEXT: nop +; +; LE32-LABEL: foo: +; LE32: # %bb.0: +; LE32-NEXT: mflr 0 +; LE32-NEXT: stwu 1, -32(1) +; LE32-NEXT: lwz 3, 500(2) +; LE32-NEXT: slwi 4, 4, 2 +; LE32-NEXT: addi 4, 4, 15 +; LE32-NEXT: stw 31, 28(1) +; LE32-NEXT: mr 31, 1 +; LE32-NEXT: rlwinm 4, 4, 0, 0, 27 +; LE32-NEXT: stw 0, 36(1) +; LE32-NEXT: neg 4, 4 +; LE32-NEXT: stw 3, 24(31) +; LE32-NEXT: addi 3, 31, 32 +; LE32-NEXT: stwux 3, 1, 4 +; LE32-NEXT: addi 3, 1, 16 +; LE32-NEXT: bl baz +; LE32-NEXT: lwz 3, 24(31) +; LE32-NEXT: lwz 4, 500(2) +; LE32-NEXT: cmplw 4, 3 +; LE32-NEXT: bne 0, .LBB0_2 +; LE32-NEXT: # %bb.1: +; LE32-NEXT: lwz 31, 0(1) +; LE32-NEXT: lwz 0, -4(31) +; LE32-NEXT: mr 1, 31 +; LE32-NEXT: mr 31, 0 +; LE32-NEXT: lwz 0, 4(1) +; LE32-NEXT: mtlr 0 +; LE32-NEXT: blr +; LE32-NEXT: .LBB0_2: +; LE32-NEXT: bl __stack_chk_fail + %vla = alloca 
i32, i64 %t, align 4 + call void @baz(ptr %vla) + ret void +} + +declare void @baz(ptr) + +!llvm.module.flags = !{!1, !2} +!1 = !{i32 2, !"stack-protector-guard", !"tls"} +!2 = !{i32 2, !"stack-protector-guard-offset", i32 500} diff --git a/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll b/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll index c480ba800c6904..08e91736582012 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll @@ -39,6 +39,39 @@ define double @constraint_f_double(double %a) nounwind { ret double %2 } +define double @constraint_cf_double(double %a) nounwind { +; RV32F-LABEL: constraint_cf_double: +; RV32F: # %bb.0: +; RV32F-NEXT: addi sp, sp, -16 +; RV32F-NEXT: sw a0, 8(sp) +; RV32F-NEXT: sw a1, 12(sp) +; RV32F-NEXT: fld fa5, 8(sp) +; RV32F-NEXT: lui a0, %hi(gd) +; RV32F-NEXT: fld fa4, %lo(gd)(a0) +; RV32F-NEXT: #APP +; RV32F-NEXT: fadd.d fa5, fa5, fa4 +; RV32F-NEXT: #NO_APP +; RV32F-NEXT: fsd fa5, 8(sp) +; RV32F-NEXT: lw a0, 8(sp) +; RV32F-NEXT: lw a1, 12(sp) +; RV32F-NEXT: addi sp, sp, 16 +; RV32F-NEXT: ret +; +; RV64F-LABEL: constraint_cf_double: +; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gd) +; RV64F-NEXT: fld fa5, %lo(gd)(a1) +; RV64F-NEXT: fmv.d.x fa4, a0 +; RV64F-NEXT: #APP +; RV64F-NEXT: fadd.d fa5, fa4, fa5 +; RV64F-NEXT: #NO_APP +; RV64F-NEXT: fmv.x.d a0, fa5 +; RV64F-NEXT: ret + %1 = load double, ptr @gd + %2 = tail call double asm "fadd.d $0, $1, $2", "=^cf,^cf,^cf"(double %a, double %1) + ret double %2 +} + define double @constraint_f_double_abi_name(double %a) nounwind { ; RV32F-LABEL: constraint_f_double_abi_name: ; RV32F: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/inline-asm-d-modifier-N.ll b/llvm/test/CodeGen/RISCV/inline-asm-d-modifier-N.ll new file mode 100644 index 00000000000000..581cf8e3bf3c9e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/inline-asm-d-modifier-N.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs -no-integrated-as < %s \ +; RUN: | FileCheck -check-prefix=RV32F %s +; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs -no-integrated-as < %s \ +; RUN: | FileCheck -check-prefix=RV64F %s + +;; `.insn 0x4, 0x02000053 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)` is +;; the raw encoding for `fadd.d` + +@gd = external global double + +define double @constraint_f_double(double %a) nounwind { +; RV32F-LABEL: constraint_f_double: +; RV32F: # %bb.0: +; RV32F-NEXT: addi sp, sp, -16 +; RV32F-NEXT: sw a0, 8(sp) +; RV32F-NEXT: sw a1, 12(sp) +; RV32F-NEXT: fld fa5, 8(sp) +; RV32F-NEXT: lui a0, %hi(gd) +; RV32F-NEXT: fld fa4, %lo(gd)(a0) +; RV32F-NEXT: #APP +; RV32F-NEXT: .insn 0x4, 0x02000053 | (15 << 7) | (15 << 15) | (14 << 20) +; RV32F-NEXT: #NO_APP +; RV32F-NEXT: fsd fa5, 8(sp) +; RV32F-NEXT: lw a0, 8(sp) +; RV32F-NEXT: lw a1, 12(sp) +; RV32F-NEXT: addi sp, sp, 16 +; RV32F-NEXT: ret +; +; RV64F-LABEL: constraint_f_double: +; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gd) +; RV64F-NEXT: fld fa5, %lo(gd)(a1) +; RV64F-NEXT: fmv.d.x fa4, a0 +; RV64F-NEXT: #APP +; RV64F-NEXT: .insn 0x4, 0x02000053 | (15 << 7) | (14 << 15) | (15 << 20) +; RV64F-NEXT: #NO_APP +; RV64F-NEXT: fmv.x.d a0, fa5 +; RV64F-NEXT: ret + %1 = load double, ptr @gd + %2 = tail call double asm ".insn 0x4, 0x02000053 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "=f,f,f"(double %a, double %1) + ret double %2 +} + +define double @constraint_cf_double(double %a) nounwind { +; RV32F-LABEL: constraint_cf_double: +; RV32F: # %bb.0: +; RV32F-NEXT: addi sp, sp, -16 +; RV32F-NEXT: sw a0, 8(sp) +; RV32F-NEXT: sw a1, 12(sp) +; RV32F-NEXT: fld fa5, 8(sp) +; RV32F-NEXT: lui a0, %hi(gd) +; RV32F-NEXT: fld fa4, %lo(gd)(a0) +; RV32F-NEXT: #APP +; RV32F-NEXT: .insn 0x4, 0x02000053 | (15 << 7) | (15 << 15) | (14 << 20) +; RV32F-NEXT: #NO_APP +; RV32F-NEXT: fsd fa5, 8(sp) +; 
RV32F-NEXT: lw a0, 8(sp) +; RV32F-NEXT: lw a1, 12(sp) +; RV32F-NEXT: addi sp, sp, 16 +; RV32F-NEXT: ret +; +; RV64F-LABEL: constraint_cf_double: +; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gd) +; RV64F-NEXT: fld fa5, %lo(gd)(a1) +; RV64F-NEXT: fmv.d.x fa4, a0 +; RV64F-NEXT: #APP +; RV64F-NEXT: .insn 0x4, 0x02000053 | (15 << 7) | (14 << 15) | (15 << 20) +; RV64F-NEXT: #NO_APP +; RV64F-NEXT: fmv.x.d a0, fa5 +; RV64F-NEXT: ret + %1 = load double, ptr @gd + %2 = tail call double asm ".insn 0x4, 0x02000053 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "=^cf,^cf,^cf"(double %a, double %1) + ret double %2 +} + +define double @constraint_f_double_abi_name(double %a) nounwind { +; RV32F-LABEL: constraint_f_double_abi_name: +; RV32F: # %bb.0: +; RV32F-NEXT: addi sp, sp, -16 +; RV32F-NEXT: sw a0, 8(sp) +; RV32F-NEXT: sw a1, 12(sp) +; RV32F-NEXT: fld fa1, 8(sp) +; RV32F-NEXT: lui a0, %hi(gd) +; RV32F-NEXT: fld fs0, %lo(gd)(a0) +; RV32F-NEXT: #APP +; RV32F-NEXT: .insn 0x4, 0x02000053 | (0 << 7) | (11 << 15) | (8 << 20) +; RV32F-NEXT: #NO_APP +; RV32F-NEXT: fsd ft0, 8(sp) +; RV32F-NEXT: lw a0, 8(sp) +; RV32F-NEXT: lw a1, 12(sp) +; RV32F-NEXT: addi sp, sp, 16 +; RV32F-NEXT: ret +; +; RV64F-LABEL: constraint_f_double_abi_name: +; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gd) +; RV64F-NEXT: fld fs0, %lo(gd)(a1) +; RV64F-NEXT: fmv.d.x fa1, a0 +; RV64F-NEXT: #APP +; RV64F-NEXT: .insn 0x4, 0x02000053 | (0 << 7) | (11 << 15) | (8 << 20) +; RV64F-NEXT: #NO_APP +; RV64F-NEXT: fmv.x.d a0, ft0 +; RV64F-NEXT: ret + %1 = load double, ptr @gd + %2 = tail call double asm ".insn 0x4, 0x02000053 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "={ft0},{fa1},{fs0}"(double %a, double %1) + ret double %2 +} diff --git a/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll b/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll index 91922cd236dfff..a91c6544f9e29c 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll 
@@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; NOTE: Assertions gave been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32F %s ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64 -verify-machineinstrs < %s \ @@ -38,6 +37,33 @@ define float @constraint_f_float(float %a) nounwind { ret float %2 } +define float @constraint_cf_float(float %a) nounwind { +; RV32F-LABEL: constraint_cf_float: +; RV32F: # %bb.0: +; RV32F-NEXT: lui a1, %hi(gf) +; RV32F-NEXT: flw fa5, %lo(gf)(a1) +; RV32F-NEXT: fmv.w.x fa4, a0 +; RV32F-NEXT: #APP +; RV32F-NEXT: fadd.s fa5, fa4, fa5 +; RV32F-NEXT: #NO_APP +; RV32F-NEXT: fmv.x.w a0, fa5 +; RV32F-NEXT: ret +; +; RV64F-LABEL: constraint_cf_float: +; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gf) +; RV64F-NEXT: flw fa5, %lo(gf)(a1) +; RV64F-NEXT: fmv.w.x fa4, a0 +; RV64F-NEXT: #APP +; RV64F-NEXT: fadd.s fa5, fa4, fa5 +; RV64F-NEXT: #NO_APP +; RV64F-NEXT: fmv.x.w a0, fa5 +; RV64F-NEXT: ret + %1 = load float, ptr @gf + %2 = tail call float asm "fadd.s $0, $1, $2", "=^cf,cf,cf"(float %a, float %1) + ret float %2 +} + define float @constraint_f_float_abi_name(float %a) nounwind { ; RV32F-LABEL: constraint_f_float_abi_name: ; RV32F: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/inline-asm-f-modifier-N.ll b/llvm/test/CodeGen/RISCV/inline-asm-f-modifier-N.ll new file mode 100644 index 00000000000000..a0de5c71a7df6a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/inline-asm-f-modifier-N.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32 -verify-machineinstrs -no-integrated-as < %s \ +; RUN: | FileCheck -check-prefix=RV32F %s +; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64 -verify-machineinstrs -no-integrated-as < %s \ +; RUN: | FileCheck 
-check-prefix=RV64F %s +; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs -no-integrated-as < %s \ +; RUN: | FileCheck -check-prefix=RV32F %s +; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs -no-integrated-as < %s \ +; RUN: | FileCheck -check-prefix=RV64F %s + +;; `.insn 0x4, 0x53 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)` is +;; the raw encoding for `fadd.s` + +@gf = external global float + +define float @constraint_f_modifier_N_float(float %a) nounwind { +; RV32F-LABEL: constraint_f_modifier_N_float: +; RV32F: # %bb.0: +; RV32F-NEXT: lui a1, %hi(gf) +; RV32F-NEXT: flw fa5, %lo(gf)(a1) +; RV32F-NEXT: fmv.w.x fa4, a0 +; RV32F-NEXT: #APP +; RV32F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (14 << 15) | (15 << 20) +; RV32F-NEXT: #NO_APP +; RV32F-NEXT: fmv.x.w a0, fa5 +; RV32F-NEXT: ret +; +; RV64F-LABEL: constraint_f_modifier_N_float: +; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gf) +; RV64F-NEXT: flw fa5, %lo(gf)(a1) +; RV64F-NEXT: fmv.w.x fa4, a0 +; RV64F-NEXT: #APP +; RV64F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (14 << 15) | (15 << 20) +; RV64F-NEXT: #NO_APP +; RV64F-NEXT: fmv.x.w a0, fa5 +; RV64F-NEXT: ret + %1 = load float, ptr @gf + %2 = tail call float asm ".insn 0x4, 0x53 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "=f,f,f"(float %a, float %1) + ret float %2 +} + + +define float @constraint_cf_modifier_N_float(float %a) nounwind { +; RV32F-LABEL: constraint_cf_modifier_N_float: +; RV32F: # %bb.0: +; RV32F-NEXT: lui a1, %hi(gf) +; RV32F-NEXT: flw fa5, %lo(gf)(a1) +; RV32F-NEXT: fmv.w.x fa4, a0 +; RV32F-NEXT: #APP +; RV32F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (14 << 15) | (15 << 20) +; RV32F-NEXT: #NO_APP +; RV32F-NEXT: fmv.x.w a0, fa5 +; RV32F-NEXT: ret +; +; RV64F-LABEL: constraint_cf_modifier_N_float: +; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gf) +; RV64F-NEXT: flw fa5, %lo(gf)(a1) +; RV64F-NEXT: fmv.w.x fa4, a0 +; RV64F-NEXT: #APP +; RV64F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (14 << 15) | (15 
<< 20) +; RV64F-NEXT: #NO_APP +; RV64F-NEXT: fmv.x.w a0, fa5 +; RV64F-NEXT: ret + %1 = load float, ptr @gf + %2 = tail call float asm ".insn 0x4, 0x53 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "=^cf,^cf,^cf"(float %a, float %1) + ret float %2 +} + +define float @modifier_N_float_abi_name(float %a) nounwind { +; RV32F-LABEL: modifier_N_float_abi_name: +; RV32F: # %bb.0: +; RV32F-NEXT: lui a1, %hi(gf) +; RV32F-NEXT: flw fs0, %lo(gf)(a1) +; RV32F-NEXT: fmv.w.x fa0, a0 +; RV32F-NEXT: #APP +; RV32F-NEXT: .insn 0x4, 0x53 | (0 << 7) | (10 << 15) | (8 << 20) +; RV32F-NEXT: #NO_APP +; RV32F-NEXT: fmv.x.w a0, ft0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: modifier_N_float_abi_name: +; RV64F: # %bb.0: +; RV64F-NEXT: lui a1, %hi(gf) +; RV64F-NEXT: flw fs0, %lo(gf)(a1) +; RV64F-NEXT: fmv.w.x fa0, a0 +; RV64F-NEXT: #APP +; RV64F-NEXT: .insn 0x4, 0x53 | (0 << 7) | (10 << 15) | (8 << 20) +; RV64F-NEXT: #NO_APP +; RV64F-NEXT: fmv.x.w a0, ft0 +; RV64F-NEXT: ret + %1 = load float, ptr @gf + %2 = tail call float asm ".insn 0x4, 0x53 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "={ft0},{fa0},{fs0}"(float %a, float %1) + ret float %2 +} diff --git a/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll b/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll index 14b7cb89667491..deffa177c5e6b3 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll @@ -31,6 +31,14 @@ define void @constraint_f() nounwind { ret void } +define void @constraint_cf() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'cf' + tail call void asm "fadd.s fa0, fa0, $0", "^cf"(float 0.0) +; CHECK: error: couldn't allocate input reg for constraint 'cf' + tail call void asm "fadd.d fa0, fa0, $0", "^cf"(double 0.0) + ret void +} + define void @constraint_r_fixed_vec() nounwind { ; CHECK: error: couldn't allocate input reg for constraint 'r' tail call void asm "add a0, a0, $0", "r"(<4 x i32> zeroinitializer) @@ -42,3 +50,15 @@ define void 
@constraint_r_scalable_vec() nounwind { tail call void asm "add a0, a0, $0", "r"(<vscale x 4 x i32> zeroinitializer) ret void } + +define void @constraint_cr_fixed_vec() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'cr' + tail call void asm "add a0, a0, $0", "^cr"(<4 x i32> zeroinitializer) + ret void +} + +define void @constraint_cr_scalable_vec() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'cr' + tail call void asm "add a0, a0, $0", "^cr"(<vscale x 4 x i32> zeroinitializer) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/inline-asm-zdinx-constraint-r.ll b/llvm/test/CodeGen/RISCV/inline-asm-zdinx-constraint-r.ll new file mode 100644 index 00000000000000..15729ee2bc61e9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/inline-asm-zdinx-constraint-r.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+zdinx -target-abi=ilp32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32FINX %s +; RUN: llc -mtriple=riscv64 -mattr=+zdinx -target-abi=lp64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64FINX %s + +@gd = external global double + +define double @constraint_r_double(double %a) nounwind { +; RV32FINX-LABEL: constraint_r_double: +; RV32FINX: # %bb.0: +; RV32FINX-NEXT: lui a2, %hi(gd) +; RV32FINX-NEXT: lw a3, %lo(gd+4)(a2) +; RV32FINX-NEXT: lw a2, %lo(gd)(a2) +; RV32FINX-NEXT: #APP +; RV32FINX-NEXT: fadd.d a0, a0, a2 +; RV32FINX-NEXT: #NO_APP +; RV32FINX-NEXT: ret +; +; RV64FINX-LABEL: constraint_r_double: +; RV64FINX: # %bb.0: +; RV64FINX-NEXT: lui a1, %hi(gd) +; RV64FINX-NEXT: ld a1, %lo(gd)(a1) +; RV64FINX-NEXT: #APP +; RV64FINX-NEXT: fadd.d a0, a0, a1 +; RV64FINX-NEXT: #NO_APP +; RV64FINX-NEXT: ret + %1 = load double, ptr @gd + %2 = tail call double asm "fadd.d $0, $1, $2", "=r,r,r"(double %a, double %1) + ret double %2 +} + +define double @constraint_cr_double(double %a) nounwind { +; RV32FINX-LABEL:
constraint_cr_double: +; RV32FINX: # %bb.0: +; RV32FINX-NEXT: lui a2, %hi(gd) +; RV32FINX-NEXT: lw a3, %lo(gd+4)(a2) +; RV32FINX-NEXT: lw a2, %lo(gd)(a2) +; RV32FINX-NEXT: #APP +; RV32FINX-NEXT: fadd.d a0, a0, a2 +; RV32FINX-NEXT: #NO_APP +; RV32FINX-NEXT: ret +; +; RV64FINX-LABEL: constraint_cr_double: +; RV64FINX: # %bb.0: +; RV64FINX-NEXT: lui a1, %hi(gd) +; RV64FINX-NEXT: ld a1, %lo(gd)(a1) +; RV64FINX-NEXT: #APP +; RV64FINX-NEXT: fadd.d a0, a0, a1 +; RV64FINX-NEXT: #NO_APP +; RV64FINX-NEXT: ret + %1 = load double, ptr @gd + %2 = tail call double asm "fadd.d $0, $1, $2", "=^cr,^cr,^cr"(double %a, double %1) + ret double %2 +} + +define double @constraint_double_abi_name(double %a) nounwind { +; RV32FINX-LABEL: constraint_double_abi_name: +; RV32FINX: # %bb.0: +; RV32FINX-NEXT: addi sp, sp, -16 +; RV32FINX-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32FINX-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32FINX-NEXT: lui a2, %hi(gd) +; RV32FINX-NEXT: lw s0, %lo(gd)(a2) +; RV32FINX-NEXT: lw s1, %lo(gd+4)(a2) +; RV32FINX-NEXT: #APP +; RV32FINX-NEXT: fadd.d t1, a0, s0 +; RV32FINX-NEXT: #NO_APP +; RV32FINX-NEXT: mv a0, t1 +; RV32FINX-NEXT: mv a1, t2 +; RV32FINX-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32FINX-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32FINX-NEXT: addi sp, sp, 16 +; RV32FINX-NEXT: ret +; +; RV64FINX-LABEL: constraint_double_abi_name: +; RV64FINX: # %bb.0: +; RV64FINX-NEXT: addi sp, sp, -16 +; RV64FINX-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64FINX-NEXT: lui a1, %hi(gd) +; RV64FINX-NEXT: ld s0, %lo(gd)(a1) +; RV64FINX-NEXT: #APP +; RV64FINX-NEXT: fadd.d t1, a0, s0 +; RV64FINX-NEXT: #NO_APP +; RV64FINX-NEXT: mv a0, t1 +; RV64FINX-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64FINX-NEXT: addi sp, sp, 16 +; RV64FINX-NEXT: ret + %1 = load double, ptr @gd + %2 = tail call double asm "fadd.d $0, $1, $2", "={t1},{a0},{s0}"(double %a, double %1) + ret double %2 +} diff --git a/llvm/test/CodeGen/RISCV/inline-asm-zfh-constraint-f.ll 
b/llvm/test/CodeGen/RISCV/inline-asm-zfh-constraint-f.ll index 8caf5956e7a7a7..83145ba69673d5 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-zfh-constraint-f.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-zfh-constraint-f.ll @@ -51,6 +51,47 @@ define half @constraint_f_half(half %a) nounwind { ret half %2 } +define half @constraint_cf_half(half %a) nounwind { +; RV32ZFH-LABEL: constraint_cf_half: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(gh) +; RV32ZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV32ZFH-NEXT: #APP +; RV32ZFH-NEXT: fadd.h fa0, fa0, fa5 +; RV32ZFH-NEXT: #NO_APP +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: constraint_cf_half: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: lui a0, %hi(gh) +; RV64ZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV64ZFH-NEXT: #APP +; RV64ZFH-NEXT: fadd.h fa0, fa0, fa5 +; RV64ZFH-NEXT: #NO_APP +; RV64ZFH-NEXT: ret +; +; RV32DZFH-LABEL: constraint_cf_half: +; RV32DZFH: # %bb.0: +; RV32DZFH-NEXT: lui a0, %hi(gh) +; RV32DZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV32DZFH-NEXT: #APP +; RV32DZFH-NEXT: fadd.h fa0, fa0, fa5 +; RV32DZFH-NEXT: #NO_APP +; RV32DZFH-NEXT: ret +; +; RV64DZFH-LABEL: constraint_cf_half: +; RV64DZFH: # %bb.0: +; RV64DZFH-NEXT: lui a0, %hi(gh) +; RV64DZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV64DZFH-NEXT: #APP +; RV64DZFH-NEXT: fadd.h fa0, fa0, fa5 +; RV64DZFH-NEXT: #NO_APP +; RV64DZFH-NEXT: ret + %1 = load half, ptr @gh + %2 = tail call half asm "fadd.h $0, $1, $2", "=^cf,^cf,^cf"(half %a, half %1) + ret half %2 +} + define half @constraint_f_half_abi_name(half %a) nounwind { ; RV32ZFH-LABEL: constraint_f_half_abi_name: ; RV32ZFH: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/inline-asm-zfh-modifier-N.ll b/llvm/test/CodeGen/RISCV/inline-asm-zfh-modifier-N.ll new file mode 100644 index 00000000000000..d1eb2a2d8b102a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/inline-asm-zfh-modifier-N.ll @@ -0,0 +1,157 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=zfh -verify-machineinstrs 
-no-integrated-as < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefix=RV32ZFH %s +; RUN: llc -mtriple=riscv64 -mattr=zfh -verify-machineinstrs -no-integrated-as < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefix=RV64ZFH %s +; RUN: llc -mtriple=riscv32 -mattr=zfh,+d -verify-machineinstrs -no-integrated-as < %s \ +; RUN: -target-abi=ilp32d | FileCheck -check-prefix=RV32DZFH %s +; RUN: llc -mtriple=riscv64 -mattr=zfh,+d -verify-machineinstrs -no-integrated-as < %s \ +; RUN: -target-abi=lp64d | FileCheck -check-prefix=RV64DZFH %s + +;; `.insn 0x4, 0x04000053 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)` is +;; the raw encoding for `fadd.h` + +@gh = external global half + +define half @constraint_f_half(half %a) nounwind { +; RV32ZFH-LABEL: constraint_f_half: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(gh) +; RV32ZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV32ZFH-NEXT: #APP +; RV32ZFH-NEXT: .insn 0x4, 0x04000053 | (10 << 7) | (10 << 15) | (15 << 20) +; RV32ZFH-NEXT: #NO_APP +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: constraint_f_half: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: lui a0, %hi(gh) +; RV64ZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV64ZFH-NEXT: #APP +; RV64ZFH-NEXT: .insn 0x4, 0x04000053 | (10 << 7) | (10 << 15) | (15 << 20) +; RV64ZFH-NEXT: #NO_APP +; RV64ZFH-NEXT: ret +; +; RV32DZFH-LABEL: constraint_f_half: +; RV32DZFH: # %bb.0: +; RV32DZFH-NEXT: lui a0, %hi(gh) +; RV32DZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV32DZFH-NEXT: #APP +; RV32DZFH-NEXT: .insn 0x4, 0x04000053 | (10 << 7) | (10 << 15) | (15 << 20) +; RV32DZFH-NEXT: #NO_APP +; RV32DZFH-NEXT: ret +; +; RV64DZFH-LABEL: constraint_f_half: +; RV64DZFH: # %bb.0: +; RV64DZFH-NEXT: lui a0, %hi(gh) +; RV64DZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV64DZFH-NEXT: #APP +; RV64DZFH-NEXT: .insn 0x4, 0x04000053 | (10 << 7) | (10 << 15) | (15 << 20) +; RV64DZFH-NEXT: #NO_APP +; RV64DZFH-NEXT: ret + %1 = load half, ptr @gh + %2 = tail call half asm ".insn 0x4, 0x04000053 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", 
"=f,f,f"(half %a, half %1) + ret half %2 +} + +define half @constraint_cf_half(half %a) nounwind { +; RV32ZFH-LABEL: constraint_cf_half: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: lui a0, %hi(gh) +; RV32ZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV32ZFH-NEXT: #APP +; RV32ZFH-NEXT: .insn 0x4, 0x04000053 | (10 << 7) | (10 << 15) | (15 << 20) +; RV32ZFH-NEXT: #NO_APP +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: constraint_cf_half: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: lui a0, %hi(gh) +; RV64ZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV64ZFH-NEXT: #APP +; RV64ZFH-NEXT: .insn 0x4, 0x04000053 | (10 << 7) | (10 << 15) | (15 << 20) +; RV64ZFH-NEXT: #NO_APP +; RV64ZFH-NEXT: ret +; +; RV32DZFH-LABEL: constraint_cf_half: +; RV32DZFH: # %bb.0: +; RV32DZFH-NEXT: lui a0, %hi(gh) +; RV32DZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV32DZFH-NEXT: #APP +; RV32DZFH-NEXT: .insn 0x4, 0x04000053 | (10 << 7) | (10 << 15) | (15 << 20) +; RV32DZFH-NEXT: #NO_APP +; RV32DZFH-NEXT: ret +; +; RV64DZFH-LABEL: constraint_cf_half: +; RV64DZFH: # %bb.0: +; RV64DZFH-NEXT: lui a0, %hi(gh) +; RV64DZFH-NEXT: flh fa5, %lo(gh)(a0) +; RV64DZFH-NEXT: #APP +; RV64DZFH-NEXT: .insn 0x4, 0x04000053 | (10 << 7) | (10 << 15) | (15 << 20) +; RV64DZFH-NEXT: #NO_APP +; RV64DZFH-NEXT: ret + %1 = load half, ptr @gh + %2 = tail call half asm ".insn 0x4, 0x04000053 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "=^cf,^cf,^cf"(half %a, half %1) + ret half %2 +} + +define half @constraint_f_half_abi_name(half %a) nounwind { +; RV32ZFH-LABEL: constraint_f_half_abi_name: +; RV32ZFH: # %bb.0: +; RV32ZFH-NEXT: addi sp, sp, -16 +; RV32ZFH-NEXT: fsw fs0, 12(sp) # 4-byte Folded Spill +; RV32ZFH-NEXT: lui a0, %hi(gh) +; RV32ZFH-NEXT: flh fs0, %lo(gh)(a0) +; RV32ZFH-NEXT: #APP +; RV32ZFH-NEXT: .insn 0x4, 0x04000053 | (0 << 7) | (10 << 15) | (8 << 20) +; RV32ZFH-NEXT: #NO_APP +; RV32ZFH-NEXT: fmv.h fa0, ft0 +; RV32ZFH-NEXT: flw fs0, 12(sp) # 4-byte Folded Reload +; RV32ZFH-NEXT: addi sp, sp, 16 +; RV32ZFH-NEXT: ret +; +; RV64ZFH-LABEL: 
constraint_f_half_abi_name: +; RV64ZFH: # %bb.0: +; RV64ZFH-NEXT: addi sp, sp, -16 +; RV64ZFH-NEXT: fsw fs0, 12(sp) # 4-byte Folded Spill +; RV64ZFH-NEXT: lui a0, %hi(gh) +; RV64ZFH-NEXT: flh fs0, %lo(gh)(a0) +; RV64ZFH-NEXT: #APP +; RV64ZFH-NEXT: .insn 0x4, 0x04000053 | (0 << 7) | (10 << 15) | (8 << 20) +; RV64ZFH-NEXT: #NO_APP +; RV64ZFH-NEXT: fmv.h fa0, ft0 +; RV64ZFH-NEXT: flw fs0, 12(sp) # 4-byte Folded Reload +; RV64ZFH-NEXT: addi sp, sp, 16 +; RV64ZFH-NEXT: ret +; +; RV32DZFH-LABEL: constraint_f_half_abi_name: +; RV32DZFH: # %bb.0: +; RV32DZFH-NEXT: addi sp, sp, -16 +; RV32DZFH-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill +; RV32DZFH-NEXT: lui a0, %hi(gh) +; RV32DZFH-NEXT: flh fs0, %lo(gh)(a0) +; RV32DZFH-NEXT: #APP +; RV32DZFH-NEXT: .insn 0x4, 0x04000053 | (0 << 7) | (10 << 15) | (8 << 20) +; RV32DZFH-NEXT: #NO_APP +; RV32DZFH-NEXT: fmv.h fa0, ft0 +; RV32DZFH-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload +; RV32DZFH-NEXT: addi sp, sp, 16 +; RV32DZFH-NEXT: ret +; +; RV64DZFH-LABEL: constraint_f_half_abi_name: +; RV64DZFH: # %bb.0: +; RV64DZFH-NEXT: addi sp, sp, -16 +; RV64DZFH-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill +; RV64DZFH-NEXT: lui a0, %hi(gh) +; RV64DZFH-NEXT: flh fs0, %lo(gh)(a0) +; RV64DZFH-NEXT: #APP +; RV64DZFH-NEXT: .insn 0x4, 0x04000053 | (0 << 7) | (10 << 15) | (8 << 20) +; RV64DZFH-NEXT: #NO_APP +; RV64DZFH-NEXT: fmv.h fa0, ft0 +; RV64DZFH-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload +; RV64DZFH-NEXT: addi sp, sp, 16 +; RV64DZFH-NEXT: ret + %1 = load half, ptr @gh + %2 = tail call half asm ".insn 0x4, 0x04000053 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "={ft0},{fa0},{fs0}"(half %a, half %1) + ret half %2 +} diff --git a/llvm/test/CodeGen/RISCV/inline-asm-zfinx-constraint-r.ll b/llvm/test/CodeGen/RISCV/inline-asm-zfinx-constraint-r.ll new file mode 100644 index 00000000000000..a8d3515fe1890e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/inline-asm-zfinx-constraint-r.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+zfinx -target-abi=ilp32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32FINX %s +; RUN: llc -mtriple=riscv64 -mattr=+zfinx -target-abi=lp64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64FINX %s + +@gf = external global float + +define float @constraint_r_float(float %a) nounwind { +; RV32FINX-LABEL: constraint_r_float: +; RV32FINX: # %bb.0: +; RV32FINX-NEXT: lui a1, %hi(gf) +; RV32FINX-NEXT: lw a1, %lo(gf)(a1) +; RV32FINX-NEXT: #APP +; RV32FINX-NEXT: fadd.s a0, a0, a1 +; RV32FINX-NEXT: #NO_APP +; RV32FINX-NEXT: ret +; +; RV64FINX-LABEL: constraint_r_float: +; RV64FINX: # %bb.0: +; RV64FINX-NEXT: lui a1, %hi(gf) +; RV64FINX-NEXT: lw a1, %lo(gf)(a1) +; RV64FINX-NEXT: #APP +; RV64FINX-NEXT: fadd.s a0, a0, a1 +; RV64FINX-NEXT: #NO_APP +; RV64FINX-NEXT: ret + %1 = load float, ptr @gf + %2 = tail call float asm "fadd.s $0, $1, $2", "=r,r,r"(float %a, float %1) + ret float %2 +} + +define float @constraint_cr_float(float %a) nounwind { +; RV32FINX-LABEL: constraint_cr_float: +; RV32FINX: # %bb.0: +; RV32FINX-NEXT: lui a1, %hi(gf) +; RV32FINX-NEXT: lw a1, %lo(gf)(a1) +; RV32FINX-NEXT: #APP +; RV32FINX-NEXT: fadd.s a0, a0, a1 +; RV32FINX-NEXT: #NO_APP +; RV32FINX-NEXT: ret +; +; RV64FINX-LABEL: constraint_cr_float: +; RV64FINX: # %bb.0: +; RV64FINX-NEXT: lui a1, %hi(gf) +; RV64FINX-NEXT: lw a1, %lo(gf)(a1) +; RV64FINX-NEXT: #APP +; RV64FINX-NEXT: fadd.s a0, a0, a1 +; RV64FINX-NEXT: #NO_APP +; RV64FINX-NEXT: ret + %1 = load float, ptr @gf + %2 = tail call float asm "fadd.s $0, $1, $2", "=^cr,cr,cr"(float %a, float %1) + ret float %2 +} + +define float @constraint_float_abi_name(float %a) nounwind { +; RV32FINX-LABEL: constraint_float_abi_name: +; RV32FINX: # %bb.0: +; RV32FINX-NEXT: addi sp, sp, -16 +; RV32FINX-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32FINX-NEXT: lui a1, %hi(gf) +; RV32FINX-NEXT: lw s0, %lo(gf)(a1) +; RV32FINX-NEXT: # 
kill: def $x10_w killed $x10_w def $x10 +; RV32FINX-NEXT: #APP +; RV32FINX-NEXT: fadd.s t0, a0, s0 +; RV32FINX-NEXT: #NO_APP +; RV32FINX-NEXT: mv a0, t0 +; RV32FINX-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32FINX-NEXT: addi sp, sp, 16 +; RV32FINX-NEXT: ret +; +; RV64FINX-LABEL: constraint_float_abi_name: +; RV64FINX: # %bb.0: +; RV64FINX-NEXT: addi sp, sp, -16 +; RV64FINX-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64FINX-NEXT: lui a1, %hi(gf) +; RV64FINX-NEXT: lw s0, %lo(gf)(a1) +; RV64FINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV64FINX-NEXT: #APP +; RV64FINX-NEXT: fadd.s t0, a0, s0 +; RV64FINX-NEXT: #NO_APP +; RV64FINX-NEXT: mv a0, t0 +; RV64FINX-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64FINX-NEXT: addi sp, sp, 16 +; RV64FINX-NEXT: ret + %1 = load float, ptr @gf + %2 = tail call float asm "fadd.s $0, $1, $2", "={t0},{a0},{s0}"(float %a, float %1) + ret float %2 +} + diff --git a/llvm/test/CodeGen/RISCV/inline-asm-zhinx-constraint-r.ll b/llvm/test/CodeGen/RISCV/inline-asm-zhinx-constraint-r.ll new file mode 100644 index 00000000000000..f9707c6c8995dc --- /dev/null +++ b/llvm/test/CodeGen/RISCV/inline-asm-zhinx-constraint-r.ll @@ -0,0 +1,158 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+zhinx -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32 | FileCheck -check-prefix=RV32ZHINX %s +; RUN: llc -mtriple=riscv64 -mattr=+zhinx -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64 | FileCheck -check-prefix=RV64ZHINX %s +; RUN: llc -mtriple=riscv32 -mattr=+zdinx,+zhinx -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32 | FileCheck -check-prefix=RV32DINXZHINX %s +; RUN: llc -mtriple=riscv64 -mattr=+zdinx,+zhinx -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64 | FileCheck -check-prefix=RV64DINXZHINX %s + +@gh = external global half + +define half @constraint_r_half(half %a) nounwind { +; RV32ZHINX-LABEL: constraint_r_half: +; RV32ZHINX: # 
%bb.0: +; RV32ZHINX-NEXT: lui a1, %hi(gh) +; RV32ZHINX-NEXT: lh a1, %lo(gh)(a1) +; RV32ZHINX-NEXT: #APP +; RV32ZHINX-NEXT: fadd.h a0, a0, a1 +; RV32ZHINX-NEXT: #NO_APP +; RV32ZHINX-NEXT: ret +; +; RV64ZHINX-LABEL: constraint_r_half: +; RV64ZHINX: # %bb.0: +; RV64ZHINX-NEXT: lui a1, %hi(gh) +; RV64ZHINX-NEXT: lh a1, %lo(gh)(a1) +; RV64ZHINX-NEXT: #APP +; RV64ZHINX-NEXT: fadd.h a0, a0, a1 +; RV64ZHINX-NEXT: #NO_APP +; RV64ZHINX-NEXT: ret +; +; RV32DINXZHINX-LABEL: constraint_r_half: +; RV32DINXZHINX: # %bb.0: +; RV32DINXZHINX-NEXT: lui a1, %hi(gh) +; RV32DINXZHINX-NEXT: lh a1, %lo(gh)(a1) +; RV32DINXZHINX-NEXT: #APP +; RV32DINXZHINX-NEXT: fadd.h a0, a0, a1 +; RV32DINXZHINX-NEXT: #NO_APP +; RV32DINXZHINX-NEXT: ret +; +; RV64DINXZHINX-LABEL: constraint_r_half: +; RV64DINXZHINX: # %bb.0: +; RV64DINXZHINX-NEXT: lui a1, %hi(gh) +; RV64DINXZHINX-NEXT: lh a1, %lo(gh)(a1) +; RV64DINXZHINX-NEXT: #APP +; RV64DINXZHINX-NEXT: fadd.h a0, a0, a1 +; RV64DINXZHINX-NEXT: #NO_APP +; RV64DINXZHINX-NEXT: ret + %1 = load half, ptr @gh + %2 = tail call half asm "fadd.h $0, $1, $2", "=r,r,r"(half %a, half %1) + ret half %2 +} + +define half @constraint_cr_half(half %a) nounwind { +; RV32ZHINX-LABEL: constraint_cr_half: +; RV32ZHINX: # %bb.0: +; RV32ZHINX-NEXT: lui a1, %hi(gh) +; RV32ZHINX-NEXT: lh a1, %lo(gh)(a1) +; RV32ZHINX-NEXT: #APP +; RV32ZHINX-NEXT: fadd.h a0, a0, a1 +; RV32ZHINX-NEXT: #NO_APP +; RV32ZHINX-NEXT: ret +; +; RV64ZHINX-LABEL: constraint_cr_half: +; RV64ZHINX: # %bb.0: +; RV64ZHINX-NEXT: lui a1, %hi(gh) +; RV64ZHINX-NEXT: lh a1, %lo(gh)(a1) +; RV64ZHINX-NEXT: #APP +; RV64ZHINX-NEXT: fadd.h a0, a0, a1 +; RV64ZHINX-NEXT: #NO_APP +; RV64ZHINX-NEXT: ret +; +; RV32DINXZHINX-LABEL: constraint_cr_half: +; RV32DINXZHINX: # %bb.0: +; RV32DINXZHINX-NEXT: lui a1, %hi(gh) +; RV32DINXZHINX-NEXT: lh a1, %lo(gh)(a1) +; RV32DINXZHINX-NEXT: #APP +; RV32DINXZHINX-NEXT: fadd.h a0, a0, a1 +; RV32DINXZHINX-NEXT: #NO_APP +; RV32DINXZHINX-NEXT: ret +; +; RV64DINXZHINX-LABEL: constraint_cr_half: 
+; RV64DINXZHINX: # %bb.0: +; RV64DINXZHINX-NEXT: lui a1, %hi(gh) +; RV64DINXZHINX-NEXT: lh a1, %lo(gh)(a1) +; RV64DINXZHINX-NEXT: #APP +; RV64DINXZHINX-NEXT: fadd.h a0, a0, a1 +; RV64DINXZHINX-NEXT: #NO_APP +; RV64DINXZHINX-NEXT: ret + %1 = load half, ptr @gh + %2 = tail call half asm "fadd.h $0, $1, $2", "=^cr,^cr,^cr"(half %a, half %1) + ret half %2 +} + +define half @constraint_half_abi_name(half %a) nounwind { +; RV32ZHINX-LABEL: constraint_half_abi_name: +; RV32ZHINX: # %bb.0: +; RV32ZHINX-NEXT: addi sp, sp, -16 +; RV32ZHINX-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZHINX-NEXT: lui a1, %hi(gh) +; RV32ZHINX-NEXT: lh s0, %lo(gh)(a1) +; RV32ZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 +; RV32ZHINX-NEXT: #APP +; RV32ZHINX-NEXT: fadd.s t0, a0, s0 +; RV32ZHINX-NEXT: #NO_APP +; RV32ZHINX-NEXT: mv a0, t0 +; RV32ZHINX-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32ZHINX-NEXT: addi sp, sp, 16 +; RV32ZHINX-NEXT: ret +; +; RV64ZHINX-LABEL: constraint_half_abi_name: +; RV64ZHINX: # %bb.0: +; RV64ZHINX-NEXT: addi sp, sp, -16 +; RV64ZHINX-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64ZHINX-NEXT: lui a1, %hi(gh) +; RV64ZHINX-NEXT: lh s0, %lo(gh)(a1) +; RV64ZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 +; RV64ZHINX-NEXT: #APP +; RV64ZHINX-NEXT: fadd.s t0, a0, s0 +; RV64ZHINX-NEXT: #NO_APP +; RV64ZHINX-NEXT: mv a0, t0 +; RV64ZHINX-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64ZHINX-NEXT: addi sp, sp, 16 +; RV64ZHINX-NEXT: ret +; +; RV32DINXZHINX-LABEL: constraint_half_abi_name: +; RV32DINXZHINX: # %bb.0: +; RV32DINXZHINX-NEXT: addi sp, sp, -16 +; RV32DINXZHINX-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32DINXZHINX-NEXT: lui a1, %hi(gh) +; RV32DINXZHINX-NEXT: lh s0, %lo(gh)(a1) +; RV32DINXZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 +; RV32DINXZHINX-NEXT: #APP +; RV32DINXZHINX-NEXT: fadd.s t0, a0, s0 +; RV32DINXZHINX-NEXT: #NO_APP +; RV32DINXZHINX-NEXT: mv a0, t0 +; RV32DINXZHINX-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; 
RV32DINXZHINX-NEXT: addi sp, sp, 16 +; RV32DINXZHINX-NEXT: ret +; +; RV64DINXZHINX-LABEL: constraint_half_abi_name: +; RV64DINXZHINX: # %bb.0: +; RV64DINXZHINX-NEXT: addi sp, sp, -16 +; RV64DINXZHINX-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64DINXZHINX-NEXT: lui a1, %hi(gh) +; RV64DINXZHINX-NEXT: lh s0, %lo(gh)(a1) +; RV64DINXZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 +; RV64DINXZHINX-NEXT: #APP +; RV64DINXZHINX-NEXT: fadd.s t0, a0, s0 +; RV64DINXZHINX-NEXT: #NO_APP +; RV64DINXZHINX-NEXT: mv a0, t0 +; RV64DINXZHINX-NEXT: ld s0, 8(sp) # 8-byte Folded Reload +; RV64DINXZHINX-NEXT: addi sp, sp, 16 +; RV64DINXZHINX-NEXT: ret + %1 = load half, ptr @gh + %2 = tail call half asm "fadd.s $0, $1, $2", "={t0},{a0},{s0}"(half %a, half %1) + ret half %2 +} diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll index cb240d2dc68d0c..79266743a1d051 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm.ll @@ -56,6 +56,29 @@ define i32 @constraint_r_zero(i32 %a) nounwind { ret i32 %2 } +define i32 @constraint_cr(i32 %a) nounwind { +; RV32I-LABEL: constraint_cr: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, %hi(gi) +; RV32I-NEXT: lw a1, %lo(gi)(a1) +; RV32I-NEXT: #APP +; RV32I-NEXT: c.add a0, a0, a1 +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret +; +; RV64I-LABEL: constraint_cr: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, %hi(gi) +; RV64I-NEXT: lw a1, %lo(gi)(a1) +; RV64I-NEXT: #APP +; RV64I-NEXT: c.add a0, a0, a1 +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret + %1 = load i32, ptr @gi + %2 = tail call i32 asm "c.add $0, $1, $2", "=^cr,0,^cr"(i32 %a, i32 %1) + ret i32 %2 +} + define i32 @constraint_i(i32 %a) nounwind { ; RV32I-LABEL: constraint_i: ; RV32I: # %bb.0: @@ -215,6 +238,49 @@ define i32 @modifier_i_reg(i32 %a, i32 %b) nounwind { ret i32 %1 } +;; `.insn 0x4, 0x33 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)` is the +;; raw encoding of `add` + +define i32 @modifier_N_reg(i32 %a, i32 %b) nounwind { 
+; RV32I-LABEL: modifier_N_reg: +; RV32I: # %bb.0: +; RV32I-NEXT: #APP +; RV32I-NEXT: .insn 0x4, 0x33 | (10 << 7) | (10 << 15) | (11 << 20) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret +; +; RV64I-LABEL: modifier_N_reg: +; RV64I: # %bb.0: +; RV64I-NEXT: #APP +; RV64I-NEXT: .insn 0x4, 0x33 | (10 << 7) | (10 << 15) | (11 << 20) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret + %1 = tail call i32 asm ".insn 0x4, 0x33 | (${0:N} << 7) | (${1:N} << 15) | (${2:N} << 20)", "=r,r,r"(i32 %a, i32 %b) + ret i32 %1 +} + +;; `.insn 0x2, 0x9422 | (${0:N} << 7) | (${2:N} << 2)` is the raw encoding of +;; `c.add` (note the constraint that the first input should be the same as the +;; output). + +define i32 @modifier_N_with_cr_reg(i32 %a, i32 %b) nounwind { +; RV32I-LABEL: modifier_N_with_cr_reg: +; RV32I: # %bb.0: +; RV32I-NEXT: #APP +; RV32I-NEXT: .insn 0x2, 0x9422 | (10 << 7) | (11 << 2) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret +; +; RV64I-LABEL: modifier_N_with_cr_reg: +; RV64I: # %bb.0: +; RV64I-NEXT: #APP +; RV64I-NEXT: .insn 0x2, 0x9422 | (10 << 7) | (11 << 2) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret + %1 = tail call i32 asm ".insn 0x2, 0x9422 | (${0:N} << 7) | (${2:N} << 2)", "=^cr,0,^cr"(i32 %a, i32 %b) + ret i32 %1 +} + define void @operand_global() nounwind { ; RV32I-LABEL: operand_global: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/RISCV/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..de87b10d387338 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/naked-fn-with-frame-pointer.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple riscv32 | FileCheck %s -check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple riscv64 | FileCheck %s -check-prefixes=CHECK-64 + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-32-LABEL: naked: +; CHECK-32: # %bb.0: +; 
CHECK-32-NEXT: call main +; +; CHECK-64-LABEL: naked: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: call main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-32-LABEL: normal: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: addi sp, sp, -16 +; CHECK-32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; CHECK-32-NEXT: .cfi_offset ra, -4 +; CHECK-32-NEXT: .cfi_offset s0, -8 +; CHECK-32-NEXT: addi s0, sp, 16 +; CHECK-32-NEXT: .cfi_def_cfa s0, 0 +; CHECK-32-NEXT: call main +; +; CHECK-64-LABEL: normal: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: addi sp, sp, -16 +; CHECK-64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; CHECK-64-NEXT: .cfi_offset ra, -8 +; CHECK-64-NEXT: .cfi_offset s0, -16 +; CHECK-64-NEXT: addi s0, sp, 16 +; CHECK-64-NEXT: .cfi_def_cfa s0, 0 +; CHECK-64-NEXT: call main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/RISCV/rv64-trampoline.ll b/llvm/test/CodeGen/RISCV/rv64-trampoline.ll new file mode 100644 index 00000000000000..ba184063265098 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64-trampoline.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64 %s +; RUN: llc -mtriple=riscv64-unknown-linux-gnu -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64-LINUX %s + +declare void @llvm.init.trampoline(ptr, ptr, ptr) +declare ptr @llvm.adjust.trampoline(ptr) +declare i64 @f(ptr nest, i64) + +define i64 @test0(i64 %n, ptr %p) nounwind { +; RV64-LABEL: test0: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte 
Folded Spill +; RV64-NEXT: mv s0, a0 +; RV64-NEXT: lui a0, %hi(f) +; RV64-NEXT: addi a0, a0, %lo(f) +; RV64-NEXT: sd a0, 32(sp) +; RV64-NEXT: li a0, 919 +; RV64-NEXT: lui a2, %hi(.LCPI0_0) +; RV64-NEXT: ld a2, %lo(.LCPI0_0)(a2) +; RV64-NEXT: lui a3, 6203 +; RV64-NEXT: addi a3, a3, 643 +; RV64-NEXT: sw a0, 8(sp) +; RV64-NEXT: sw a3, 12(sp) +; RV64-NEXT: sd a2, 16(sp) +; RV64-NEXT: sd a1, 24(sp) +; RV64-NEXT: addi a1, sp, 24 +; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: addi s1, sp, 8 +; RV64-NEXT: call __clear_cache +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: jalr s1 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret +; +; RV64-LINUX-LABEL: test0: +; RV64-LINUX: # %bb.0: +; RV64-LINUX-NEXT: addi sp, sp, -64 +; RV64-LINUX-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-LINUX-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-LINUX-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-LINUX-NEXT: mv s0, a0 +; RV64-LINUX-NEXT: lui a0, %hi(f) +; RV64-LINUX-NEXT: addi a0, a0, %lo(f) +; RV64-LINUX-NEXT: sd a0, 32(sp) +; RV64-LINUX-NEXT: li a0, 919 +; RV64-LINUX-NEXT: lui a2, %hi(.LCPI0_0) +; RV64-LINUX-NEXT: ld a2, %lo(.LCPI0_0)(a2) +; RV64-LINUX-NEXT: lui a3, 6203 +; RV64-LINUX-NEXT: addi a3, a3, 643 +; RV64-LINUX-NEXT: sw a0, 8(sp) +; RV64-LINUX-NEXT: sw a3, 12(sp) +; RV64-LINUX-NEXT: sd a2, 16(sp) +; RV64-LINUX-NEXT: sd a1, 24(sp) +; RV64-LINUX-NEXT: addi a1, sp, 24 +; RV64-LINUX-NEXT: addi a0, sp, 8 +; RV64-LINUX-NEXT: addi s1, sp, 8 +; RV64-LINUX-NEXT: li a2, 0 +; RV64-LINUX-NEXT: call __riscv_flush_icache +; RV64-LINUX-NEXT: mv a0, s0 +; RV64-LINUX-NEXT: jalr s1 +; RV64-LINUX-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-LINUX-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-LINUX-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64-LINUX-NEXT: addi sp, sp, 64 +; RV64-LINUX-NEXT: ret + %alloca = alloca [32 x i8], align 8 + 
call void @llvm.init.trampoline(ptr %alloca, ptr @f, ptr %p) + %tramp = call ptr @llvm.adjust.trampoline(ptr %alloca) + %ret = call i64 %tramp(i64 %n) + ret i64 %ret + +} diff --git a/llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll b/llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll index 95695aa697764a..18bd41a210f53f 100644 --- a/llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll +++ b/llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll @@ -1,6 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc -mtriple=riscv32 -mattr=+zdinx -verify-machineinstrs < %s \ ; RUN: -target-abi=ilp32 -mattr=+zhinx | FileCheck %s + +;; These tests cover the use of `r` and `cr` constraints for floating point values on rv32. +;; +;; In particular, there is significant complexity around using paired GPRs for double values on rv32. + define dso_local void @zdinx_asm(ptr nocapture noundef writeonly %a, double noundef %b, double noundef %c) nounwind { ; CHECK-LABEL: zdinx_asm: ; CHECK: # %bb.0: # %entry @@ -50,3 +55,59 @@ entry: store half %0, ptr %arrayidx, align 8 ret void } + +define dso_local void @zdinx_asm_cr(ptr nocapture noundef writeonly %a, double noundef %b, double noundef %c) nounwind { +; CHECK-LABEL: zdinx_asm_cr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; CHECK-NEXT: mv a5, a4 +; CHECK-NEXT: mv s1, a2 +; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: #APP +; CHECK-NEXT: fsgnjx.d a2, s0, a4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: sw a2, 8(a0) +; CHECK-NEXT: sw a3, 12(a0) +; CHECK-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +entry: + %arrayidx = getelementptr inbounds double, ptr %a, i32 1 + %0 = tail call double asm "fsgnjx.d $0, $1, $2", "=^cr,^cr,^cr"(double %b, double %c) + store double 
%0, ptr %arrayidx, align 8 + ret void +} + +define dso_local void @zfinx_asm_cr(ptr nocapture noundef writeonly %a, float noundef %b, float noundef %c) nounwind { +; CHECK-LABEL: zfinx_asm_cr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: fsgnjx.s a1, a1, a2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: sw a1, 4(a0) +; CHECK-NEXT: ret +entry: + %arrayidx = getelementptr inbounds float, ptr %a, i32 1 + %0 = tail call float asm "fsgnjx.s $0, $1, $2", "=^cr,^cr,^cr"(float %b, float %c) + store float %0, ptr %arrayidx, align 8 + ret void +} + +define dso_local void @zhinx_asm_cr(ptr nocapture noundef writeonly %a, half noundef %b, half noundef %c) nounwind { +; CHECK-LABEL: zhinx_asm_cr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: fsgnjx.h a1, a1, a2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: sh a1, 2(a0) +; CHECK-NEXT: ret +entry: + %arrayidx = getelementptr inbounds half, ptr %a, i32 1 + %0 = tail call half asm "fsgnjx.h $0, $1, $2", "=^cr,^cr,^cr"(half %b, half %c) + store half %0, ptr %arrayidx, align 8 + ret void +} diff --git a/llvm/test/CodeGen/SPARC/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/SPARC/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..af97c573625b52 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/naked-fn-with-frame-pointer.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple sparc | FileCheck %s -check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple sparc64 | FileCheck %s -check-prefixes=CHECK-64 + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-32-LABEL: naked: +; CHECK-32: .cfi_startproc +; CHECK-32-NEXT: ! %bb.0: +; CHECK-32-NEXT: call main +; CHECK-32-NEXT: nop +; +; CHECK-64-LABEL: naked: +; CHECK-64: .cfi_startproc +; CHECK-64-NEXT: ! 
%bb.0: +; CHECK-64-NEXT: call main +; CHECK-64-NEXT: nop + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-32-LABEL: normal: +; CHECK-32: .cfi_startproc +; CHECK-32-NEXT: ! %bb.0: +; CHECK-32-NEXT: save %sp, -96, %sp +; CHECK-32-NEXT: .cfi_def_cfa_register %fp +; CHECK-32-NEXT: .cfi_window_save +; CHECK-32-NEXT: .cfi_register %o7, %i7 +; CHECK-32-NEXT: call main +; CHECK-32-NEXT: nop +; +; CHECK-64-LABEL: normal: +; CHECK-64: .cfi_startproc +; CHECK-64-NEXT: ! %bb.0: +; CHECK-64-NEXT: save %sp, -176, %sp +; CHECK-64-NEXT: .cfi_def_cfa_register %fp +; CHECK-64-NEXT: .cfi_window_save +; CHECK-64-NEXT: .cfi_register %o7, %i7 +; CHECK-64-NEXT: call main +; CHECK-64-NEXT: nop + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/SystemZ/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/SystemZ/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..3eb396e4044206 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/naked-fn-with-frame-pointer.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple s390x | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: # %bb.0: +; CHECK-NEXT: brasl %r14, main@PLT + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r11, %r15, 88(%r15) +; CHECK-NEXT: .cfi_offset %r11, -72 +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: lgr %r11, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r11 +; CHECK-NEXT: brasl %r14, main@PLT + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll 
b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll deleted file mode 100644 index 8bcf87130c5400..00000000000000 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll +++ /dev/null @@ -1,150 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s - -; int test1() { -; return 0; -; } -define i32 @test1() "sign-return-address"="non-leaf" { -; CHECK-LABEL: test1: -; CHECK: .cfi_sections .debug_frame -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: @ %bb.0: @ %entry -; CHECK-NEXT: pac r12, lr, sp -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 4 -; CHECK-NEXT: .cfi_offset ra_auth_code, -4 -; CHECK-NEXT: .save {r11, lr} -; CHECK-NEXT: push.w {r11, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .cfi_offset lr, -8 -; CHECK-NEXT: .cfi_offset r11, -12 -; CHECK-NEXT: .setfp r11, sp -; CHECK-NEXT: mov r11, sp -; CHECK-NEXT: .cfi_def_cfa_register r11 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: pop.w {r11, lr} -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr -entry: - ret i32 0 -} - -; void foo(int n) { -; int a[n]; -; bar(a); -; } -define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" { -; CHECK-LABEL: test2: -; CHECK: .cfi_startproc -; CHECK-NEXT: @ %bb.0: @ %entry -; CHECK-NEXT: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r7, ra_auth_code} -; CHECK-NEXT: push.w {r4, r7, r12} -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .cfi_offset ra_auth_code, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .cfi_offset r4, -12 -; CHECK-NEXT: .save {r11, lr} -; CHECK-NEXT: push.w {r11, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset lr, -16 -; CHECK-NEXT: .cfi_offset r11, -20 -; CHECK-NEXT: .setfp r11, sp -; CHECK-NEXT: mov r11, sp -; CHECK-NEXT: 
.cfi_def_cfa_register r11 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: movs r1, #7 -; CHECK-NEXT: add.w r0, r1, r0, lsl #2 -; CHECK-NEXT: bic r0, r0, #7 -; CHECK-NEXT: sub.w r0, sp, r0 -; CHECK-NEXT: mov sp, r0 -; CHECK-NEXT: bl take_ptr -; CHECK-NEXT: mov sp, r11 -; CHECK-NEXT: pop.w {r11, lr} -; CHECK-NEXT: pop.w {r4, r7, r12} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr -entry: - %vla = alloca i32, i32 %n, align 4 - call void @take_ptr(ptr noundef nonnull %vla) - ret void -} - -; void test3(int c, float e, int z) { -; if (c) -; knr(); -; take_ptr(alloca(z)); -; if (e) -; knr(); -; } -define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-return-address"="non-leaf" { -; CHECK-LABEL: test3: -; CHECK: .cfi_startproc -; CHECK-NEXT: @ %bb.0: @ %entry -; CHECK-NEXT: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r5, r6, r7, ra_auth_code} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r12} -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset ra_auth_code, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .cfi_offset r6, -12 -; CHECK-NEXT: .cfi_offset r5, -16 -; CHECK-NEXT: .cfi_offset r4, -20 -; CHECK-NEXT: .save {r11, lr} -; CHECK-NEXT: push.w {r11, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 28 -; CHECK-NEXT: .cfi_offset lr, -24 -; CHECK-NEXT: .cfi_offset r11, -28 -; CHECK-NEXT: .setfp r11, sp -; CHECK-NEXT: mov r11, sp -; CHECK-NEXT: .cfi_def_cfa_register r11 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: it ne -; CHECK-NEXT: blne knr -; CHECK-NEXT: adds r0, r5, #7 -; CHECK-NEXT: bic r0, r0, #7 -; CHECK-NEXT: sub.w r0, sp, r0 -; CHECK-NEXT: mov sp, r0 -; CHECK-NEXT: bl take_ptr -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: bl __aeabi_fcmpeq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bleq knr -; CHECK-NEXT: mov sp, r11 -; CHECK-NEXT: pop.w {r11, lr} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r12} 
-; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr -entry: - %tobool.not = icmp eq i32 %c, 0 - br i1 %tobool.not, label %if.end, label %if.then - -if.then: ; preds = %entry - tail call void @knr() - br label %if.end - -if.end: ; preds = %if.then, %entry - %0 = alloca i8, i32 %z, align 8 - call void @take_ptr(ptr noundef nonnull %0) - %tobool1 = fcmp une float %e, 0.000000e+00 - br i1 %tobool1, label %if.then2, label %if.end3 - -if.then2: ; preds = %if.end - call void @knr() - br label %if.end3 - -if.end3: ; preds = %if.then2, %if.end - ret void -} - -declare void @knr(...) -declare void @take_ptr(ptr noundef) diff --git a/llvm/test/CodeGen/VE/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/VE/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..3b88bea46c4dd0 --- /dev/null +++ b/llvm/test/CodeGen/VE/naked-fn-with-frame-pointer.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple ve | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, main@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, main@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -240(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: lea %s0, main@lo +; CHECK-NEXT: 
and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, main@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/WebAssembly/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/WebAssembly/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..fcd42e8cbfb9f5 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/naked-fn-with-frame-pointer.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple wasm32 | FileCheck %s -check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple wasm64 | FileCheck %s -check-prefixes=CHECK-64 + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-32-LABEL: naked: +; CHECK-32: .functype naked () -> () +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: call main +; CHECK-32-NEXT: unreachable +; +; CHECK-64-LABEL: naked: +; CHECK-64: .functype naked () -> () +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: call main +; CHECK-64-NEXT: unreachable + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-32-LABEL: normal: +; CHECK-32: .functype normal () -> () +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: call main +; CHECK-32-NEXT: unreachable +; +; CHECK-64-LABEL: normal: +; CHECK-64: .functype normal () -> () +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: call main +; CHECK-64-NEXT: unreachable + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/X86/avx2-arith.ll b/llvm/test/CodeGen/X86/avx2-arith.ll index 90733dfb8465ef..44ab33ad67f272 100644 --- a/llvm/test/CodeGen/X86/avx2-arith.ll +++ b/llvm/test/CodeGen/X86/avx2-arith.ll @@ -122,7 +122,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone { ; CHECK-LABEL: mul_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; CHECK-NEXT: vpand 
%ymm1, %ymm2, %ymm3 +; CHECK-NEXT: vpand %ymm2, %ymm1, %ymm3 ; CHECK-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3 ; CHECK-NEXT: vpand %ymm2, %ymm3, %ymm3 ; CHECK-NEXT: vpandn %ymm1, %ymm2, %ymm1 diff --git a/llvm/test/CodeGen/X86/combine-sra.ll b/llvm/test/CodeGen/X86/combine-sra.ll index 7eee418742ddb5..c982884314f623 100644 --- a/llvm/test/CodeGen/X86/combine-sra.ll +++ b/llvm/test/CodeGen/X86/combine-sra.ll @@ -725,12 +725,11 @@ define <4 x i64> @combine_vec4i64_ashr_clamped(<4 x i64> %x, <4 x i64> %y) { ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm4 ; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pxor %xmm7, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm6 +; SSE41-NEXT: pxor %xmm7, %xmm6 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002259519,9223372039002259519] -; SSE41-NEXT: movdqa %xmm8, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 -; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm8, %xmm6 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483711,2147483711,2147483711,2147483711] ; SSE41-NEXT: movdqa %xmm5, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm9, %xmm0 diff --git a/llvm/test/CodeGen/X86/fma.ll b/llvm/test/CodeGen/X86/fma.ll index c55f50e97786a2..f26960b069b0e8 100644 --- a/llvm/test/CodeGen/X86/fma.ll +++ b/llvm/test/CodeGen/X86/fma.ll @@ -2096,6 +2096,142 @@ entry: ret <8 x double> %call } +define float @constant_fold_f32() { +; FMA32-LABEL: constant_fold_f32: +; FMA32: ## %bb.0: +; FMA32-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A] +; FMA32-NEXT: ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 +; FMA32-NEXT: retl ## encoding: [0xc3] +; +; FMACALL32-LABEL: constant_fold_f32: +; FMACALL32: ## %bb.0: +; FMACALL32-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ## encoding: [0xd9,0x05,A,A,A,A] +; FMACALL32-NEXT: ## fixup A - offset: 2, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 +; 
FMACALL32-NEXT: retl ## encoding: [0xc3] +; +; FMA64-LABEL: constant_fold_f32: +; FMA64: ## %bb.0: +; FMA64-NEXT: vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0] +; FMA64-NEXT: ## encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] +; FMA64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; FMA64-NEXT: retq ## encoding: [0xc3] +; +; FMACALL64-LABEL: constant_fold_f32: +; FMACALL64: ## %bb.0: +; FMACALL64-NEXT: movss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0] +; FMACALL64-NEXT: ## encoding: [0xf3,0x0f,0x10,0x05,A,A,A,A] +; FMACALL64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; FMACALL64-NEXT: retq ## encoding: [0xc3] +; +; AVX512-LABEL: constant_fold_f32: +; AVX512: ## %bb.0: +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0] +; AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] +; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512-NEXT: retq ## encoding: [0xc3] +; +; AVX512VL-LABEL: constant_fold_f32: +; AVX512VL: ## %bb.0: +; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = [1.02E+3,0.0E+0,0.0E+0,0.0E+0] +; AVX512VL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512VL-NEXT: retq ## encoding: [0xc3] + %r = call float @llvm.fma.f32(float 5.000000e+01, float 2.000000e+01, float 2.000000e+01) + ret float %r +} + +define <4 x float> @constant_fold_v4f32() { +; FMA32-LABEL: constant_fold_v4f32: +; FMA32: ## %bb.0: +; FMA32-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3] +; FMA32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; FMA32-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 +; FMA32-NEXT: retl ## encoding: [0xc3] +; +; FMACALL32-LABEL: constant_fold_v4f32: +; FMACALL32: ## %bb.0: +; FMACALL32-NEXT: vmovaps 
{{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3] +; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; FMACALL32-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 +; FMACALL32-NEXT: retl ## encoding: [0xc3] +; +; FMA64-LABEL: constant_fold_v4f32: +; FMA64: ## %bb.0: +; FMA64-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3] +; FMA64-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; FMA64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; FMA64-NEXT: retq ## encoding: [0xc3] +; +; FMACALL64-LABEL: constant_fold_v4f32: +; FMACALL64: ## %bb.0: +; FMACALL64-NEXT: movaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3] +; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A] +; FMACALL64-NEXT: ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; FMACALL64-NEXT: retq ## encoding: [0xc3] +; +; AVX512-LABEL: constant_fold_v4f32: +; AVX512: ## %bb.0: +; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3] +; AVX512-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512-NEXT: retq ## encoding: [0xc3] +; +; AVX512VL-LABEL: constant_fold_v4f32: +; AVX512VL: ## %bb.0: +; AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,4.9E+2,1.18E+3,2.07E+3] +; AVX512VL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512VL-NEXT: retq ## encoding: [0xc3] + %r = call <4 x float> @llvm.fma.v4f32(<4 x float> , <4 x float> , <4 x float> ) + ret <4 x float> %r +} + +define <2 x double> @constant_fold_v2f64() { +; FMA32-LABEL: constant_fold_v2f64: +; FMA32: ## %bb.0: +; FMA32-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3] +; FMA32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; FMA32-NEXT: ## fixup A - offset: 4, value: 
{{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 +; FMA32-NEXT: retl ## encoding: [0xc3] +; +; FMACALL32-LABEL: constant_fold_v2f64: +; FMACALL32: ## %bb.0: +; FMACALL32-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3] +; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; FMACALL32-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 +; FMACALL32-NEXT: retl ## encoding: [0xc3] +; +; FMA64-LABEL: constant_fold_v2f64: +; FMA64: ## %bb.0: +; FMA64-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3] +; FMA64-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; FMA64-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; FMA64-NEXT: retq ## encoding: [0xc3] +; +; FMACALL64-LABEL: constant_fold_v2f64: +; FMACALL64: ## %bb.0: +; FMACALL64-NEXT: movaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3] +; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A] +; FMACALL64-NEXT: ## fixup A - offset: 3, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; FMACALL64-NEXT: retq ## encoding: [0xc3] +; +; AVX512-LABEL: constant_fold_v2f64: +; AVX512: ## %bb.0: +; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3] +; AVX512-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512-NEXT: retq ## encoding: [0xc3] +; +; AVX512VL-LABEL: constant_fold_v2f64: +; AVX512VL: ## %bb.0: +; AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [4.1E+2,1.4E+3] +; AVX512VL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512VL-NEXT: retq ## encoding: [0xc3] + %r = call <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> , <2 x double> ) + ret <2 x double> %r +} + declare float @llvm.fma.f32(float, float, float) declare double @llvm.fma.f64(double, double, double) declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) 
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll index 6fd3db3464decb..ee83a79b6dd550 100644 --- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll @@ -2369,8 +2369,8 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin ; SSE41-NEXT: psrlw $1, %xmm1 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pand %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: pand %xmm3, %xmm4 ; SSE41-NEXT: movdqa %xmm1, %xmm5 ; SSE41-NEXT: pmaddubsw %xmm4, %xmm5 ; SSE41-NEXT: pand %xmm3, %xmm5 @@ -2391,7 +2391,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 ; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2 @@ -2432,7 +2432,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin ; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 -; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1 ; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14] ; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -2450,7 +2450,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) 
nounwin ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 -; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14] ; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -2592,8 +2592,8 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw ; SSE41-NEXT: psrlw $1, %xmm1 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm2, %xmm3 -; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm2, %xmm3 ; SSE41-NEXT: movdqa %xmm1, %xmm5 ; SSE41-NEXT: pmaddubsw %xmm3, %xmm5 ; SSE41-NEXT: pand %xmm2, %xmm5 @@ -2616,7 +2616,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4 ; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4 ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -2659,7 +2659,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw ; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 -; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1 ; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = 
xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14] ; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -2677,7 +2677,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 -; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14] ; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -2823,8 +2823,8 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind ; SSE41-NEXT: psrlw $1, %xmm0 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pand %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: pand %xmm3, %xmm4 ; SSE41-NEXT: movdqa %xmm0, %xmm5 ; SSE41-NEXT: pmaddubsw %xmm4, %xmm5 ; SSE41-NEXT: pand %xmm3, %xmm5 @@ -2846,7 +2846,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4 ; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2 @@ -2889,7 +2889,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind ; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOP-FALLBACK-NEXT: 
vpandn %xmm2, %xmm3, %xmm4 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4 -; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0 ; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2],xmm4[2],xmm0[4],xmm4[4],xmm0[6],xmm4[6],xmm0[8],xmm4[8],xmm0[10],xmm4[10],xmm0[12],xmm4[12],xmm0[14],xmm4[14] ; XOP-FALLBACK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 @@ -2908,7 +2908,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4 -; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2],xmm4[2],xmm0[4],xmm4[4],xmm0[6],xmm4[6],xmm0[8],xmm4[8],xmm0[10],xmm4[10],xmm0[12],xmm4[12],xmm0[14],xmm4[14] ; XOPAVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 @@ -3054,8 +3054,8 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind ; SSE41-NEXT: psrlw $1, %xmm1 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pand %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: pand %xmm3, %xmm4 ; SSE41-NEXT: movdqa %xmm1, %xmm5 ; SSE41-NEXT: pmaddubsw %xmm4, %xmm5 ; SSE41-NEXT: pand %xmm3, %xmm5 @@ -3077,7 +3077,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 ; AVX1-NEXT: vpand 
%xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2 @@ -3120,7 +3120,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind ; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 -; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1 ; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14] ; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -3139,7 +3139,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 -; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14] ; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -3287,8 +3287,8 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; SSE41-NEXT: psrlw $1, %xmm0 ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pand %xmm2, %xmm4 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: pand %xmm3, %xmm4 ; SSE41-NEXT: movdqa %xmm0, %xmm5 ; SSE41-NEXT: pmaddubsw %xmm4, %xmm5 ; SSE41-NEXT: pand %xmm3, %xmm5 @@ -3311,7 +3311,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 ; AVX1-NEXT: 
vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 ; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2 @@ -3356,7 +3356,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 -; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1 ; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14] ; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -3376,7 +3376,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4 ; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4 -; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14] ; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll index 5a1c4c8a52c829..b4e8f0a2301674 100644 --- a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll @@ -1914,7 +1914,7 @@ define <32 x i8> @vec256_i8_signed_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounwin ; AVX1-NEXT: vbroadcastss 
{{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; AVX1-NEXT: vpor %xmm6, %xmm5, %xmm5 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm5, %xmm7, %xmm8 +; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm8 ; AVX1-NEXT: vpmaddubsw %xmm8, %xmm1, %xmm8 ; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm8 ; AVX1-NEXT: vpandn %xmm5, %xmm7, %xmm5 @@ -1922,7 +1922,7 @@ define <32 x i8> @vec256_i8_signed_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounwin ; AVX1-NEXT: vpsllw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm8, %xmm1 ; AVX1-NEXT: vpor %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm5 +; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm5 ; AVX1-NEXT: vpmaddubsw %xmm5, %xmm3, %xmm5 ; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5 ; AVX1-NEXT: vpandn %xmm4, %xmm7, %xmm4 @@ -1944,7 +1944,7 @@ define <32 x i8> @vec256_i8_signed_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounwin ; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm4 +; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX2-NEXT: vpmaddubsw %ymm4, %ymm1, %ymm4 ; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm4 ; AVX2-NEXT: vpandn %ymm2, %ymm3, %ymm2 @@ -1974,14 +1974,14 @@ define <32 x i8> @vec256_i8_signed_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounwin ; XOP-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] ; XOP-NEXT: vpandn %xmm5, %xmm7, %xmm8 ; XOP-NEXT: vpmaddubsw %xmm8, %xmm1, %xmm8 -; XOP-NEXT: vpand %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpand %xmm7, %xmm5, %xmm5 ; XOP-NEXT: vpmaddubsw %xmm5, %xmm1, %xmm1 ; XOP-NEXT: vmovdqa {{.*#+}} xmm5 = [0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30] ; XOP-NEXT: vpperm %xmm5, %xmm8, %xmm1, %xmm1 ; XOP-NEXT: vpor %xmm6, %xmm4, %xmm4 ; XOP-NEXT: vpandn %xmm4, %xmm7, %xmm6 ; XOP-NEXT: vpmaddubsw %xmm6, %xmm2, %xmm6 -; XOP-NEXT: vpand %xmm4, %xmm7, %xmm4 +; XOP-NEXT: vpand %xmm7, 
%xmm4, %xmm4 ; XOP-NEXT: vpmaddubsw %xmm4, %xmm2, %xmm2 ; XOP-NEXT: vpperm %xmm5, %xmm6, %xmm2, %xmm2 ; XOP-NEXT: vpaddb %xmm3, %xmm2, %xmm2 @@ -1999,7 +1999,7 @@ define <32 x i8> @vec256_i8_signed_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounwin ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm4 +; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpmaddubsw %ymm4, %ymm1, %ymm4 ; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm4 ; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm2 @@ -2088,7 +2088,7 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; AVX1-NEXT: vpor %xmm4, %xmm6, %xmm6 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm6, %xmm7, %xmm8 +; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm8 ; AVX1-NEXT: vpmaddubsw %xmm8, %xmm1, %xmm8 ; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm8 ; AVX1-NEXT: vpandn %xmm6, %xmm7, %xmm6 @@ -2096,7 +2096,7 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw ; AVX1-NEXT: vpsllw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm8, %xmm1 ; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm5 +; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm5 ; AVX1-NEXT: vpmaddubsw %xmm5, %xmm3, %xmm5 ; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5 ; AVX1-NEXT: vpandn %xmm4, %xmm7, %xmm4 @@ -2120,7 +2120,7 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw ; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 +; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm4 ; AVX2-NEXT: 
vpmaddubsw %ymm4, %ymm1, %ymm4 ; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm4 ; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 @@ -2150,14 +2150,14 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw ; XOP-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] ; XOP-NEXT: vpandn %xmm5, %xmm7, %xmm8 ; XOP-NEXT: vpmaddubsw %xmm8, %xmm1, %xmm8 -; XOP-NEXT: vpand %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpand %xmm7, %xmm5, %xmm5 ; XOP-NEXT: vpmaddubsw %xmm5, %xmm1, %xmm1 ; XOP-NEXT: vmovdqa {{.*#+}} xmm5 = [0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30] ; XOP-NEXT: vpperm %xmm5, %xmm8, %xmm1, %xmm1 ; XOP-NEXT: vpor %xmm6, %xmm4, %xmm4 ; XOP-NEXT: vpandn %xmm4, %xmm7, %xmm6 ; XOP-NEXT: vpmaddubsw %xmm6, %xmm2, %xmm6 -; XOP-NEXT: vpand %xmm4, %xmm7, %xmm4 +; XOP-NEXT: vpand %xmm7, %xmm4, %xmm4 ; XOP-NEXT: vpmaddubsw %xmm4, %xmm2, %xmm2 ; XOP-NEXT: vpperm %xmm5, %xmm6, %xmm2, %xmm2 ; XOP-NEXT: vpaddb %xmm3, %xmm2, %xmm2 @@ -2176,7 +2176,7 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 +; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm4 ; AVX512F-NEXT: vpmaddubsw %ymm4, %ymm1, %ymm4 ; AVX512F-NEXT: vpand %ymm2, %ymm4, %ymm4 ; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2 @@ -2266,7 +2266,7 @@ define <32 x i8> @vec256_i8_signed_mem_reg(ptr %a1_addr, <32 x i8> %a2) nounwind ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; AVX1-NEXT: vpor %xmm6, %xmm5, %xmm5 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm5, %xmm7, %xmm8 +; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm8 ; AVX1-NEXT: vpmaddubsw %xmm8, %xmm0, %xmm8 ; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm8 ; AVX1-NEXT: vpandn %xmm5, %xmm7, %xmm5 @@ -2274,7 +2274,7 
@@ define <32 x i8> @vec256_i8_signed_mem_reg(ptr %a1_addr, <32 x i8> %a2) nounwind ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm0, %xmm8, %xmm0 ; AVX1-NEXT: vpor %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm5 +; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm5 ; AVX1-NEXT: vpmaddubsw %xmm5, %xmm3, %xmm5 ; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5 ; AVX1-NEXT: vpandn %xmm4, %xmm7, %xmm4 @@ -2297,7 +2297,7 @@ define <32 x i8> @vec256_i8_signed_mem_reg(ptr %a1_addr, <32 x i8> %a2) nounwind ; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm4 +; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX2-NEXT: vpmaddubsw %ymm4, %ymm0, %ymm4 ; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm4 ; AVX2-NEXT: vpandn %ymm2, %ymm3, %ymm2 @@ -2328,14 +2328,14 @@ define <32 x i8> @vec256_i8_signed_mem_reg(ptr %a1_addr, <32 x i8> %a2) nounwind ; XOP-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] ; XOP-NEXT: vpandn %xmm5, %xmm7, %xmm8 ; XOP-NEXT: vpmaddubsw %xmm8, %xmm0, %xmm8 -; XOP-NEXT: vpand %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpand %xmm7, %xmm5, %xmm5 ; XOP-NEXT: vpmaddubsw %xmm5, %xmm0, %xmm0 ; XOP-NEXT: vmovdqa {{.*#+}} xmm5 = [0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30] ; XOP-NEXT: vpperm %xmm5, %xmm8, %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm6, %xmm4, %xmm4 ; XOP-NEXT: vpandn %xmm4, %xmm7, %xmm6 ; XOP-NEXT: vpmaddubsw %xmm6, %xmm1, %xmm6 -; XOP-NEXT: vpand %xmm4, %xmm7, %xmm4 +; XOP-NEXT: vpand %xmm7, %xmm4, %xmm4 ; XOP-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm1 ; XOP-NEXT: vpperm %xmm5, %xmm6, %xmm1, %xmm1 ; XOP-NEXT: vpaddb %xmm3, %xmm1, %xmm1 @@ -2354,7 +2354,7 @@ define <32 x i8> @vec256_i8_signed_mem_reg(ptr %a1_addr, <32 x i8> %a2) nounwind ; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512F-NEXT: vpbroadcastw {{.*#+}} 
ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm4 +; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpmaddubsw %ymm4, %ymm0, %ymm4 ; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm4 ; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm2 @@ -2444,7 +2444,7 @@ define <32 x i8> @vec256_i8_signed_reg_mem(<32 x i8> %a1, ptr %a2_addr) nounwind ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; AVX1-NEXT: vpor %xmm6, %xmm5, %xmm5 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm5, %xmm7, %xmm8 +; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm8 ; AVX1-NEXT: vpmaddubsw %xmm8, %xmm2, %xmm8 ; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm8 ; AVX1-NEXT: vpandn %xmm5, %xmm7, %xmm5 @@ -2452,7 +2452,7 @@ define <32 x i8> @vec256_i8_signed_reg_mem(<32 x i8> %a1, ptr %a2_addr) nounwind ; AVX1-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpor %xmm2, %xmm8, %xmm2 ; AVX1-NEXT: vpor %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm5 +; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm5 ; AVX1-NEXT: vpmaddubsw %xmm5, %xmm3, %xmm5 ; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5 ; AVX1-NEXT: vpandn %xmm4, %xmm7, %xmm4 @@ -2475,7 +2475,7 @@ define <32 x i8> @vec256_i8_signed_reg_mem(<32 x i8> %a1, ptr %a2_addr) nounwind ; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm4 +; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX2-NEXT: vpmaddubsw %ymm4, %ymm1, %ymm4 ; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm4 ; AVX2-NEXT: vpandn %ymm2, %ymm3, %ymm2 @@ -2506,14 +2506,14 @@ define <32 x i8> @vec256_i8_signed_reg_mem(<32 x i8> %a1, ptr %a2_addr) nounwind ; XOP-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] ; XOP-NEXT: vpandn %xmm5, %xmm7, %xmm8 ; XOP-NEXT: vpmaddubsw %xmm8, %xmm1, %xmm8 -; XOP-NEXT: 
vpand %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpand %xmm7, %xmm5, %xmm5 ; XOP-NEXT: vpmaddubsw %xmm5, %xmm1, %xmm1 ; XOP-NEXT: vmovdqa {{.*#+}} xmm5 = [0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30] ; XOP-NEXT: vpperm %xmm5, %xmm8, %xmm1, %xmm1 ; XOP-NEXT: vpor %xmm6, %xmm4, %xmm4 ; XOP-NEXT: vpandn %xmm4, %xmm7, %xmm6 ; XOP-NEXT: vpmaddubsw %xmm6, %xmm2, %xmm6 -; XOP-NEXT: vpand %xmm4, %xmm7, %xmm4 +; XOP-NEXT: vpand %xmm7, %xmm4, %xmm4 ; XOP-NEXT: vpmaddubsw %xmm4, %xmm2, %xmm2 ; XOP-NEXT: vpperm %xmm5, %xmm6, %xmm2, %xmm2 ; XOP-NEXT: vpaddb %xmm3, %xmm2, %xmm2 @@ -2532,7 +2532,7 @@ define <32 x i8> @vec256_i8_signed_reg_mem(<32 x i8> %a1, ptr %a2_addr) nounwind ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm4 +; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpmaddubsw %ymm4, %ymm1, %ymm4 ; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm4 ; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm2 @@ -2623,7 +2623,7 @@ define <32 x i8> @vec256_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] ; AVX1-NEXT: vpor %xmm6, %xmm5, %xmm5 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm5, %xmm7, %xmm8 +; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm8 ; AVX1-NEXT: vpmaddubsw %xmm8, %xmm1, %xmm8 ; AVX1-NEXT: vpand %xmm7, %xmm8, %xmm8 ; AVX1-NEXT: vpandn %xmm5, %xmm7, %xmm5 @@ -2631,7 +2631,7 @@ define <32 x i8> @vec256_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; AVX1-NEXT: vpsllw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm8, %xmm1 ; AVX1-NEXT: vpor %xmm6, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm5 +; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm5 ; AVX1-NEXT: vpmaddubsw %xmm5, %xmm2, %xmm5 ; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5 ; AVX1-NEXT: vpandn %xmm4, 
%xmm7, %xmm4 @@ -2655,7 +2655,7 @@ define <32 x i8> @vec256_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm4 +; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX2-NEXT: vpmaddubsw %ymm4, %ymm1, %ymm4 ; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm4 ; AVX2-NEXT: vpandn %ymm2, %ymm3, %ymm2 @@ -2687,14 +2687,14 @@ define <32 x i8> @vec256_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; XOP-NEXT: vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] ; XOP-NEXT: vpandn %xmm5, %xmm7, %xmm8 ; XOP-NEXT: vpmaddubsw %xmm8, %xmm0, %xmm8 -; XOP-NEXT: vpand %xmm5, %xmm7, %xmm5 +; XOP-NEXT: vpand %xmm7, %xmm5, %xmm5 ; XOP-NEXT: vpmaddubsw %xmm5, %xmm0, %xmm0 ; XOP-NEXT: vmovdqa {{.*#+}} xmm5 = [0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30] ; XOP-NEXT: vpperm %xmm5, %xmm8, %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm6, %xmm4, %xmm4 ; XOP-NEXT: vpandn %xmm4, %xmm7, %xmm6 ; XOP-NEXT: vpmaddubsw %xmm6, %xmm1, %xmm6 -; XOP-NEXT: vpand %xmm4, %xmm7, %xmm4 +; XOP-NEXT: vpand %xmm7, %xmm4, %xmm4 ; XOP-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm1 ; XOP-NEXT: vpperm %xmm5, %xmm6, %xmm1, %xmm1 ; XOP-NEXT: vpaddb %xmm3, %xmm1, %xmm1 @@ -2714,7 +2714,7 @@ define <32 x i8> @vec256_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm4 +; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpmaddubsw %ymm4, %ymm1, %ymm4 ; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm4 ; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm2 diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll 
b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index 8289e885618f7b..9b08d8baacee14 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -892,13 +892,13 @@ define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"=" ; CHECK-SKX-VBMI-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; CHECK-SKX-VBMI-NEXT: vpandn %ymm3, %ymm4, %ymm5 ; CHECK-SKX-VBMI-NEXT: vpmaddubsw %ymm5, %ymm1, %ymm5 -; CHECK-SKX-VBMI-NEXT: vpand %ymm3, %ymm4, %ymm3 +; CHECK-SKX-VBMI-NEXT: vpand %ymm4, %ymm3, %ymm3 ; CHECK-SKX-VBMI-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1 ; CHECK-SKX-VBMI-NEXT: vmovdqa {{.*#+}} ymm3 = [0,32,2,34,4,36,6,38,8,40,10,42,12,44,14,46,16,48,18,50,20,52,22,54,24,56,26,58,28,60,30,62] ; CHECK-SKX-VBMI-NEXT: vpermt2b %ymm5, %ymm3, %ymm1 ; CHECK-SKX-VBMI-NEXT: vpandn %ymm2, %ymm4, %ymm5 ; CHECK-SKX-VBMI-NEXT: vpmaddubsw %ymm5, %ymm0, %ymm5 -; CHECK-SKX-VBMI-NEXT: vpand %ymm2, %ymm4, %ymm2 +; CHECK-SKX-VBMI-NEXT: vpand %ymm4, %ymm2, %ymm2 ; CHECK-SKX-VBMI-NEXT: vpmaddubsw %ymm2, %ymm0, %ymm0 ; CHECK-SKX-VBMI-NEXT: vpermt2b %ymm5, %ymm3, %ymm0 ; CHECK-SKX-VBMI-NEXT: vmovdqa %ymm0, (%rdx) @@ -913,13 +913,13 @@ define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"=" ; CHECK-AVX512-NEXT: vmovdqa (%rsi), %ymm2 ; CHECK-AVX512-NEXT: vmovdqa 32(%rsi), %ymm3 ; CHECK-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; CHECK-AVX512-NEXT: vpand %ymm3, %ymm4, %ymm5 +; CHECK-AVX512-NEXT: vpand %ymm4, %ymm3, %ymm5 ; CHECK-AVX512-NEXT: vpmaddubsw %ymm5, %ymm1, %ymm5 ; CHECK-AVX512-NEXT: vpandn %ymm3, %ymm4, %ymm3 ; CHECK-AVX512-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1 ; CHECK-AVX512-NEXT: vpsllw $8, %ymm1, %ymm1 ; CHECK-AVX512-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm5 & ymm4) -; CHECK-AVX512-NEXT: vpand %ymm2, %ymm4, %ymm3 +; CHECK-AVX512-NEXT: vpand %ymm4, %ymm2, %ymm3 ; 
CHECK-AVX512-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3 ; CHECK-AVX512-NEXT: vpandn %ymm2, %ymm4, %ymm2 ; CHECK-AVX512-NEXT: vpmaddubsw %ymm2, %ymm0, %ymm0 @@ -939,13 +939,13 @@ define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"=" ; CHECK-VBMI-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; CHECK-VBMI-NEXT: vpandn %ymm3, %ymm4, %ymm5 ; CHECK-VBMI-NEXT: vpmaddubsw %ymm5, %ymm1, %ymm5 -; CHECK-VBMI-NEXT: vpand %ymm3, %ymm4, %ymm3 +; CHECK-VBMI-NEXT: vpand %ymm4, %ymm3, %ymm3 ; CHECK-VBMI-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1 ; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm3 = [0,32,2,34,4,36,6,38,8,40,10,42,12,44,14,46,16,48,18,50,20,52,22,54,24,56,26,58,28,60,30,62] ; CHECK-VBMI-NEXT: vpermt2b %ymm5, %ymm3, %ymm1 ; CHECK-VBMI-NEXT: vpandn %ymm2, %ymm4, %ymm5 ; CHECK-VBMI-NEXT: vpmaddubsw %ymm5, %ymm0, %ymm5 -; CHECK-VBMI-NEXT: vpand %ymm2, %ymm4, %ymm2 +; CHECK-VBMI-NEXT: vpand %ymm4, %ymm2, %ymm2 ; CHECK-VBMI-NEXT: vpmaddubsw %ymm2, %ymm0, %ymm0 ; CHECK-VBMI-NEXT: vpermt2b %ymm5, %ymm3, %ymm0 ; CHECK-VBMI-NEXT: vmovdqa %ymm0, (%rdx) @@ -967,7 +967,7 @@ define dso_local void @mul512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"=" ; CHECK-SKX-VBMI-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; CHECK-SKX-VBMI-NEXT: vpandnq %zmm1, %zmm2, %zmm3 ; CHECK-SKX-VBMI-NEXT: vpmaddubsw %zmm3, %zmm0, %zmm3 -; CHECK-SKX-VBMI-NEXT: vpandq %zmm1, %zmm2, %zmm1 +; CHECK-SKX-VBMI-NEXT: vpandq %zmm2, %zmm1, %zmm1 ; CHECK-SKX-VBMI-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 ; CHECK-SKX-VBMI-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,64,2,66,4,68,6,70,8,72,10,74,12,76,14,78,16,80,18,82,20,84,22,86,24,88,26,90,28,92,30,94,32,96,34,98,36,100,38,102,40,104,42,106,44,108,46,110,48,112,50,114,52,116,54,118,56,120,58,122,60,124,62,126] ; CHECK-SKX-VBMI-NEXT: vpermi2b %zmm3, %zmm0, %zmm1 @@ -980,7 +980,7 @@ define 
dso_local void @mul512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"=" ; CHECK-AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 ; CHECK-AVX512-NEXT: vmovdqa64 (%rsi), %zmm1 ; CHECK-AVX512-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; CHECK-AVX512-NEXT: vpandq %zmm1, %zmm2, %zmm3 +; CHECK-AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm3 ; CHECK-AVX512-NEXT: vpmaddubsw %zmm3, %zmm0, %zmm3 ; CHECK-AVX512-NEXT: vpandnq %zmm1, %zmm2, %zmm1 ; CHECK-AVX512-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 @@ -997,7 +997,7 @@ define dso_local void @mul512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"=" ; CHECK-VBMI-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; CHECK-VBMI-NEXT: vpandnq %zmm1, %zmm2, %zmm3 ; CHECK-VBMI-NEXT: vpmaddubsw %zmm3, %zmm0, %zmm3 -; CHECK-VBMI-NEXT: vpandq %zmm1, %zmm2, %zmm1 +; CHECK-VBMI-NEXT: vpandq %zmm2, %zmm1, %zmm1 ; CHECK-VBMI-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 ; CHECK-VBMI-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,64,2,66,4,68,6,70,8,72,10,74,12,76,14,78,16,80,18,82,20,84,22,86,24,88,26,90,28,92,30,94,32,96,34,98,36,100,38,102,40,104,42,106,44,108,46,110,48,112,50,114,52,116,54,118,56,120,58,122,60,124,62,126] ; CHECK-VBMI-NEXT: vpermi2b %zmm3, %zmm0, %zmm1 diff --git a/llvm/test/CodeGen/X86/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/X86/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..37756009fa7d86 --- /dev/null +++ b/llvm/test/CodeGen/X86/naked-fn-with-frame-pointer.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple i386 | FileCheck %s -check-prefixes=CHECK-32 +; RUN: llc < %s -mtriple x86_64 | FileCheck %s -check-prefixes=CHECK-64 + +declare dso_local void @main() + +define dso_local void @naked() naked 
"frame-pointer"="all" { +; CHECK-32-LABEL: naked: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: calll main +; +; CHECK-64-LABEL: naked: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: callq main + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-32-LABEL: normal: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: pushl %ebp +; CHECK-32-NEXT: .cfi_def_cfa_offset 8 +; CHECK-32-NEXT: .cfi_offset %ebp, -8 +; CHECK-32-NEXT: movl %esp, %ebp +; CHECK-32-NEXT: .cfi_def_cfa_register %ebp +; CHECK-32-NEXT: calll main +; +; CHECK-64-LABEL: normal: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: pushq %rbp +; CHECK-64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-64-NEXT: .cfi_offset %rbp, -16 +; CHECK-64-NEXT: movq %rsp, %rbp +; CHECK-64-NEXT: .cfi_def_cfa_register %rbp +; CHECK-64-NEXT: callq main + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index 6c3d04863118cf..fe8a4fa1631299 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -161,8 +161,8 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind { ; SSE41-LABEL: mul_v16i8: ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm2, %xmm3 -; SSE41-NEXT: pand %xmm1, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pand %xmm2, %xmm3 ; SSE41-NEXT: movdqa %xmm0, %xmm4 ; SSE41-NEXT: pmaddubsw %xmm3, %xmm4 ; SSE41-NEXT: pand %xmm2, %xmm4 @@ -586,17 +586,16 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind { ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] ; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pand %xmm2, %xmm5 +; SSE41-NEXT: pandn %xmm2, %xmm5 +; SSE41-NEXT: pand %xmm4, %xmm2 ; SSE41-NEXT: movdqa %xmm0, %xmm6 -; SSE41-NEXT: pmaddubsw %xmm5, %xmm6 +; SSE41-NEXT: pmaddubsw %xmm2, %xmm6 ; SSE41-NEXT: pand %xmm4, %xmm6 -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; 
SSE41-NEXT: pandn %xmm2, %xmm5 ; SSE41-NEXT: pmaddubsw %xmm5, %xmm0 ; SSE41-NEXT: psllw $8, %xmm0 ; SSE41-NEXT: por %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm2 -; SSE41-NEXT: pand %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: pand %xmm4, %xmm2 ; SSE41-NEXT: movdqa %xmm1, %xmm5 ; SSE41-NEXT: pmaddubsw %xmm2, %xmm5 ; SSE41-NEXT: pand %xmm4, %xmm5 @@ -609,7 +608,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind { ; AVX2-LABEL: mul_v32i8: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm3 +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3 ; AVX2-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3 ; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm3 ; AVX2-NEXT: vpandn %ymm1, %ymm2, %ymm1 @@ -621,7 +620,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind { ; AVX512F-LABEL: mul_v32i8: ; AVX512F: # %bb.0: # %entry ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm3 +; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 ; AVX512F-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3 ; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm3 ; AVX512F-NEXT: vpandn %ymm1, %ymm2, %ymm1 @@ -902,37 +901,34 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm8 = [255,255,255,255,255,255,255,255] ; SSE41-NEXT: movdqa %xmm8, %xmm9 -; SSE41-NEXT: pand %xmm4, %xmm9 +; SSE41-NEXT: pandn %xmm4, %xmm9 +; SSE41-NEXT: pand %xmm8, %xmm4 ; SSE41-NEXT: movdqa %xmm0, %xmm10 -; SSE41-NEXT: pmaddubsw %xmm9, %xmm10 +; SSE41-NEXT: pmaddubsw %xmm4, %xmm10 ; SSE41-NEXT: pand %xmm8, %xmm10 -; SSE41-NEXT: movdqa %xmm8, %xmm9 -; SSE41-NEXT: pandn %xmm4, %xmm9 ; SSE41-NEXT: pmaddubsw %xmm9, %xmm0 ; SSE41-NEXT: psllw $8, %xmm0 ; SSE41-NEXT: por %xmm10, %xmm0 ; SSE41-NEXT: movdqa %xmm8, %xmm4 -; SSE41-NEXT: pand %xmm5, %xmm4 +; 
SSE41-NEXT: pandn %xmm5, %xmm4 +; SSE41-NEXT: pand %xmm8, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm9 -; SSE41-NEXT: pmaddubsw %xmm4, %xmm9 +; SSE41-NEXT: pmaddubsw %xmm5, %xmm9 ; SSE41-NEXT: pand %xmm8, %xmm9 -; SSE41-NEXT: movdqa %xmm8, %xmm4 -; SSE41-NEXT: pandn %xmm5, %xmm4 ; SSE41-NEXT: pmaddubsw %xmm4, %xmm1 ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: por %xmm9, %xmm1 ; SSE41-NEXT: movdqa %xmm8, %xmm4 -; SSE41-NEXT: pand %xmm6, %xmm4 +; SSE41-NEXT: pandn %xmm6, %xmm4 +; SSE41-NEXT: pand %xmm8, %xmm6 ; SSE41-NEXT: movdqa %xmm2, %xmm5 -; SSE41-NEXT: pmaddubsw %xmm4, %xmm5 +; SSE41-NEXT: pmaddubsw %xmm6, %xmm5 ; SSE41-NEXT: pand %xmm8, %xmm5 -; SSE41-NEXT: movdqa %xmm8, %xmm4 -; SSE41-NEXT: pandn %xmm6, %xmm4 ; SSE41-NEXT: pmaddubsw %xmm4, %xmm2 ; SSE41-NEXT: psllw $8, %xmm2 ; SSE41-NEXT: por %xmm5, %xmm2 -; SSE41-NEXT: movdqa %xmm8, %xmm4 -; SSE41-NEXT: pand %xmm7, %xmm4 +; SSE41-NEXT: movdqa %xmm7, %xmm4 +; SSE41-NEXT: pand %xmm8, %xmm4 ; SSE41-NEXT: movdqa %xmm3, %xmm5 ; SSE41-NEXT: pmaddubsw %xmm4, %xmm5 ; SSE41-NEXT: pand %xmm8, %xmm5 @@ -945,14 +941,14 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { ; AVX2-LABEL: mul_v64i8: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm5 +; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm5 ; AVX2-NEXT: vpmaddubsw %ymm5, %ymm0, %ymm5 ; AVX2-NEXT: vpand %ymm4, %ymm5, %ymm5 ; AVX2-NEXT: vpandn %ymm2, %ymm4, %ymm2 ; AVX2-NEXT: vpmaddubsw %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm5, %ymm0 -; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm2 +; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm2 ; AVX2-NEXT: vpmaddubsw %ymm2, %ymm1, %ymm2 ; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2 ; AVX2-NEXT: vpandn %ymm3, %ymm4, %ymm3 @@ -963,28 +959,28 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { ; ; AVX512F-LABEL: mul_v64i8: ; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: 
vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 ; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm4 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5 ; AVX512F-NEXT: vpmaddubsw %ymm4, %ymm5, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm3, %ymm6 +; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm6 ; AVX512F-NEXT: vpmaddubsw %ymm6, %ymm0, %ymm6 ; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4 -; AVX512F-NEXT: vpandn %ymm1, %ymm3, %ymm1 +; AVX512F-NEXT: vpandn %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vpandn %ymm2, %ymm3, %ymm1 +; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm1 ; AVX512F-NEXT: vpmaddubsw %ymm1, %ymm5, %ymm1 ; AVX512F-NEXT: vpsllw $8, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 | (zmm4 & zmm3) +; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 | (zmm4 & zmm2) ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: mul_v64i8: ; AVX512BW: # %bb.0: # %entry ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpandq %zmm1, %zmm2, %zmm3 +; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3 ; AVX512BW-NEXT: vpmaddubsw %zmm3, %zmm0, %zmm3 ; AVX512BW-NEXT: vpandnq %zmm1, %zmm2, %zmm1 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll index c9bb3de92dcda6..885b07585e68f4 100644 --- a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll +++ 
b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll @@ -59,7 +59,7 @@ define <32 x i8> @test_mul_32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX256BW-LABEL: test_mul_32i8: ; AVX256BW: # %bb.0: ; AVX256BW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX256BW-NEXT: vpand %ymm1, %ymm2, %ymm3 +; AVX256BW-NEXT: vpand %ymm2, %ymm1, %ymm3 ; AVX256BW-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3 ; AVX256BW-NEXT: vpandn %ymm1, %ymm2, %ymm1 ; AVX256BW-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll index be8adf697d5c1b..9656822d144e45 100644 --- a/llvm/test/CodeGen/X86/psubus.ll +++ b/llvm/test/CodeGen/X86/psubus.ll @@ -1671,12 +1671,11 @@ define <8 x i16> @psubus_8i64_max(<8 x i16> %x, <8 x i64> %y) nounwind { ; SSE41: # %bb.0: # %vector.ph ; SSE41-NEXT: movdqa %xmm0, %xmm5 ; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm9, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm8 +; SSE41-NEXT: pxor %xmm9, %xmm8 ; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002324991,9223372039002324991] -; SSE41-NEXT: movdqa %xmm7, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm8[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm7, %xmm8 ; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147549183,2147549183,2147549183,2147549183] ; SSE41-NEXT: movdqa %xmm6, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 @@ -1684,22 +1683,20 @@ define <8 x i16> @psubus_8i64_max(<8 x i16> %x, <8 x i64> %y) nounwind { ; SSE41-NEXT: movapd {{.*#+}} xmm8 = [65535,65535] ; SSE41-NEXT: movapd %xmm8, %xmm10 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm10 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pxor %xmm9, %xmm0 -; SSE41-NEXT: movdqa %xmm7, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 -; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm3, %xmm4 
+; SSE41-NEXT: pxor %xmm9, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm4[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm7, %xmm4 ; SSE41-NEXT: movdqa %xmm6, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm11, %xmm0 ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: movapd %xmm8, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4 ; SSE41-NEXT: packusdw %xmm10, %xmm4 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm9, %xmm0 -; SSE41-NEXT: movdqa %xmm7, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: pxor %xmm9, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm3[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm7, %xmm3 ; SSE41-NEXT: movdqa %xmm6, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm3, %xmm0 @@ -2771,12 +2768,11 @@ define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) { ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm7 ; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm9 +; SSE41-NEXT: pxor %xmm10, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002259455,9223372039002259455] -; SSE41-NEXT: movdqa %xmm8, %xmm9 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm9 -; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm9[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm8, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483647,2147483647,2147483647,2147483647] ; SSE41-NEXT: movdqa %xmm6, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm11, %xmm0 @@ -2784,11 +2780,10 @@ define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) { ; SSE41-NEXT: movapd {{.*#+}} xmm9 = [4294967295,4294967295] ; SSE41-NEXT: movapd %xmm9, %xmm11 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm11 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 -; SSE41-NEXT: movdqa %xmm8, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm0[0,0,2,2] 
+; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: pxor %xmm10, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm3[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm8, %xmm3 ; SSE41-NEXT: movdqa %xmm6, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm12, %xmm0 ; SSE41-NEXT: pand %xmm3, %xmm0 @@ -2797,11 +2792,10 @@ define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) { ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm11[0,2] ; SSE41-NEXT: pmaxud %xmm3, %xmm7 ; SSE41-NEXT: psubd %xmm3, %xmm7 -; SSE41-NEXT: movdqa %xmm5, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 -; SSE41-NEXT: movdqa %xmm8, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm5, %xmm2 +; SSE41-NEXT: pxor %xmm10, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm8, %xmm2 ; SSE41-NEXT: movdqa %xmm6, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0 ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -2997,12 +2991,11 @@ define <8 x i32> @test34(<8 x i32> %a0, <8 x i64> %a1) { ; SSE41-NEXT: pand %xmm0, %xmm1 ; SSE41-NEXT: pand %xmm0, %xmm6 ; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm9 +; SSE41-NEXT: pxor %xmm10, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002259455,9223372039002259455] -; SSE41-NEXT: movdqa %xmm8, %xmm9 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm9 -; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm9[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm8, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [2147483647,2147483647,2147483647,2147483647] ; SSE41-NEXT: movdqa %xmm7, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm11, %xmm0 @@ -3010,11 +3003,10 @@ define <8 x i32> @test34(<8 x i32> %a0, <8 x i64> %a1) { ; SSE41-NEXT: movapd {{.*#+}} xmm9 = [4294967295,4294967295] ; SSE41-NEXT: movapd %xmm9, %xmm11 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm11 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pxor 
%xmm10, %xmm0 -; SSE41-NEXT: movdqa %xmm8, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm2, %xmm3 +; SSE41-NEXT: pxor %xmm10, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm3[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm8, %xmm3 ; SSE41-NEXT: movdqa %xmm7, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm12, %xmm0 ; SSE41-NEXT: pand %xmm3, %xmm0 @@ -3023,11 +3015,10 @@ define <8 x i32> @test34(<8 x i32> %a0, <8 x i64> %a1) { ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm11[0,2] ; SSE41-NEXT: pmaxud %xmm3, %xmm6 ; SSE41-NEXT: psubd %xmm3, %xmm6 -; SSE41-NEXT: movdqa %xmm5, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 -; SSE41-NEXT: movdqa %xmm8, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm5, %xmm2 +; SSE41-NEXT: pxor %xmm10, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm8, %xmm2 ; SSE41-NEXT: movdqa %xmm7, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm0 ; SSE41-NEXT: pand %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll index 949902a5ebc47c..b12be7cb129d34 100644 --- a/llvm/test/CodeGen/X86/sat-add.ll +++ b/llvm/test/CodeGen/X86/sat-add.ll @@ -631,8 +631,8 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) { ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456] ; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372034707292117,9223372034707292117] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm4 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll index efe34c52b3710d..d3e4906450e431 100644 --- 
a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll @@ -84,8 +84,8 @@ define <16 x i8> @PR50049(ptr %p1, ptr %p2) { ; SSE-NEXT: pshufb %xmm8, %xmm1 ; SSE-NEXT: por %xmm4, %xmm1 ; SSE-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; SSE-NEXT: movdqa %xmm2, %xmm3 -; SSE-NEXT: pand %xmm1, %xmm3 +; SSE-NEXT: movdqa %xmm1, %xmm3 +; SSE-NEXT: pand %xmm2, %xmm3 ; SSE-NEXT: movdqa %xmm0, %xmm4 ; SSE-NEXT: pmaddubsw %xmm3, %xmm4 ; SSE-NEXT: pand %xmm2, %xmm4 @@ -120,7 +120,7 @@ define <16 x i8> @PR50049(ptr %p1, ptr %p2) { ; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm2 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX1-NEXT: vpmaddubsw %xmm3, %xmm0, %xmm3 ; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm3 ; AVX1-NEXT: vpandn %xmm1, %xmm2, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll index 5568604ac29a37..0af5e9aeccd922 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -57,8 +57,8 @@ define <2 x i32> @trunc_packus_v2i64_v2i32(<2 x i64> %a0) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovsxdq {{.*#+}} xmm4 = [2147483647,2147483647] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -175,8 +175,8 @@ define void @trunc_packus_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovsxdq {{.*#+}} xmm4 = [2147483647,2147483647] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; 
SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -317,12 +317,12 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [4294967295,4294967295] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovsxdq {{.*#+}} xmm6 = [2147483647,2147483647] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -330,8 +330,8 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -584,35 +584,32 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(ptr %p0) "min-legal-vector-width"="25 ; SSE41-NEXT: movdqa 48(%rdi), %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [4294967295,4294967295] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm5, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm5, %xmm4 +; SSE41-NEXT: pxor %xmm3, %xmm4 ; SSE41-NEXT: pmovsxdq {{.*#+}} xmm6 = [2147483647,2147483647] -; SSE41-NEXT: movdqa %xmm6, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, 
%xmm9 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm4 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm8, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm5 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm3, %xmm8 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 @@ -620,8 +617,8 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(ptr %p0) "min-legal-vector-width"="25 ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm0, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm7, %xmm0 @@ -828,8 +825,8 @@ define <2 x i16> @trunc_packus_v2i64_v2i16(<2 x i64> %a0) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147549183,2147549183] -; SSE41-NEXT: 
movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -971,8 +968,8 @@ define void @trunc_packus_v2i64_v2i16_store(<2 x i64> %a0, ptr%p1) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147549183,2147549183] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -1143,12 +1140,12 @@ define <4 x i16> @trunc_packus_v4i64_v4i16(<4 x i64> %a0) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [65535,65535] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147549183,2147549183] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -1156,8 +1153,8 @@ define <4 x i16> @trunc_packus_v4i64_v4i16(<4 x i64> %a0) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -1333,12 +1330,12 @@ define void @trunc_packus_v4i64_v4i16_store(<4 x 
i64> %a0, ptr%p1) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [65535,65535] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147549183,2147549183] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -1346,8 +1343,8 @@ define void @trunc_packus_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -1583,35 +1580,32 @@ define <8 x i16> @trunc_packus_v8i64_v8i16(ptr %p0) "min-legal-vector-width"="25 ; SSE41-NEXT: movdqa 48(%rdi), %xmm8 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [65535,65535] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pxor %xmm2, %xmm3 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147549183,2147549183] -; SSE41-NEXT: movdqa %xmm6, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm3 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa 
%xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm8, %xmm4 +; SSE41-NEXT: pxor %xmm2, %xmm4 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm2, %xmm8 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 @@ -1619,8 +1613,8 @@ define <8 x i16> @trunc_packus_v8i64_v8i16(ptr %p0) "min-legal-vector-width"="25 ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 ; SSE41-NEXT: movdqa %xmm5, %xmm0 ; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm0, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm7, %xmm0 @@ -2239,8 +2233,8 @@ define <2 x i8> @trunc_packus_v2i64_v2i8(<2 x i64> %a0) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147483903,2147483903] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -2393,8 +2387,8 @@ define void 
@trunc_packus_v2i64_v2i8_store(<2 x i64> %a0, ptr%p1) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147483903,2147483903] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -2539,12 +2533,12 @@ define <4 x i8> @trunc_packus_v4i64_v4i8(<4 x i64> %a0) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483903,2147483903] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -2552,8 +2546,8 @@ define <4 x i8> @trunc_packus_v4i64_v4i8(<4 x i64> %a0) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -2733,12 +2727,12 @@ define void @trunc_packus_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovzxdq 
{{.*#+}} xmm6 = [2147483903,2147483903] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -2746,8 +2740,8 @@ define void @trunc_packus_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -2987,35 +2981,32 @@ define <8 x i8> @trunc_packus_v8i64_v8i8(ptr %p0) "min-legal-vector-width"="256" ; SSE41-NEXT: movdqa 48(%rdi), %xmm8 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [255,255] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pxor %xmm2, %xmm3 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483903,2147483903] -; SSE41-NEXT: movdqa %xmm6, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm3 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm8, %xmm4 +; SSE41-NEXT: pxor %xmm2, %xmm4 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 +; 
SSE41-NEXT: pcmpeqd %xmm6, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm2, %xmm8 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 @@ -3023,8 +3014,8 @@ define <8 x i8> @trunc_packus_v8i64_v8i8(ptr %p0) "min-legal-vector-width"="256" ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 ; SSE41-NEXT: movdqa %xmm5, %xmm0 ; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm0, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm7, %xmm0 @@ -3277,35 +3268,32 @@ define void @trunc_packus_v8i64_v8i8_store(ptr %p0, ptr%p1) "min-legal-vector-wi ; SSE41-NEXT: movdqa 48(%rdi), %xmm8 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: pxor %xmm1, %xmm2 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483903,2147483903] -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm2, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm2 ; SSE41-NEXT: blendvpd 
%xmm0, %xmm3, %xmm2 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm8, %xmm3 +; SSE41-NEXT: pxor %xmm1, %xmm3 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm3 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm1, %xmm8 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 @@ -3313,8 +3301,8 @@ define void @trunc_packus_v8i64_v8i8_store(ptr %p0, ptr%p1) "min-legal-vector-wi ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 ; SSE41-NEXT: movdqa %xmm5, %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm0, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm7, %xmm0 @@ -3677,79 +3665,72 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="2 ; SSE41-NEXT: movdqa 96(%rdi), %xmm4 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [255,255] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pxor %xmm2, %xmm3 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm9 = [2147483903,2147483903] -; SSE41-NEXT: movdqa %xmm9, %xmm3 -; SSE41-NEXT: 
pcmpeqd %xmm0, %xmm3 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm3 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm5, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm5, %xmm4 +; SSE41-NEXT: pxor %xmm2, %xmm4 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm4 -; SSE41-NEXT: movdqa %xmm6, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pxor %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm5 ; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm5 -; SSE41-NEXT: movdqa %xmm10, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 +; SSE41-NEXT: movdqa %xmm10, %xmm6 +; SSE41-NEXT: pxor %xmm2, %xmm6 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm6, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm6 ; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm6 -; 
SSE41-NEXT: movdqa %xmm12, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm10 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm10 +; SSE41-NEXT: movdqa %xmm12, %xmm10 +; SSE41-NEXT: pxor %xmm2, %xmm10 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm10, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm10 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm10, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm10 ; SSE41-NEXT: blendvpd %xmm0, %xmm12, %xmm10 -; SSE41-NEXT: movdqa %xmm11, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm12 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm12 +; SSE41-NEXT: movdqa %xmm11, %xmm12 +; SSE41-NEXT: pxor %xmm2, %xmm12 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm12, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm12 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm12, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm12 ; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm12 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm11 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm11 +; SSE41-NEXT: movdqa %xmm8, %xmm11 +; SSE41-NEXT: pxor %xmm2, %xmm11 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm11, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm11 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm11, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 @@ -3757,8 +3738,8 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="2 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm11 ; SSE41-NEXT: movdqa %xmm7, %xmm0 ; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm0, %xmm8 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm8 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 ; SSE41-NEXT: pshufd 
{{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll index d276a6873012a9..3c03c521c27229 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll @@ -59,8 +59,8 @@ define <2 x i32> @trunc_ssat_v2i64_v2i32(<2 x i64> %a0) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967295,0,4294967295,0] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -182,8 +182,8 @@ define void @trunc_ssat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967295,0,4294967295,0] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -334,12 +334,12 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [2147483647,2147483647] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm6 = [4294967295,0,4294967295,0] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: 
pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -347,8 +347,8 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -604,35 +604,32 @@ define <8 x i32> @trunc_ssat_v8i64_v8i32(ptr %p0) "min-legal-vector-width"="256" ; SSE41-NEXT: movdqa 48(%rdi), %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [2147483647,2147483647] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm5, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm5, %xmm4 +; SSE41-NEXT: pxor %xmm3, %xmm4 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm6 = [4294967295,0,4294967295,0] -; SSE41-NEXT: movdqa %xmm6, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm4 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm8, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm5 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa 
%xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm3, %xmm8 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 @@ -640,8 +637,8 @@ define <8 x i32> @trunc_ssat_v8i64_v8i32(ptr %p0) "min-legal-vector-width"="256" ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm0, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm7, %xmm0 @@ -849,8 +846,8 @@ define <2 x i16> @trunc_ssat_v2i64_v2i16(<2 x i64> %a0) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147516415,2147516415] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -983,8 +980,8 @@ define void @trunc_ssat_v2i64_v2i16_store(<2 x i64> %a0, ptr%p1) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147516415,2147516415] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -1149,12 +1146,12 @@ define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = 
[32767,32767] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -1162,8 +1159,8 @@ define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -1333,12 +1330,12 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -1346,8 +1343,8 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: 
pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -1579,35 +1576,32 @@ define <8 x i16> @trunc_ssat_v8i64_v8i16(ptr %p0) "min-legal-vector-width"="256" ; SSE41-NEXT: movdqa 48(%rdi), %xmm8 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [32767,32767] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pxor %xmm2, %xmm3 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415] -; SSE41-NEXT: movdqa %xmm6, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm3 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm8, %xmm4 +; SSE41-NEXT: pxor %xmm2, %xmm4 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm2, %xmm8 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = 
xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 @@ -1615,8 +1609,8 @@ define <8 x i16> @trunc_ssat_v8i64_v8i16(ptr %p0) "min-legal-vector-width"="256" ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 ; SSE41-NEXT: movdqa %xmm5, %xmm0 ; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm0, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm7, %xmm0 @@ -2002,8 +1996,8 @@ define <2 x i8> @trunc_ssat_v2i64_v2i8(<2 x i64> %a0) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147483775,2147483775] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -2148,8 +2142,8 @@ define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, ptr%p1) { ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] ; SSE41-NEXT: pxor %xmm3, %xmm0 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147483775,2147483775] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 @@ -2288,12 +2282,12 @@ define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd 
%xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -2301,8 +2295,8 @@ define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -2476,12 +2470,12 @@ define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] -; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm3, %xmm5 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775] -; SSE41-NEXT: movdqa %xmm6, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 ; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm7, %xmm0 @@ -2489,8 +2483,8 @@ define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 @@ -2726,35 +2720,32 @@ define <8 x i8> @trunc_ssat_v8i64_v8i8(ptr %p0) 
"min-legal-vector-width"="256" { ; SSE41-NEXT: movdqa 48(%rdi), %xmm8 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [127,127] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pxor %xmm2, %xmm3 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775] -; SSE41-NEXT: movdqa %xmm6, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm3 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm8, %xmm4 +; SSE41-NEXT: pxor %xmm2, %xmm4 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm2, %xmm8 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 @@ -2762,8 +2753,8 @@ define <8 x i8> @trunc_ssat_v8i64_v8i8(ptr %p0) "min-legal-vector-width"="256" { ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 ; SSE41-NEXT: movdqa %xmm5, %xmm0 ; SSE41-NEXT: pxor %xmm2, %xmm0 
-; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm0, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm7, %xmm0 @@ -3022,35 +3013,32 @@ define void @trunc_ssat_v8i64_v8i8_store(ptr %p0, ptr%p1) "min-legal-vector-widt ; SSE41-NEXT: movdqa 48(%rdi), %xmm8 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm2 +; SSE41-NEXT: pxor %xmm1, %xmm2 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775] -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm2, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm2, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm2 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm8, %xmm3 +; SSE41-NEXT: pxor %xmm1, %xmm3 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm3 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm1, %xmm8 ; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 +; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 +; SSE41-NEXT: pcmpeqd %xmm6, 
%xmm8 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 ; SSE41-NEXT: por %xmm9, %xmm0 @@ -3058,8 +3046,8 @@ define void @trunc_ssat_v8i64_v8i8_store(ptr %p0, ptr%p1) "min-legal-vector-widt ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 ; SSE41-NEXT: movdqa %xmm5, %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm0, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] ; SSE41-NEXT: pand %xmm7, %xmm0 @@ -3430,79 +3418,72 @@ define <16 x i8> @trunc_ssat_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="256 ; SSE41-NEXT: movdqa 96(%rdi), %xmm4 ; SSE41-NEXT: movapd {{.*#+}} xmm1 = [127,127] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pxor %xmm2, %xmm3 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm9 = [2147483775,2147483775] -; SSE41-NEXT: movdqa %xmm9, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm3 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm3 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm5, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm5, %xmm4 +; SSE41-NEXT: pxor %xmm2, %xmm4 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm4 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm4, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm4 ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm4 -; 
SSE41-NEXT: movdqa %xmm6, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pxor %xmm2, %xmm5 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm5 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm5, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm5 ; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm5 -; SSE41-NEXT: movdqa %xmm10, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 +; SSE41-NEXT: movdqa %xmm10, %xmm6 +; SSE41-NEXT: pxor %xmm2, %xmm6 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm6, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm6 ; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm6 -; SSE41-NEXT: movdqa %xmm12, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm10 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm10 +; SSE41-NEXT: movdqa %xmm12, %xmm10 +; SSE41-NEXT: pxor %xmm2, %xmm10 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm10, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm10 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm10, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm10 ; SSE41-NEXT: blendvpd %xmm0, %xmm12, %xmm10 -; SSE41-NEXT: movdqa %xmm11, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm12 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm12 +; SSE41-NEXT: movdqa %xmm11, %xmm12 +; SSE41-NEXT: pxor %xmm2, %xmm12 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm12, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, 
%xmm12 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm12, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm1, %xmm12 ; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm12 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm11 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm11 +; SSE41-NEXT: movdqa %xmm8, %xmm11 +; SSE41-NEXT: pxor %xmm2, %xmm11 ; SSE41-NEXT: movdqa %xmm9, %xmm13 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm13 +; SSE41-NEXT: pcmpgtd %xmm11, %xmm13 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm11 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] ; SSE41-NEXT: pand %xmm11, %xmm0 ; SSE41-NEXT: por %xmm13, %xmm0 @@ -3510,8 +3491,8 @@ define <16 x i8> @trunc_ssat_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="256 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm11 ; SSE41-NEXT: movdqa %xmm7, %xmm0 ; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: movdqa %xmm9, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 +; SSE41-NEXT: movdqa %xmm0, %xmm8 +; SSE41-NEXT: pcmpeqd %xmm9, %xmm8 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] ; SSE41-NEXT: pand %xmm8, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll index 4126616937473e..c1d22dc7daf216 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -207,20 +207,20 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) { ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: pxor %xmm4, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259455,9223372039002259455] -; SSE41-NEXT: movdqa %xmm5, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 -; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: pxor %xmm4, %xmm5 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259455,9223372039002259455] +; 
SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483647,2147483647,2147483647,2147483647] ; SSE41-NEXT: movdqa %xmm0, %xmm3 ; SSE41-NEXT: pcmpgtd %xmm7, %xmm3 -; SSE41-NEXT: pand %xmm6, %xmm3 +; SSE41-NEXT: pand %xmm5, %xmm3 ; SSE41-NEXT: pxor %xmm1, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,2,2] ; SSE41-NEXT: pcmpgtd %xmm4, %xmm0 -; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: pand %xmm6, %xmm0 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [4294967295,4294967295] ; SSE41-NEXT: movapd {{.*#+}} xmm5 = [4294967295,429496729] ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 @@ -407,34 +407,31 @@ define <8 x i32> @trunc_usat_v8i64_v8i32(ptr %p0) { ; SSE41-NEXT: movdqa 48(%rdi), %xmm1 ; SSE41-NEXT: movapd {{.*#+}} xmm3 = [4294967295,4294967295] ; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 +; SSE41-NEXT: movdqa %xmm1, %xmm9 +; SSE41-NEXT: pxor %xmm6, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259455,9223372039002259455] -; SSE41-NEXT: movdqa %xmm5, %xmm9 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm9 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm5, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm3, %xmm9 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm9 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm5, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm8, %xmm1 +; SSE41-NEXT: pxor %xmm6, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm1[0,0,2,2] +; SSE41-NEXT: pcmpeqd 
%xmm5, %xmm1 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: movapd %xmm3, %xmm1 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm9[0,2] -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm5, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 -; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm6, %xmm8 +; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm5, %xmm8 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm9, %xmm0 ; SSE41-NEXT: pand %xmm8, %xmm0 @@ -790,26 +787,25 @@ define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) { ; SSE41-NEXT: movdqa %xmm0, %xmm3 ; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535] ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002324991,9223372039002324991] -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 -; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm1, %xmm6 +; SSE41-NEXT: pxor %xmm5, %xmm6 +; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002324991,9223372039002324991] +; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm7, %xmm6 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183] ; SSE41-NEXT: movdqa %xmm4, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm8, %xmm0 -; SSE41-NEXT: pand %xmm7, %xmm0 -; SSE41-NEXT: movapd %xmm2, %xmm7 -; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: movapd %xmm2, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm6 ; SSE41-NEXT: pxor %xmm3, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm7 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] ; SSE41-NEXT: 
pcmpgtd %xmm0, %xmm4 -; SSE41-NEXT: pand %xmm6, %xmm4 +; SSE41-NEXT: pand %xmm7, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: packusdw %xmm7, %xmm2 +; SSE41-NEXT: packusdw %xmm6, %xmm2 ; SSE41-NEXT: packusdw %xmm2, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq @@ -924,26 +920,25 @@ define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [65535,65535] ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002324991,9223372039002324991] -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 -; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm1, %xmm6 +; SSE41-NEXT: pxor %xmm5, %xmm6 +; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002324991,9223372039002324991] +; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm7, %xmm6 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147549183,2147549183,2147549183,2147549183] ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm8, %xmm0 -; SSE41-NEXT: pand %xmm7, %xmm0 -; SSE41-NEXT: movapd %xmm4, %xmm7 -; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm6 ; SSE41-NEXT: pxor %xmm2, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm7 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 -; SSE41-NEXT: pand %xmm6, %xmm3 +; SSE41-NEXT: pand %xmm7, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 -; SSE41-NEXT: packusdw %xmm7, %xmm4 +; SSE41-NEXT: packusdw %xmm6, %xmm4 ; SSE41-NEXT: packusdw %xmm4, %xmm4 ; SSE41-NEXT: movq %xmm4, (%rdi) ; SSE41-NEXT: retq @@ -1094,34 +1089,31 @@ define <8 x i16> 
@trunc_usat_v8i64_v8i16(ptr %p0) { ; SSE41-NEXT: movdqa 48(%rdi), %xmm7 ; SSE41-NEXT: movapd {{.*#+}} xmm3 = [65535,65535] ; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 +; SSE41-NEXT: movdqa %xmm2, %xmm9 +; SSE41-NEXT: pxor %xmm6, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002324991,9223372039002324991] -; SSE41-NEXT: movdqa %xmm5, %xmm9 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm9 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm5, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147549183,2147549183,2147549183,2147549183] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm3, %xmm9 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm5, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm8, %xmm2 +; SSE41-NEXT: pxor %xmm6, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm5, %xmm2 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm2, %xmm0 ; SSE41-NEXT: movapd %xmm3, %xmm2 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2 ; SSE41-NEXT: packusdw %xmm9, %xmm2 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm5, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 -; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm6, %xmm8 +; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm5, %xmm8 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm9, %xmm0 ; SSE41-NEXT: pand %xmm8, %xmm0 @@ -1869,26 +1861,25 @@ define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) { ; SSE41-NEXT: 
movdqa %xmm0, %xmm3 ; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255] ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259711,9223372039002259711] -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 -; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm1, %xmm6 +; SSE41-NEXT: pxor %xmm5, %xmm6 +; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002259711,9223372039002259711] +; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm7, %xmm6 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147483903,2147483903,2147483903,2147483903] ; SSE41-NEXT: movdqa %xmm4, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm8, %xmm0 -; SSE41-NEXT: pand %xmm7, %xmm0 -; SSE41-NEXT: movapd %xmm2, %xmm7 -; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: movapd %xmm2, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm6 ; SSE41-NEXT: pxor %xmm3, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm7 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] ; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 -; SSE41-NEXT: pand %xmm6, %xmm4 +; SSE41-NEXT: pand %xmm7, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: packusdw %xmm7, %xmm2 +; SSE41-NEXT: packusdw %xmm6, %xmm2 ; SSE41-NEXT: packusdw %xmm2, %xmm2 ; SSE41-NEXT: packuswb %xmm2, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm0 @@ -2005,26 +1996,25 @@ define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259711,9223372039002259711] -; SSE41-NEXT: movdqa %xmm6, %xmm7 -; SSE41-NEXT: pcmpeqd 
%xmm0, %xmm7 -; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm1, %xmm6 +; SSE41-NEXT: pxor %xmm5, %xmm6 +; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002259711,9223372039002259711] +; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm7, %xmm6 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483903,2147483903,2147483903,2147483903] ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm8, %xmm0 -; SSE41-NEXT: pand %xmm7, %xmm0 -; SSE41-NEXT: movapd %xmm4, %xmm7 -; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm6 ; SSE41-NEXT: pxor %xmm2, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm7 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 -; SSE41-NEXT: pand %xmm6, %xmm3 +; SSE41-NEXT: pand %xmm7, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 -; SSE41-NEXT: packusdw %xmm7, %xmm4 +; SSE41-NEXT: packusdw %xmm6, %xmm4 ; SSE41-NEXT: packusdw %xmm4, %xmm4 ; SSE41-NEXT: packuswb %xmm4, %xmm4 ; SSE41-NEXT: movd %xmm4, (%rdi) @@ -2175,34 +2165,31 @@ define <8 x i8> @trunc_usat_v8i64_v8i8(ptr %p0) { ; SSE41-NEXT: movdqa 48(%rdi), %xmm7 ; SSE41-NEXT: movapd {{.*#+}} xmm3 = [255,255] ; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 +; SSE41-NEXT: movdqa %xmm2, %xmm9 +; SSE41-NEXT: pxor %xmm6, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259711,9223372039002259711] -; SSE41-NEXT: movdqa %xmm5, %xmm9 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm9 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm5, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483903,2147483903,2147483903,2147483903] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd 
%xmm10, %xmm0 ; SSE41-NEXT: pand %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm3, %xmm9 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm5, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm8, %xmm2 +; SSE41-NEXT: pxor %xmm6, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm5, %xmm2 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm2, %xmm0 ; SSE41-NEXT: movapd %xmm3, %xmm2 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2 ; SSE41-NEXT: packusdw %xmm9, %xmm2 -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm6, %xmm0 -; SSE41-NEXT: movdqa %xmm5, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 -; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm7, %xmm8 +; SSE41-NEXT: pxor %xmm6, %xmm8 +; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm5, %xmm8 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm9, %xmm0 ; SSE41-NEXT: pand %xmm8, %xmm0 @@ -2360,34 +2347,31 @@ define void @trunc_usat_v8i64_v8i8_store(ptr %p0, ptr%p1) { ; SSE41-NEXT: movdqa 48(%rdi), %xmm6 ; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255] ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm7, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 +; SSE41-NEXT: movdqa %xmm7, %xmm9 +; SSE41-NEXT: pxor %xmm5, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259711,9223372039002259711] -; SSE41-NEXT: movdqa %xmm4, %xmm9 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm9 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm4, %xmm9 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483903,2147483903,2147483903,2147483903] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm9, %xmm0 ; 
SSE41-NEXT: movapd %xmm2, %xmm9 ; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm9 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm8, %xmm7 +; SSE41-NEXT: pxor %xmm5, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm4, %xmm7 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm7, %xmm0 ; SSE41-NEXT: movapd %xmm2, %xmm7 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm7 ; SSE41-NEXT: packusdw %xmm9, %xmm7 -; SSE41-NEXT: movdqa %xmm6, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 -; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm6, %xmm8 +; SSE41-NEXT: pxor %xmm5, %xmm8 +; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm4, %xmm8 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm9, %xmm0 ; SSE41-NEXT: pand %xmm8, %xmm0 @@ -2602,44 +2586,40 @@ define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) { ; SSE41-NEXT: movdqa 48(%rdi), %xmm11 ; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] ; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002259456,9223372039002259456] -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm7, %xmm0 +; SSE41-NEXT: movdqa %xmm2, %xmm13 +; SSE41-NEXT: pxor %xmm7, %xmm13 ; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259711,9223372039002259711] -; SSE41-NEXT: movdqa %xmm6, %xmm13 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm13 -; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm0[0,0,2,2] +; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm6, %xmm13 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483903,2147483903,2147483903,2147483903] ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm14, %xmm0 ; SSE41-NEXT: pand %xmm13, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm13 ; SSE41-NEXT: 
blendvpd %xmm0, %xmm2, %xmm13 -; SSE41-NEXT: movdqa %xmm12, %xmm0 -; SSE41-NEXT: pxor %xmm7, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm12, %xmm2 +; SSE41-NEXT: pxor %xmm7, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm2[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm14, %xmm0 ; SSE41-NEXT: pand %xmm2, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm2 ; SSE41-NEXT: blendvpd %xmm0, %xmm12, %xmm2 ; SSE41-NEXT: packusdw %xmm13, %xmm2 -; SSE41-NEXT: movdqa %xmm11, %xmm0 -; SSE41-NEXT: pxor %xmm7, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm12 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm12 -; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm11, %xmm12 +; SSE41-NEXT: pxor %xmm7, %xmm12 +; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm6, %xmm12 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm13, %xmm0 ; SSE41-NEXT: pand %xmm12, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm12 ; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm12 -; SSE41-NEXT: movdqa %xmm10, %xmm0 -; SSE41-NEXT: pxor %xmm7, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm11 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm11 -; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm10, %xmm11 +; SSE41-NEXT: pxor %xmm7, %xmm11 +; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm6, %xmm11 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm13, %xmm0 ; SSE41-NEXT: pand %xmm11, %xmm0 @@ -2647,32 +2627,29 @@ define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) { ; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm11 ; SSE41-NEXT: packusdw %xmm12, %xmm11 ; SSE41-NEXT: packusdw %xmm11, %xmm2 -; SSE41-NEXT: movdqa %xmm9, %xmm0 -; SSE41-NEXT: pxor %xmm7, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm10 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm10 -; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm0[0,0,2,2] +; 
SSE41-NEXT: movdqa %xmm9, %xmm10 +; SSE41-NEXT: pxor %xmm7, %xmm10 +; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm6, %xmm10 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm11, %xmm0 ; SSE41-NEXT: pand %xmm10, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm10 ; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm10 -; SSE41-NEXT: movdqa %xmm8, %xmm0 -; SSE41-NEXT: pxor %xmm7, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm9 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm9 -; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm8, %xmm9 +; SSE41-NEXT: pxor %xmm7, %xmm9 +; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm9[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm11, %xmm0 ; SSE41-NEXT: pand %xmm9, %xmm0 ; SSE41-NEXT: movapd %xmm4, %xmm9 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm9 ; SSE41-NEXT: packusdw %xmm10, %xmm9 -; SSE41-NEXT: movdqa %xmm5, %xmm0 -; SSE41-NEXT: pxor %xmm7, %xmm0 -; SSE41-NEXT: movdqa %xmm6, %xmm8 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm8 -; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] +; SSE41-NEXT: movdqa %xmm5, %xmm8 +; SSE41-NEXT: pxor %xmm7, %xmm8 +; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm8[0,0,2,2] +; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 ; SSE41-NEXT: pand %xmm8, %xmm0 diff --git a/llvm/test/CodeGen/XCore/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/XCore/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..429a78108a7bac --- /dev/null +++ b/llvm/test/CodeGen/XCore/naked-fn-with-frame-pointer.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march xcore | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: # %bb.0: +; CHECK-NEXT: bl main +; CHECK-NEXT: .cc_bottom 
naked.function + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: # %bb.0: +; CHECK-NEXT: entsp 2 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset 15, 0 +; CHECK-NEXT: stw r10, sp[1] # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 10, -4 +; CHECK-NEXT: ldaw r10, sp[0] +; CHECK-NEXT: .cfi_def_cfa_register 10 +; CHECK-NEXT: extsp 1 +; CHECK-NEXT: bl main +; CHECK-NEXT: ldaw sp, sp[1] +; CHECK-NEXT: .cc_bottom normal.function + call void @main() + unreachable +} diff --git a/llvm/test/CodeGen/Xtensa/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/Xtensa/naked-fn-with-frame-pointer.ll new file mode 100644 index 00000000000000..020fcc4f6dae6d --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/naked-fn-with-frame-pointer.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march xtensa | FileCheck %s -check-prefixes=CHECK + +declare dso_local void @main() + +define dso_local void @naked() naked "frame-pointer"="all" { +; CHECK-LABEL: naked: +; CHECK: # %bb.0: +; CHECK-NEXT: l32r a8, {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: callx0 a8 + call void @main() + unreachable +} + +define dso_local void @normal() "frame-pointer"="all" { +; CHECK-LABEL: normal: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: s32i a0, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a15, a1, 0 # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset a0, -4 +; CHECK-NEXT: .cfi_offset a15, -8 +; CHECK-NEXT: or a15, a1, a1 +; CHECK-NEXT: .cfi_def_cfa_register a15 +; CHECK-NEXT: l32r a8, {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: callx0 a8 + call void @main() + unreachable +} diff --git a/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td b/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td new file mode 100644 index 00000000000000..79af1a336f2890 --- 
/dev/null +++ b/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td @@ -0,0 +1,12 @@ +// RUN: llvm-tblgen -gen-global-isel -warn-on-skipped-patterns -I %p/../../include -I %p/Common %s -o /dev/null 2>&1 < %s | FileCheck %s --implicit-check-not="Skipped pattern" + +include "llvm/Target/Target.td" +include "GlobalISelEmitterCommon.td" + +// CHECK: Skipped pattern: Pattern defines a physical register +let Uses = [B0], Defs = [B0] in +def tst1 : I<(outs), (ins), [(set B0, (add B0, 1))]>; + +// CHECK: Skipped pattern: Src pattern result has 1 def(s) without the HasNoUse predicate set to true but Dst MI has no def +let Uses = [B0] in +def tst2 : I<(outs), (ins), [(set B0, (add B0, 1))]>; diff --git a/llvm/test/Transforms/FunctionSpecialization/noinline.ll b/llvm/test/Transforms/FunctionSpecialization/noinline.ll index 73576402b00296..34a8ecbcf7c074 100644 --- a/llvm/test/Transforms/FunctionSpecialization/noinline.ll +++ b/llvm/test/Transforms/FunctionSpecialization/noinline.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="ipsccp" < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -funcspec-for-literal-constant=false < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 diff --git a/llvm/test/Transforms/GlobalOpt/ctor-list-preserve-addrspace.ll b/llvm/test/Transforms/GlobalOpt/ctor-list-preserve-addrspace.ll new file mode 100644 index 00000000000000..3f2f041b90e74d --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ctor-list-preserve-addrspace.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt -S -passes=globalopt < %s | FileCheck %s + +; Make sure the address space of global_ctors is preserved + +%ini = type { i32, ptr, ptr } + +@llvm.global_ctors = appending addrspace(1) global [1 x %ini] [%ini { i32 65534, ptr @ctor1, ptr null }] + +;. +; CHECK: @llvm.global_ctors = appending addrspace(1) global [0 x %ini] zeroinitializer +;. 
+define void @ctor1() { +; CHECK-LABEL: define void @ctor1() local_unnamed_addr { +; CHECK-NEXT: ret void +; + ret void +} + diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 7c33d4765f6d04..8567cc00ed00e3 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -643,6 +643,15 @@ declare float @log2f(float) ; CHECK: declare x86_fp80 @log2l(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @log2l(x86_fp80) +; CHECK: declare i32 @ilogb(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare i32 @ilogb(double) + +; CHECK: declare i32 @ilogbf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare i32 @ilogbf(float) + +; CHECK: declare i32 @ilogbl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare i32 @ilogbl(x86_fp80) + ; CHECK: declare double @logb(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare double @logb(double) diff --git a/llvm/test/Transforms/InstCombine/sink_instruction.ll b/llvm/test/Transforms/InstCombine/sink_instruction.ll index c938002788bc28..dac40852c4bdcb 100644 --- a/llvm/test/Transforms/InstCombine/sink_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_instruction.ll @@ -86,8 +86,8 @@ define i32 @test3(ptr nocapture readonly %P, i32 %i) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: switch i32 [[I:%.*]], label [[SW_EPILOG:%.*]] [ -; CHECK-NEXT: i32 5, label [[SW_BB:%.*]] -; CHECK-NEXT: i32 2, label [[SW_BB]] +; CHECK-NEXT: i32 5, label [[SW_BB:%.*]] +; CHECK-NEXT: i32 2, label [[SW_BB]] ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 @@ -190,8 +190,8 @@ define i32 @test6(ptr nocapture readonly %P, i32 %i, i1 %cond) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: switch i32 [[I]], label 
[[SW_BB:%.*]] [ -; CHECK-NEXT: i32 5, label [[SW_EPILOG:%.*]] -; CHECK-NEXT: i32 2, label [[SW_EPILOG]] +; CHECK-NEXT: i32 5, label [[SW_EPILOG:%.*]] +; CHECK-NEXT: i32 2, label [[SW_EPILOG]] ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: br label [[SW_EPILOG]] @@ -272,3 +272,114 @@ abort: call void @abort() unreachable } + +; Loads marked invariant can be sunk past potential memory writes. + +define i32 @invariant_load_metadata(ptr %p, i1 %cond) { +; CHECK-LABEL: @invariant_load_metadata( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BLOCK:%.*]], label [[END:%.*]] +; CHECK: block: +; CHECK-NEXT: call void @fn() +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4, !invariant.load [[META0:![0-9]+]] +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = load i32, ptr %p, !invariant.load !0 + br i1 %cond, label %block, label %end +block: + call void @fn() + br label %end +end: + ret i32 %v +} + +; Loads not marked invariant cannot be sunk past potential memory writes. + +define i32 @invariant_load_neg(ptr %p, i1 %cond) { +; CHECK-LABEL: @invariant_load_neg( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BLOCK:%.*]], label [[END:%.*]] +; CHECK: block: +; CHECK-NEXT: call void @fn() +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = load i32, ptr %p + br i1 %cond, label %block, label %end +block: + call void @fn() + br label %end +end: + ret i32 %v +} + +; Loads that aren't marked invariant but used in one branch +; can be sunk to that branch. 
+ +define void @invariant_load_use_in_br(ptr %p, i1 %cond) { +; CHECK-LABEL: @invariant_load_use_in_br( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[TRUE_BR:%.*]], label [[FALSE_BR:%.*]] +; CHECK: true.br: +; CHECK-NEXT: call void @fn() +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: false.br: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: call void @fn(i32 [[VAL]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %p + br i1 %cond, label %true.br, label %false.br +true.br: + call void @fn() + br label %exit +false.br: + call void @fn(i32 %val) + br label %exit +exit: + ret void +} + +; Invariant loads marked with metadata can be sunk past calls. + +define void @invariant_load_metadata_call(ptr %p, i1 %cond) { +; CHECK-LABEL: @invariant_load_metadata_call( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @fn() +; CHECK-NEXT: br i1 [[COND:%.*]], label [[TRUE_BR:%.*]], label [[FALSE_BR:%.*]] +; CHECK: true.br: +; CHECK-NEXT: call void @fn() +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: false.br: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: call void @fn(i32 [[VAL]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %p, !invariant.load !0 + call void @fn() + br i1 %cond, label %true.br, label %false.br +true.br: + call void @fn() + br label %exit +false.br: + call void @fn(i32 %val) + br label %exit +exit: + ret void +} + +declare void @fn() + +!0 = !{} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 01fca39296da09..7f325ce1a1f04b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -732,20 +732,9 @@ define void 
@multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-LABEL: define void @multiple_exit_conditions( ; DEFAULT-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2:[0-9]+]] { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 32 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 257, [[TMP8]] -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; DEFAULT-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP3]] -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]] -; DEFAULT-NEXT: [[TMP17:%.*]] = mul i64 [[N_VEC]], 8 -; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]] -; DEFAULT-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2 -; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32 +; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 2048 ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -753,39 +742,20 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] ; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2 -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[TMP1]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; DEFAULT-NEXT: [[TMP9:%.*]] = or [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i16 1, i64 0), poison, zeroinitializer) 
-; DEFAULT-NEXT: [[TMP10:%.*]] = or [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i16 1, i64 0), poison, zeroinitializer) -; DEFAULT-NEXT: [[TMP11:%.*]] = or [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i16 1, i64 0), poison, zeroinitializer) -; DEFAULT-NEXT: [[TMP12:%.*]] = or [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i16 1, i64 0), poison, zeroinitializer) -; DEFAULT-NEXT: [[TMP13:%.*]] = uitofp [[TMP9]] to -; DEFAULT-NEXT: [[TMP14:%.*]] = uitofp [[TMP10]] to -; DEFAULT-NEXT: [[TMP15:%.*]] = uitofp [[TMP11]] to -; DEFAULT-NEXT: [[TMP16:%.*]] = uitofp [[TMP12]] to +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer +; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], +; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double> ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0 -; DEFAULT-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 -; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP19]] -; DEFAULT-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 -; DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP22]] -; DEFAULT-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 24 -; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP25]] -; DEFAULT-NEXT: store [[TMP13]], ptr [[TMP4]], align 8 -; DEFAULT-NEXT: store [[TMP14]], ptr [[TMP20]], align 8 -; DEFAULT-NEXT: store [[TMP15]], ptr [[TMP23]], align 8 -; DEFAULT-NEXT: store [[TMP16]], ptr [[TMP26]], align 8 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; DEFAULT-NEXT: 
[[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[TMP4]], align 8 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; DEFAULT-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; DEFAULT: middle.block: -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 257, [[N_VEC]] -; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: ; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY:%.*]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 512, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: vector.scevcheck: ; DEFAULT-NEXT: unreachable @@ -810,7 +780,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; PRED: vector.ph: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; PRED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; PRED-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 ; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 257, [[TMP2]] ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] @@ -819,31 +789,31 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] ; PRED-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2 ; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 +; PRED-NEXT: [[TMP5:%.*]] = mul i64 
[[TMP4]], 2 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 +; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]] ; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 257, [[TMP7]] ; PRED-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 257) +; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 257) ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; PRED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 0 ; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] ; PRED-NEXT: [[TMP12:%.*]] = load i16, ptr [[SRC]], align 2 -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[TMP12]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PRED-NEXT: [[TMP13:%.*]] = or [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i16 1, i64 0), poison, zeroinitializer) -; PRED-NEXT: [[TMP14:%.*]] = uitofp [[TMP13]] to +; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[TMP12]], i64 0 +; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; PRED-NEXT: [[TMP13:%.*]] = or [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i16 1, i64 0), poison, zeroinitializer) +; PRED-NEXT: [[TMP14:%.*]] = uitofp [[TMP13]] 
to ; PRED-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0 -; PRED-NEXT: call void @llvm.masked.store.nxv8f64.p0( [[TMP14]], ptr [[TMP15]], i32 8, [[ACTIVE_LANE_MASK]]) +; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP14]], ptr [[TMP15]], i32 8, [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP10]]) -; PRED-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; PRED-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 +; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]]) +; PRED-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; PRED-NEXT: [[TMP17:%.*]] = extractelement [[TMP16]], i32 0 ; PRED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll index 59da1e10fd2a07..f28f77bf1b1558 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll @@ -1,23 +1,23 @@ ; REQUIRES: asserts ; RUN: opt -mtriple=aarch64 -mattr=+sve \ ; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE16 +; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE4 ; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic \ ; RUN: -force-target-instruction-cost=1 
-passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE16 +; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE4 ; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v1 \ ; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=NEOVERSE-V1,VF-VSCALE16 +; RUN: | FileCheck %s --check-prefixes=NEOVERSE-V1,VF-VSCALE4 ; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 \ ; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE16 +; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE4 ; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 \ ; RUN: -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE16 +; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE4 ; GENERIC: LV: Vector loop of width vscale x 2 costs: 3 (assuming a minimum vscale of 2). ; GENERIC: LV: Vector loop of width vscale x 4 costs: 1 (assuming a minimum vscale of 2). @@ -29,7 +29,7 @@ ; NEOVERSE-N2: LV: Vector loop of width vscale x 4 costs: 3 (assuming a minimum vscale of 1). ; VF-4: <4 x i32> -; VF-VSCALE16: +; VF-VSCALE4: <16 x i32> define void @test0(ptr %a, ptr %b, ptr %c) #0 { entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll index a84932a2290d67..e83eb729b521c2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll @@ -8,8 +8,8 @@ ; (maximized bandwidth for i8 in the loop). 
define void @test0(ptr %a, ptr %b, ptr %c) #0 { ; CHECK: LV: Checking a loop in 'test0' -; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 16 -; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 16 +; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 +; CHECK_SCALABLE_ON: LV: Selecting VF: 16 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index a4861ad0b26196..7d2fc348480a09 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -145,7 +145,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i16 [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; DEFAULT-NEXT: iter.check: ; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; DEFAULT-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP1]] ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; DEFAULT: vector.memcheck: @@ -155,72 +155,59 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; DEFAULT-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; DEFAULT: vector.main.loop.iter.check: -; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[TMP9]], 32 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 0, [[TMP3]] -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; DEFAULT-NEXT: br i1 true, label 
[[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 -; DEFAULT-NEXT: [[N_MOD_VF1:%.*]] = urem i64 0, [[TMP5]] -; DEFAULT-NEXT: [[N_VEC1:%.*]] = sub i64 0, [[N_MOD_VF1]] -; DEFAULT-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 32 -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[X]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; DEFAULT-NEXT: [[TMP8:%.*]] = trunc [[BROADCAST_SPLAT]] to +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT3]], <16 x i16> poison, <16 x i32> zeroinitializer +; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT4]] to <16 x i8> ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META5:![0-9]+]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; DEFAULT-NEXT: [[TMP11:%.*]] = trunc [[BROADCAST_SPLAT3]] to -; DEFAULT-NEXT: [[TMP22:%.*]] = and [[TMP11]], [[TMP8]] -; DEFAULT-NEXT: [[TMP13:%.*]] = and [[TMP11]], [[TMP8]] +; DEFAULT-NEXT: [[TMP4:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META5:![0-9]+]] +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TMP4]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> 
poison, <16 x i32> zeroinitializer +; DEFAULT-NEXT: [[TMP5:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT2]] to <16 x i8> +; DEFAULT-NEXT: [[TMP8:%.*]] = and <16 x i8> [[TMP5]], [[TMP7]] +; DEFAULT-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP5]], [[TMP7]] ; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]] ; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 -; DEFAULT-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP23:%.*]] = mul i64 [[TMP16]], 16 -; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP23]] -; DEFAULT-NEXT: store [[TMP22]], ptr [[TMP12]], align 1, !alias.scope [[META8:![0-9]+]], !noalias [[META5]] -; DEFAULT-NEXT: store [[TMP13]], ptr [[TMP24]], align 1, !alias.scope [[META8]], !noalias [[META5]] -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; DEFAULT-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC1]] -; DEFAULT-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP10]], i32 16 +; DEFAULT-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP12]], align 1, !alias.scope [[META8:![0-9]+]], !noalias [[META5]] +; DEFAULT-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP13]], align 1, !alias.scope [[META8]], !noalias [[META5]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; DEFAULT-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; DEFAULT-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; DEFAULT: middle.block: -; DEFAULT-NEXT: [[CMP_N1:%.*]] = icmp eq i64 0, [[N_VEC1]] -; DEFAULT-NEXT: br i1 [[CMP_N1]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; DEFAULT: vec.epilog.iter.check: -; DEFAULT-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 0, [[N_VEC1]] ; DEFAULT-NEXT: [[TMP15:%.*]] = call i64 
@llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP31:%.*]] = mul i64 [[TMP15]], 8 -; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP31]] +; DEFAULT-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP16]] ; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; DEFAULT: vec.epilog.ph: -; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC1]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; DEFAULT-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 8 +; DEFAULT-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 0, [[TMP18]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 0, [[N_MOD_VF]] ; DEFAULT-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 8 -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, i16 [[X]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer -; DEFAULT-NEXT: [[TMP32:%.*]] = trunc [[BROADCAST_SPLAT7]] to +; DEFAULT-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, i16 [[X]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP24:%.*]] = trunc [[BROADCAST_SPLAT7]] to ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; DEFAULT: vec.epilog.vector.body: ; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[INDEX5]], 0 -; DEFAULT-NEXT: [[TMP33:%.*]] = load 
i64, ptr [[SRC]], align 8, !alias.scope [[META11:![0-9]+]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement poison, i64 [[TMP33]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector [[BROADCAST_SPLATINSERT9]], poison, zeroinitializer -; DEFAULT-NEXT: [[TMP29:%.*]] = trunc [[BROADCAST_SPLAT10]] to -; DEFAULT-NEXT: [[TMP30:%.*]] = and [[TMP29]], [[TMP32]] +; DEFAULT-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META11:![0-9]+]] +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP22]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP23:%.*]] = trunc [[BROADCAST_SPLAT]] to +; DEFAULT-NEXT: [[TMP25:%.*]] = and [[TMP23]], [[TMP24]] ; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP21]] ; DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0 -; DEFAULT-NEXT: store [[TMP30]], ptr [[TMP27]], align 1, !alias.scope [[META14:![0-9]+]], !noalias [[META11]] +; DEFAULT-NEXT: store [[TMP25]], ptr [[TMP27]], align 1, !alias.scope [[META14:![0-9]+]], !noalias [[META11]] ; DEFAULT-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], [[TMP20]] ; DEFAULT-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -228,7 +215,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; DEFAULT: vec.epilog.scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC1]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, 
[[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -247,10 +234,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED-LABEL: define void @trunc_store( ; PRED-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i16 [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; PRED-NEXT: entry: -; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP7]], 16 -; PRED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 0, [[TMP1]] -; PRED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; PRED-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; PRED: vector.memcheck: ; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 ; PRED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]] @@ -258,35 +242,28 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; PRED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; PRED: vector.ph: -; PRED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 0, [[TMP3]] -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 0, [[N_MOD_VF]] -; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 16 -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[X]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PRED-NEXT: [[TMP12:%.*]] = trunc [[BROADCAST_SPLAT]] to +; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0 +; PRED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT1]], <16 
x i16> poison, <16 x i32> zeroinitializer +; PRED-NEXT: [[TMP3:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT2]] to <16 x i8> ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; PRED-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META4:![0-9]+]] -; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; PRED-NEXT: [[TMP9:%.*]] = trunc [[BROADCAST_SPLAT2]] to -; PRED-NEXT: [[TMP10:%.*]] = and [[TMP9]], [[TMP12]] +; PRED-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META4:![0-9]+]] +; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0 +; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer +; PRED-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT]] to <16 x i8> +; PRED-NEXT: [[TMP4:%.*]] = and <16 x i8> [[TMP2]], [[TMP3]] ; PRED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] ; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0 -; PRED-NEXT: store [[TMP10]], ptr [[TMP6]], align 1, !alias.scope [[META7:![0-9]+]], !noalias [[META4]] -; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; PRED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; PRED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; PRED-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1, !alias.scope [[META7:![0-9]+]], !noalias [[META4]] +; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; PRED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; PRED-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, [[N_VEC]] -; PRED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 6a7263d6498535..0b3f28e8db5c4d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -19,19 +19,19 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-EMPTY: ; VPLANS-NEXT: vector.ph: ; VPLANS-NEXT: EMIT vp<[[NEWTC:%[0-9]+]]> = TC > VF ? 
TC - VF : 0 vp<[[TC]]> -; VPLANS-NEXT: EMIT vp<[[VF:%[0-9]+]]> = VF * Part + ir<0> -; VPLANS-NEXT: EMIT vp<[[LANEMASK_ENTRY:%[0-9]+]]> = active lane mask vp<[[VF]]>, vp<[[TC]]> +; VPLANS-NEXT: EMIT vp<[[VF:%.+]]> = VF * Part + ir<0> +; VPLANS-NEXT: EMIT vp<[[LANEMASK_ENTRY:%.+]]> = active lane mask vp<[[VF]]>, vp<[[TC]]> ; VPLANS-NEXT: Successor(s): vector loop ; VPLANS-EMPTY: ; VPLANS-NEXT: vector loop: { ; VPLANS-NEXT: vector.body: ; VPLANS-NEXT: EMIT vp<[[INDV:%[0-9]+]]> = CANONICAL-INDUCTION -; VPLANS-NEXT: ACTIVE-LANE-MASK-PHI vp<[[LANEMASK_PHI:%[0-9]+]]> = phi vp<[[LANEMASK_ENTRY]]>, vp<[[LANEMASK_LOOP:%[0-9]+]]> +; VPLANS-NEXT: ACTIVE-LANE-MASK-PHI vp<[[LANEMASK_PHI:%[0-9]+]]> = phi vp<[[LANEMASK_ENTRY]]>, vp<[[LANEMASK_LOOP:%.+]]> ; VPLANS-NEXT: vp<[[STEP:%[0-9]+]]> = SCALAR-STEPS vp<[[INDV]]>, ir<1> ; VPLANS-NEXT: CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEP]]> ; VPLANS-NEXT: vp<[[VEC_PTR:%[0-9]+]]> = vector-pointer ir<%gep> ; VPLANS-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%val>, vp<[[LANEMASK_PHI]]> -; VPLANS-NEXT: EMIT vp<[[INDV_UPDATE:%[0-9]+]]> = add vp<[[INDV]]>, vp<[[VFxUF]]> +; VPLANS-NEXT: EMIT vp<[[INDV_UPDATE:%.+]]> = add vp<[[INDV]]>, vp<[[VFxUF]]> ; VPLANS-NEXT: EMIT vp<[[INC:%[0-9]+]]> = VF * Part + vp<[[INDV]]> ; VPLANS-NEXT: EMIT vp<[[LANEMASK_LOOP]]> = active lane mask vp<[[INC]]>, vp<[[NEWTC]]> ; VPLANS-NEXT: EMIT vp<[[NOT:%[0-9]+]]> = not vp<[[LANEMASK_LOOP]]> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 04ac89518502aa..0c41477f285d0a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -313,68 +313,36 @@ for.exit: define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 { ; CHECK-LABEL: define void @histogram_8bit( ; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: 
iter.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP9]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 4 -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP7]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -16 +; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = zext [[WIDE_LOAD]] to -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], [[TMP10]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8( [[TMP20]], i8 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = zext [[WIDE_LOAD]] to +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr 
[[BUCKETS]], [[TMP6]] +; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i8( [[TMP7]], i8 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 3 -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP12]] -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[SCALAR_PH]], label [[VEC_EPILOG_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG8:%.*]] = mul nsw i64 [[TMP13]], -8 -; CHECK-NEXT: [[N_VEC3:%.*]] = and i64 [[N]], [[DOTNEG8]] -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 3 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY1:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[FOR_BODY1]] ] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX4]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load , ptr [[TMP16]], align 4 
-; CHECK-NEXT: [[TMP17:%.*]] = zext [[WIDE_LOAD5]] to -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], [[TMP17]] -; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv8p0.i8( [[TMP18]], i8 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP15]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY1]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_EXIT]], label [[SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY2:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY2]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] ; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_INDICES]], align 4 ; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64 @@ -384,7 +352,7 @@ define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) ; CHECK-NEXT: store i8 [[INC]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY2]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -425,7 +393,7 @@ 
define void @histogram_float(ptr noalias %buckets, ptr readonly %indices, i64 %N ; CHECK-NEXT: store float [[INC]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -468,7 +436,7 @@ define void @histogram_varying_increment(ptr noalias %buckets, ptr readonly %ind ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -526,7 +494,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( [[TMP21]], i32 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -544,7 +512,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 
4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -596,7 +564,7 @@ define void @histogram_array_3op_gep(i64 noundef %N) #0 { ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( [[TMP11]], i32 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -614,7 +582,7 @@ define void @histogram_array_3op_gep(i64 noundef %N) #0 { ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX6]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -666,7 +634,7 @@ define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr reado ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( [[TMP7]], i32 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]] ; CHECK-NEXT: 
[[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -684,7 +652,7 @@ define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr reado ; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_BUCKET]], align 4 ; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -733,13 +701,13 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 -; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 poison, label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -806,7 +774,7 @@ define void 
@simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -827,7 +795,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[IDX_ADDR]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -919,7 +887,7 @@ define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64( [[TMP6]], i64 1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -936,7 +904,7 @@ define 
void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: store i64 [[INC]], ptr [[GEP_BUCKET]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll index 691c743be7d74d..dec3c286345adf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -24,25 +24,25 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = zext [[WIDE_LOAD]] to -; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP10]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = zext [[WIDE_LOAD]] to +; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP10]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 +; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -107,25 +107,25 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 
[[TMP7]], 16 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = sext [[WIDE_LOAD]] to -; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP10]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = sext [[WIDE_LOAD]] to +; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP10]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 +; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll index a1a13f1e0c377b..4a2f9d07ed91c6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=WIDE -; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -vectorizer-maximize-bandwidth=false -vectorizer-maximize-bandwidth-for-vector-calls=false -S | FileCheck %s 
--check-prefixes=NARROW +; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -vectorizer-maximize-bandwidth-for-vector-calls=false -S | FileCheck %s --check-prefixes=NARROW target triple = "aarch64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 90c209cf3f5186..1326751a847d7d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -37,7 +37,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: vector loop: { ; IF-EVL-INLOOP-NEXT: vector.body: ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION -; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]> +; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, ir<[[RDX_NEXT:%.+]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%n>, vp<[[EVL_PHI]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -48,7 +48,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) ; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> -; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; IF-EVL-INLOOP-NEXT: No successors ; IF-EVL-INLOOP-NEXT: } @@ -86,7 +86,7 @@ define i32 @reduction(ptr 
%a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; NO-VP-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]> ; NO-VP-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]> -; NO-VP-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> ; NO-VP-OUTLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; NO-VP-OUTLOOP-NEXT: No successors ; NO-VP-OUTLOOP-NEXT: } @@ -125,7 +125,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> ; NO-VP-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]> ; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reduce.add (ir<[[LD1]]>) -; NO-VP-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; NO-VP-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> ; NO-VP-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; NO-VP-INLOOP-NEXT: No successors ; NO-VP-INLOOP-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll index c14a8bce8f48d8..706b6f88829848 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll @@ -22,7 +22,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: vector loop: { ; IF-EVL-NEXT: vector.body: ; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION -; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]> +; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> ; 
IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> @@ -38,7 +38,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; IF-EVL-NEXT: No successors ; IF-EVL-NEXT: } @@ -65,7 +65,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> ; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]> -; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> ; NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; NO-VP-NEXT: No successors ; NO-VP-NEXT: } @@ -110,7 +110,7 @@ define void @safe_dep(ptr %p) { ; CHECK-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr ir<%p>, ir<[[OFFSET]]> ; CHECK-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; CHECK-NEXT: WIDEN store vp<[[PTR2]]>, ir<[[V]]> -; CHECK-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll index c26ab2017280a2..6d6cfb5e9d18ed 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll @@ -17,7 +17,7 @@ ; IF-EVL: vector loop: { ; IF-EVL-NEXT: vector.body: ; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION - ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]> + ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%.+]]> ; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> @@ -36,7 +36,7 @@ ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> - ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]> + ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; IF-EVL-NEXT: No successors ; IF-EVL-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll index 9b49d44141db39..1af03e740ef1ab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll @@ -36,7 +36,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> ; IF-EVL-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[MASK]]> -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; 
IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; IF-EVL-NEXT: No successors ; IF-EVL-NEXT: } @@ -63,7 +63,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> ; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]> -; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> ; NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; NO-VP-NEXT: No successors ; NO-VP-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 5e4ea2c0bfc508..9de675b2853097 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -34,7 +34,7 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> -; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -44,11 +44,11 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> -; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]> = resume-phi vp<[[RESUME_2]]>, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: 
Live-out i16 %for.1 = vp<[[RESUME_1_P]]> -; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]> +; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]>.1 ; CHECK-NEXT: } ; entry: @@ -105,8 +105,8 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> -; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> -; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]> = extract-from-end vp<[[FOR2_SPLICE]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]>.2 = extract-from-end vp<[[FOR2_SPLICE]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -116,13 +116,13 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> -; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]> = resume-phi vp<[[RESUME_2]]>, ir<33> -; CHECK-NEXT: EMIT vp<[[RESUME_3_P:%.*]]> = resume-phi vp<[[RESUME_3]]>, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_3_P:%.*]]>.2 = resume-phi vp<[[RESUME_3]]>.2, ir<33> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]> -; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]> -; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3_P]]> +; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]>.1 +; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3_P]]>.2 ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll index 5ea27994b356da..27d81de260d3b9 100644 --- 
a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll @@ -18,9 +18,9 @@ define void @test_unused_interleave(ptr %src, i32 %length) { ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%3> -; CHECK-NEXT: EMIT vp<%3> = add nuw vp<%2>, vp<%0> -; CHECK-NEXT: EMIT branch-on-count vp<%3>, vp<%1> +; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: diff --git a/llvm/test/Transforms/Sink/invariant-load.ll b/llvm/test/Transforms/Sink/invariant-load.ll index 1aab4a96963230..c8fb119acd30a8 100644 --- a/llvm/test/Transforms/Sink/invariant-load.ll +++ b/llvm/test/Transforms/Sink/invariant-load.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=sink -S < %s | FileCheck %s -; Loads marked invariant can be sunk across critical edges +; Loads marked invariant can be sunk across critical edges. define <4 x float> @invariant_load(ptr %in, i32 %s) { ; CHECK-LABEL: @invariant_load( @@ -12,7 +12,7 @@ define <4 x float> @invariant_load(ptr %in, i32 %s) { ; CHECK-NEXT: [[Z:%.*]] = add i32 [[S]], 1 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[V:%.*]] = load <4 x float>, ptr [[IN:%.*]], align 16, !invariant.load !0 +; CHECK-NEXT: [[V:%.*]] = load <4 x float>, ptr [[IN:%.*]], align 16, !invariant.load [[META0:![0-9]+]] ; CHECK-NEXT: ret <4 x float> [[V]] ; main_body: @@ -26,4 +26,67 @@ end: ret <4 x float> %v } +; Loads that aren't marked invariant but used in one branch +; can be sunk to that branch. 
+ +define void @invariant_load_use_in_br(ptr %p, i1 %cond) { +; CHECK-LABEL: @invariant_load_use_in_br( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[TRUE_BR:%.*]], label [[FALSE_BR:%.*]] +; CHECK: true.br: +; CHECK-NEXT: call void @fn() +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: false.br: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: call void @fn(i32 [[VAL]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %p + br i1 %cond, label %true.br, label %false.br +true.br: + call void @fn() + br label %exit +false.br: + call void @fn(i32 %val) + br label %exit +exit: + ret void +} + +; TODO: Invariant loads marked with metadata can be sunk past calls. + +define void @invariant_load_metadata_call(ptr %p, i1 %cond) { +; CHECK-LABEL: @invariant_load_metadata_call( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: call void @fn() +; CHECK-NEXT: br i1 [[COND:%.*]], label [[TRUE_BR:%.*]], label [[FALSE_BR:%.*]] +; CHECK: true.br: +; CHECK-NEXT: call void @fn() +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: false.br: +; CHECK-NEXT: call void @fn(i32 [[VAL]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %val = load i32, ptr %p, !invariant.load !0 + call void @fn() + br i1 %cond, label %true.br, label %false.br +true.br: + call void @fn() + br label %exit +false.br: + call void @fn(i32 %val) + br label %exit +exit: + ret void +} + +declare void @fn() + !0 = !{} diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index 3eb6d8b8eea9b4..aad5794fd8c278 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -34,7 +34,7 @@ # # CHECK: << Total TLI yes SDK no: 18 # CHECK: >> Total TLI no SDK yes: 0 -# CHECK: == Total TLI yes 
SDK yes: 256 +# CHECK: == Total TLI yes SDK yes: 259 # # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*) # WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int) @@ -48,14 +48,14 @@ # WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl' # WRONG_SUMMARY: << Total TLI yes SDK no: 19{{$}} # WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}} -# WRONG_SUMMARY: == Total TLI yes SDK yes: 255 +# WRONG_SUMMARY: == Total TLI yes SDK yes: 258 # ## The -COUNT suffix doesn't care if there are too many matches, so check ## the exact count first; the two directives should add up to that. ## Yes, this means additions to TLI will fail this test, but the argument ## to -COUNT can't be an expression. -# AVAIL: TLI knows 507 symbols, 274 available -# AVAIL-COUNT-274: {{^}} available +# AVAIL: TLI knows 510 symbols, 277 available +# AVAIL-COUNT-277: {{^}} available # AVAIL-NOT: {{^}} available # UNAVAIL-COUNT-233: not available # UNAVAIL-NOT: not available @@ -654,6 +654,18 @@ DynamicSymbols: Type: STT_FUNC Section: .text Binding: STB_GLOBAL + - Name: ilogb + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: ilogbf + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: ilogbl + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL - Name: logb Type: STT_FUNC Section: .text diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index 4975651b1e502f..b4856b50bbe584 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -266,6 +266,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare double @log2(double)\n" "declare float @log2f(float)\n" "declare x86_fp80 @log2l(x86_fp80)\n" + "declare i32 @ilogb(double)\n" + "declare i32 @ilogbf(float)\n" + "declare i32 @ilogbl(x86_fp80)\n" "declare double @logb(double)\n" "declare float @logbf(float)\n" "declare x86_fp80 @logbl(x86_fp80)\n" diff --git 
a/llvm/unittests/CodeGen/MFCommon.inc b/llvm/unittests/CodeGen/MFCommon.inc index 5d5720c3162da9..749c5780fbac3d 100644 --- a/llvm/unittests/CodeGen/MFCommon.inc +++ b/llvm/unittests/CodeGen/MFCommon.inc @@ -14,7 +14,9 @@ public: MachineBasicBlock &MBB) const override {} void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override {} - bool hasFP(const MachineFunction &MF) const override { return false; } + +protected: + bool hasFPImpl(const MachineFunction &MF) const override { return false; } }; static TargetRegisterClass *const BogusRegisterClasses[] = {nullptr}; diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index 4926afbfc6d8ce..00a3c737c0e47a 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -133,8 +133,8 @@ compound=true N2 -> N4 [ label="" ltail=cluster_N3] N4 [label = "middle.block:\l" + - " EMIT vp\<%1\> = icmp eq ir\<%N\>, vp\<%0\>\l" + - " EMIT branch-on-cond vp\<%1\>\l" + + " EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" + + " EMIT branch-on-cond vp\<%cmp.n\>\l" + "Successor(s): ir-bb\, scalar.ph\l" ] N4 -> N5 [ label="T"] diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index c53f705a38db8f..29c64ba95ff856 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -2023,7 +2023,10 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { auto &DstI = Target.getInstruction(DstOp); StringRef DstIName = DstI.TheDef->getName(); - unsigned DstNumDefs = DstI.Operands.NumDefs, + // Count both implicit and explicit defs in the dst instruction. + // This avoids errors importing patterns that have inherent implicit defs. 
+ unsigned DstExpDefs = DstI.Operands.NumDefs, + DstNumDefs = DstI.ImplicitDefs.size() + DstExpDefs, SrcNumDefs = Src.getExtTypes().size(); if (DstNumDefs < SrcNumDefs) { if (DstNumDefs != 0) @@ -2045,7 +2048,7 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { // The root of the match also has constraints on the register bank so that it // matches the result instruction. unsigned OpIdx = 0; - unsigned N = std::min(DstNumDefs, SrcNumDefs); + unsigned N = std::min(DstExpDefs, SrcNumDefs); for (unsigned I = 0; I < N; ++I) { const TypeSetByHwMode &VTy = Src.getExtType(I); diff --git a/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn index 84d569d3426544..1b193af6c30af7 100644 --- a/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn @@ -96,6 +96,7 @@ static_library("Basic") { "SourceManager.cpp", "SourceMgrAdapter.cpp", "Stack.cpp", + "StackExhaustionHandler.cpp", "TargetID.cpp", "TargetInfo.cpp", "Targets.cpp", diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 635273bcbc0208..d39c5fcdbc4286 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -250,6 +250,32 @@ struct VectorizationState { LinalgOp linalgOp, std::optional maybeMaskingMap); + /// Check whether this permutation map can be used for masking. At the + /// moment we only make sure that there are no broadcast dimensions, but this + /// might change if indexing maps evolve. + bool isValidMaskingMap(AffineMap maskingMap) { + return maskingMap.getBroadcastDims().size() == 0; + } + + /// Turn the input indexing map into a valid masking map. 
+ /// + /// The input indexing map may contain "zero" results, e.g.: + /// (d0, d1, d2, d3) -> (d2, d1, d0, 0) + /// Applying such maps to canonical vector shapes like this one: + /// (1, 16, 16, 4) + /// would yield an invalid vector shape like this: + /// (16, 16, 1, 0) + /// Instead, drop the broadcasting dims that make no sense for masking perm. + /// maps: + /// (d0, d1, d2, d3) -> (d2, d1, d0) + /// This way, the corresponding vector/mask type will be: + /// vector<16x16x1xty> + /// rather than this invalid Vector type: + /// vector<16x16x1x0xty> + AffineMap getMaskingMapFromIndexingMap(AffineMap &indexingMap) { + return indexingMap.dropZeroResults(); + } + // Holds the compile-time static sizes of the iteration space to vectorize. // Dynamic dimensions are represented using ShapedType::kDynamic. SmallVector iterSpaceStaticSizes; @@ -360,6 +386,10 @@ VectorizationState::initState(RewriterBase &rewriter, LinalgOp linalgOp, Value VectorizationState::getOrCreateMaskFor( RewriterBase &rewriter, Operation *opToMask, LinalgOp linalgOp, std::optional maybeMaskingMap) { + + assert((!maybeMaskingMap || isValidMaskingMap(*maybeMaskingMap)) && + "Ill-formed masking map."); + // No mask is needed if the operation is not maskable. 
auto maskableOp = dyn_cast(opToMask); if (!maskableOp) @@ -429,20 +459,8 @@ VectorizationState::maskOperation(RewriterBase &rewriter, Operation *opToMask, LDBG("Trying to mask: " << *opToMask << "\n"); std::optional maybeMaskingMap = std::nullopt; - // The Operand indexing map may contain "zero" results, e.g.: - // (d0, d1, d2, d3) -> (d0, d1, d2, 0) - // When applied to canonical vector shapes like these: - // (1, 16, 16, 4) - // we would get: - // (1, 16, 16, 0) - // Instead, we should extract the following map permutation map for masking: - // (d0, d1, d2, d3) -> (d0, d1, d2) - // This way, the corresponding vector/mask type will be: - // vector<1x16x16xty> - // rather than: - // vector<1x16x16x0xty> if (maybeIndexingMap) - maybeMaskingMap = maybeIndexingMap->dropZeroResults(); + maybeMaskingMap = getMaskingMapFromIndexingMap(*maybeIndexingMap); // Create or retrieve mask for this operation. Value mask = diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp index d2ab4cabb32bf1..70b2aaf9a17e0b 100644 --- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp @@ -47,7 +47,7 @@ mlir::getReassociationIndicesForCollapse(ArrayRef sourceShape, break; int64_t currTargetShape = targetShape[targetDim]; - while (sourceDim < sourceShape.size() && + while (sourceDim < (sourceShape.size() - 1) && sourceShape[sourceDim] != ShapedType::kDynamic && prodOfCollapsedDims * sourceShape[sourceDim] < currTargetShape) { prodOfCollapsedDims *= sourceShape[sourceDim]; diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp index 3de4fb75ed831c..7e45f18b660ba7 100644 --- a/mlir/lib/Transforms/RemoveDeadValues.cpp +++ b/mlir/lib/Transforms/RemoveDeadValues.cpp @@ -589,7 +589,7 @@ void RemoveDeadValues::runOnOperation() { }); if (acceptableIR.wasInterrupted()) - return; + return signalPassFailure(); module->walk([&](Operation *op) { if (auto funcOp = dyn_cast(op)) 
{ diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 0aa2d33ef17ed4..dbf0f0b81f6114 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -1251,6 +1251,29 @@ func.func @no_fold_expand_of_collapse_dynamic(%arg0 : tensor, %arg1: // ----- +func.func @compose_expand_of_collapse_last_two_dims(%arg0: tensor) -> tensor { + %collapsed = tensor.collapse_shape %arg0 [[0, 1, 2]] : tensor into tensor + %c0 = arith.constant 0 : index + %dim = tensor.dim %collapsed, %c0 : tensor + %c384= arith.constant 384 : index + %div = arith.divui %dim, %c384 : index + %expanded = tensor.expand_shape %collapsed [[0, 1]] output_shape [%div, 384] : tensor into tensor + return %expanded : tensor +} +// CHECK: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 * 64)> +// CHECK-LABEL: @compose_expand_of_collapse_last_two_dims +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK: %[[CONSTANT0:.+]] = arith.constant 0 : index +// CHECK: %[[CONSTANT384:.+]] = arith.constant 384 : index +// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2]] : tensor into tensor +// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[CONSTANT0]] : tensor +// CHECK: %[[AFFAPPLY:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]] +// CHECK: %[[DIVUI:.+]] = arith.divui %[[AFFAPPLY]], %[[CONSTANT384]] : index +// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[COLLAPSE]] {{\[}}[0, 1]] output_shape [%[[DIVUI]], 384] : tensor into tensor +// CHECK: return %[[RESULT]] + +// ----- + func.func @compose_expand_of_collapse(%arg0 : tensor<2x3x4x5x6x7x8xf32>) -> tensor<24x5x42x8xf32> { %0 = tensor.collapse_shape %arg0 [[0, 1, 2, 3, 4, 5, 6]] diff --git a/mlir/test/mlir-rewrite/simple.mlir b/mlir/test/mlir-rewrite/simple.mlir index ab6bfe24fccf03..66f17f093f5094 100644 --- a/mlir/test/mlir-rewrite/simple.mlir +++ b/mlir/test/mlir-rewrite/simple.mlir @@ -4,8 +4,7 @@ func.func @two_dynamic_one_direct_shape(%arg0: tensor, %arg1: 
tensor<2x4x?xf32>) -> tensor { // RENAME: "test.concat"({{.*}}) {bxis = 0 : i64} - // RANGE: 《%{{.*}} = 〖"test.concat"〗({{.*}}) {axis = 0 : i64} : (tensor, tensor<2x4x?xf32>) -> tensor》 + // RANGE: <%{{.*}} = ["test.concat"]({{.*}}) {axis = 0 : i64} : (tensor, tensor<2x4x?xf32>) -> tensor> %5 = "test.concat"(%arg0, %arg1) {axis = 0 : i64} : (tensor, tensor<2x4x?xf32>) -> tensor return %5 : tensor } - diff --git a/mlir/tools/mlir-rewrite/mlir-rewrite.cpp b/mlir/tools/mlir-rewrite/mlir-rewrite.cpp index 308e6490726c86..e70aa5d41aa049 100644 --- a/mlir/tools/mlir-rewrite/mlir-rewrite.cpp +++ b/mlir/tools/mlir-rewrite/mlir-rewrite.cpp @@ -320,25 +320,25 @@ LogicalResult markRanges(RewritePad &rewriteState, raw_ostream &os) { for (auto it : rewriteState.getOpDefs()) { auto [startOp, endOp] = getOpRange(it); - rewriteState.insertText(startOp, "《"); - rewriteState.insertText(endOp, "》"); + rewriteState.insertText(startOp, "<"); + rewriteState.insertText(endOp, ">"); auto nameRange = getOpNameRange(it); if (isGeneric(it)) { - rewriteState.insertText(nameRange.Start, "〖"); - rewriteState.insertText(nameRange.End, "〗"); + rewriteState.insertText(nameRange.Start, "["); + rewriteState.insertText(nameRange.End, "]"); } else { - rewriteState.insertText(nameRange.Start, "〔"); - rewriteState.insertText(nameRange.End, "〕"); + rewriteState.insertText(nameRange.Start, "!["); + rewriteState.insertText(nameRange.End, "]!"); } } // Highlight all comment lines. // TODO: Could be replaced if this is kept in memory. 
for (auto commentLine : rewriteState.getSingleLineComments()) { - rewriteState.insertText(commentLine.Start, "❰"); - rewriteState.insertText(commentLine.End, "❱"); + rewriteState.insertText(commentLine.Start, "{"); + rewriteState.insertText(commentLine.End, "}"); } return success(); diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 8f3bbe68648fef..1abc0ccda4c7e6 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -103,6 +103,17 @@ libc_support_library( deps = [":llvm_libc_macros_float_macros"], ) +libc_support_library( + name = "llvm_libc_types_cfloat128", + hdrs = ["include/llvm-libc-types/cfloat128.h"], + deps = [":llvm_libc_macros_float_macros"], +) + +libc_support_library( + name = "llvm_libc_types_cfloat16", + hdrs = ["include/llvm-libc-types/cfloat16.h"], +) + libc_support_library( name = "llvm_libc_macros_fcntl_macros", hdrs = ["include/llvm-libc-macros/linux/fcntl-macros.h"], @@ -268,6 +279,16 @@ libc_support_library( hdrs = ["src/__support/macros/properties/os.h"], ) +libc_support_library( + name = "__support_macros_properties_complex_types", + hdrs = ["src/__support/macros/properties/complex_types.h"], + deps = [ + ":__support_macros_properties_types", + ":llvm_libc_types_cfloat128", + ":llvm_libc_types_cfloat16", + ], +) + libc_support_library( name = "__support_macros_properties_types", hdrs = ["src/__support/macros/properties/types.h"], @@ -493,6 +514,7 @@ libc_support_library( deps = [ ":__support_macros_attributes", ":__support_macros_config", + ":__support_macros_properties_complex_types", ":__support_macros_properties_types", ":llvm_libc_macros_stdfix_macros", ],